# /usr/share/pyshared/Bio/PDB/MMCIF2Dict.py

# Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk)
# This code is part of the Biopython distribution and governed by its
# license.  Please see the LICENSE file that should have been included
# as part of this package.

"""Turn an mmCIF file into a dictionary."""

import os.path
import warnings
import Bio.PDB.mmCIF.MMCIFlex
from UserDict import UserDict


class MMCIF2Dict(UserDict):
    """Dictionary-like view of the name/data pairs found in an mmCIF file.

    The file is tokenised by the ``Bio.PDB.mmCIF.MMCIFlex`` lexer and the
    result is stored in ``self.data``:

    - a name outside a loop maps to the single token value that follows it;
    - every name declared in a loop maps to a list with that column's values;
    - a DATA token is split after its first five characters: the first part
      becomes the key, the rest the value (presumably the key is the literal
      ``data_`` prefix -- confirm against the lexer);
    - the key ``None`` maps to a list of values that had no preceding name.

    Malformed input is reported through ``RuntimeWarning`` warnings instead
    of exceptions.
    """

    # The token identifiers compared against the first element of the
    # (token, value) pairs returned by MMCIFlex.get_token().  A token of 0
    # means end of file; identifiers above DATA are treated as data values.
    NAME=1
    LOOP=2
    DATA=3
    SEMICOLONS=4
    DOUBLEQUOTED=5
    QUOTED=6
    SIMPLE=7

    def __init__(self, filename):
        """Parse the mmCIF file ``filename`` and fill the dictionary.

        Raises IOError if ``filename`` is not an existing regular file.
        """
        # this dict will contain the name/data pairs
        self.data={}
        # entry for garbage
        self.data[None]=[]
        if not os.path.isfile(filename):
            raise IOError("File not found.")
        Bio.PDB.mmCIF.MMCIFlex.open_file(filename)
        try:
            self._make_mmcif_dict()
        finally:
            # Close the lexer's file even when parsing raises (e.g. when
            # warnings are configured to be raised as errors).
            Bio.PDB.mmCIF.MMCIFlex.close_file()

    def _make_mmcif_dict(self):
        """Read tokens from the lexer until EOF and populate ``self.data``.

        Expects ``MMCIFlex.open_file`` to have been called already.
        """
        # local copies
        NAME=self.NAME
        LOOP=self.LOOP
        DATA=self.DATA
        get_token=Bio.PDB.mmCIF.MMCIFlex.get_token
        mmcif_dict=self.data
        # are we looping?
        loop_flag=0
        # one list per name declared in the current loop
        temp_list=[]
        # get first token/value pair
        token, value=get_token()
        # loop until EOF (token==0)
        while token:
            if token==NAME:
                if loop_flag:
                    # Make lists for all the names in the loop
                    while token==NAME:
                        new_list=mmcif_dict[value]=[]
                        temp_list.append(new_list)
                        token, value=get_token()
                    loop_flag=0
                    # nr of data items parsed
                    data_counter=0
                    # index of the column that received the last item
                    pos=0
                    nr_fields=len(temp_list)
                    # Distribute the data items over the columns, round robin
                    while token>DATA:
                        pos=data_counter%nr_fields
                        data_counter=data_counter+1
                        temp_list[pos].append(value)
                        token, value=get_token()
                    if pos!=nr_fields-1:
                        # the last row did not fill every column
                        warnings.warn("ERROR: broken name-data pair "
                                      "(data missing)!", RuntimeWarning)
                    # The token that ended the data run has not been
                    # handled yet, so token is NOT reset to None: it goes
                    # through the main loop again.
                else:
                    # simple name-data pair (no loop)
                    # so next token should be the data
                    next_token, data=get_token()
                    mmcif_dict[value]=data
                    if next_token>DATA:
                        # well-formed pair: fetch a fresh token below
                        token=None
                    else:
                        warnings.warn("ERROR: broken name-data pair "
                                      "(name-non data pair)!", RuntimeWarning)
                        # The token after the name was not data.  Hand it
                        # back to the main loop instead of keeping the
                        # stale NAME token: otherwise that token is lost
                        # and, when it is EOF (0), the loop never ends.
                        token, value=next_token, data
            elif token==LOOP:
                loop_flag=1
                temp_list=[]
                # get next token
                token=None
            elif token==DATA:
                # key is the first five characters, value is the rest
                mmcif_dict[value[0:5]]=value[5:]
                token=None
            else:
                # a data value without a preceding name
                warnings.warn("ERROR: broken name-data pair "
                              "(missing name)!\n%s %s" % (token, value),
                              RuntimeWarning)
                mmcif_dict[None].append(value)
                # get next token
                token=None
            if token is None:
                token, value=get_token()

    def __getitem__(self, key):
        """Return the value stored for ``key``; raises KeyError if absent."""
        return self.data[key]

    def __iter__(self):
        """Iterate over the keys of the dictionary.

        Defined explicitly because Python 2's plain ``UserDict.UserDict``
        (unlike ``IterableUserDict``) provides no ``__iter__``, in which case
        ``for key in obj`` falls back to integer indexing.
        """
        return iter(self.data)


if __name__=="__main__":
    # Small interactive browser: parse the mmCIF file given on the command
    # line and print the value(s) stored under each key the user types.

    import sys

    if len(sys.argv)!=2:
        print("Usage: python MMCIF2Dict filename.")
        # Stop here: without exactly one argument there is no file to
        # parse (sys.argv[1] would raise IndexError when it is missing).
        sys.exit(1)

    filename=sys.argv[1]

    mmcif_dict=MMCIF2Dict(filename)

    print("Now type a key ('q' to end, 'k' for a list of all keys):")
    while True:
        entry = raw_input("MMCIF dictionary key ==> ")
        if entry == "q":
            sys.exit()
        if entry == "k":
            # Use keys() rather than iterating the object itself: a plain
            # Python 2 UserDict does not define __iter__.
            for key in mmcif_dict.keys():
                print(key)
            continue
        try:
            value=mmcif_dict[entry]
        except KeyError:
            print("No such key found.")
            continue
        if isinstance(value, list):
            # loop column: one item per line
            for item in value:
                print(item)
        else:
            print(value)
# python-biopython 1.58-1 /usr/share/pyshared/Bio/PDB/MMCIF2Dict.py