Package Bio :: Package PDB :: Module PDBParser'
[hide private]
[frames] | no frames]

Source Code for Module Bio.PDB.PDBParser'

  1  # Copyright (C) 2002, Thomas Hamelryck (thamelry@binf.ku.dk) 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package.   
  5   
  6  # Python stuff 
  7  import sys 
  8  from string import split 
  9  from Numeric import array, Float0 
 10   
 11  # My stuff 
 12  from StructureBuilder import StructureBuilder 
 13  from PDBExceptions import PDBConstructionException 
 14  from parse_pdb_header import _parse_pdb_header_list 
 15   
 16  __doc__="Parser for PDB files." 
 17   
 18   
 19  # If PDB spec says "COLUMNS 18-20" this means line[17:20] 
 20   
 21   
22 -class PDBParser:
23 """ 24 Parse a PDB file and return a Structure object. 25 """ 26
27 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
28 """ 29 The PDB parser call a number of standard methods in an aggregated 30 StructureBuilder object. Normally this object is instanciated by the 31 PDBParser object itself, but if the user provides his own StructureBuilder 32 object, the latter is used instead. 33 34 Arguments: 35 o PERMISSIVE - int, if this is 0 exceptions in constructing the 36 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are 37 caught, but some residues or atoms will be missing. THESE EXCEPTIONS 38 ARE DUE TO PROBLEMS IN THE PDB FILE!. 39 o structure_builder - an optional user implemented StructureBuilder class. 40 """ 41 if structure_builder!=None: 42 self.structure_builder=structure_builder 43 else: 44 self.structure_builder=StructureBuilder() 45 self.header=None 46 self.trailer=None 47 self.line_counter=0 48 self.PERMISSIVE=PERMISSIVE
49 50 # Public methods 51
52 - def get_structure(self, id, file):
53 """Return the structure. 54 55 Arguments: 56 o id - string, the id that will be used for the structure 57 o file - name of the PDB file OR an open filehandle 58 """ 59 self.header=None 60 self.trailer=None 61 # Make a StructureBuilder instance (pass id of structure as parameter) 62 self.structure_builder.init_structure(id) 63 if isinstance(file, basestring): 64 file=open(file) 65 self._parse(file.readlines()) 66 self.structure_builder.set_header(self.header) 67 # Return the Structure instance 68 return self.structure_builder.get_structure()
69
70 - def get_header(self):
71 "Return the header." 72 return self.header
73
74 - def get_trailer(self):
75 "Return the trailer." 76 return self.trailer
77 78 # Private methods 79
80 - def _parse(self, header_coords_trailer):
81 "Parse the PDB file." 82 # Extract the header; return the rest of the file 83 self.header, coords_trailer=self._get_header(header_coords_trailer) 84 # Parse the atomic data; return the PDB file trailer 85 self.trailer=self._parse_coordinates(coords_trailer)
86
87 - def _get_header(self, header_coords_trailer):
88 "Get the header of the PDB file, return the rest." 89 structure_builder=self.structure_builder 90 for i in range(0, len(header_coords_trailer)): 91 structure_builder.set_line_counter(i+1) 92 line=header_coords_trailer[i] 93 record_type=line[0:6] 94 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '): 95 break 96 header=header_coords_trailer[0:i] 97 # Return the rest of the coords+trailer for further processing 98 self.line_counter=i 99 coords_trailer=header_coords_trailer[i:] 100 header_dict=_parse_pdb_header_list(header) 101 return header_dict, coords_trailer
102
103 - def _parse_coordinates(self, coords_trailer):
104 "Parse the atomic data in the PDB file." 105 local_line_counter=0 106 structure_builder=self.structure_builder 107 current_model_id=0 108 # Flag we have an open model 109 model_open=0 110 current_chain_id=None 111 current_segid=None 112 current_residue_id=None 113 current_resname=None 114 for i in range(0, len(coords_trailer)): 115 line=coords_trailer[i] 116 record_type=line[0:6] 117 global_line_counter=self.line_counter+local_line_counter+1 118 structure_builder.set_line_counter(global_line_counter) 119 if(record_type=='ATOM ' or record_type=='HETATM'): 120 # Initialize the Model - there was no explicit MODEL record 121 if not model_open: 122 structure_builder.init_model(current_model_id) 123 current_model_id+=1 124 model_open=1 125 fullname=line[12:16] 126 # get rid of whitespace in atom names 127 split_list=split(fullname) 128 if len(split_list)!=1: 129 # atom name has internal spaces, e.g. " N B ", so 130 # we do not strip spaces 131 name=fullname 132 else: 133 # atom name is like " CA ", so we can strip spaces 134 name=split_list[0] 135 altloc=line[16:17] 136 resname=line[17:20] 137 chainid=line[21:22] 138 try: 139 serial_number=int(line[6:11]) 140 except: 141 serial_number=0 142 resseq=int(split(line[22:26])[0]) # sequence identifier 143 icode=line[26:27] # insertion code 144 if record_type=='HETATM': # hetero atom flag 145 if resname=="HOH" or resname=="WAT": 146 hetero_flag="W" 147 else: 148 hetero_flag="H" 149 else: 150 hetero_flag=" " 151 residue_id=(hetero_flag, resseq, icode) 152 # atomic coordinates 153 x=float(line[30:38]) 154 y=float(line[38:46]) 155 z=float(line[46:54]) 156 coord=array((x, y, z), Float0) 157 # occupancy & B factor 158 occupancy=float(line[54:60]) 159 bfactor=float(line[60:66]) 160 segid=line[72:76] 161 if current_segid!=segid: 162 current_segid=segid 163 structure_builder.init_seg(current_segid) 164 if current_chain_id!=chainid: 165 current_chain_id=chainid 166 structure_builder.init_chain(current_chain_id) 167 current_residue_id=residue_id 168 current_resname=resname 169 try: 170 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 171 except PDBConstructionException, message: 172 self._handle_PDB_exception(message, global_line_counter) 173 elif current_residue_id!=residue_id or current_resname!=resname: 174 current_residue_id=residue_id 175 current_resname=resname 176 try: 177 structure_builder.init_residue(resname, hetero_flag, resseq, icode) 178 except PDBConstructionException, message: 179 self._handle_PDB_exception(message, global_line_counter) 180 # init atom 181 try: 182 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number) 183 except PDBConstructionException, message: 184 self._handle_PDB_exception(message, global_line_counter) 185 elif(record_type=='ANISOU'): 186 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70])) 187 # U's are scaled by 10^4 188 anisou_array=(array(anisou, Float0)/10000.0).astype(Float0) 189 structure_builder.set_anisou(anisou_array) 190 elif(record_type=='MODEL '): 191 structure_builder.init_model(current_model_id) 192 current_model_id+=1 193 model_open=1 194 current_chain_id=None 195 current_residue_id=None 196 elif(record_type=='END ' or record_type=='CONECT'): 197 # End of atomic data, return the trailer 198 self.line_counter=self.line_counter+local_line_counter 199 return coords_trailer[local_line_counter:] 200 elif(record_type=='ENDMDL'): 201 model_open=0 202 current_chain_id=None 203 current_residue_id=None 204 elif(record_type=='SIGUIJ'): 205 # standard deviation of anisotropic B factor 206 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70])) 207 # U sigma's are scaled by 10^4 208 siguij_array=(array(siguij, Float0)/10000.0).astype(Float0) 209 structure_builder.set_siguij(siguij_array) 210 elif(record_type=='SIGATM'): 211 # standard deviation of atomic positions 212 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66])) 213 sigatm_array=array(sigatm, Float0) 214 structure_builder.set_sigatm(sigatm_array) 215 local_line_counter=local_line_counter+1 216 # EOF (does not end in END or CONECT) 217 self.line_counter=self.line_counter+local_line_counter 218 return []
219
220 - def _handle_PDB_exception(self, message, line_counter):
221 """ 222 This method catches an exception that occurs in the StructureBuilder 223 object (if PERMISSIVE==1), or raises it again, this time adding the 224 PDB line number to the error message. 225 """ 226 message="%s at line %i." % (message, line_counter) 227 if self.PERMISSIVE: 228 # just print a warning - some residues/atoms will be missing 229 print "PDBConstructionException: %s" % message 230 print "Exception ignored.\nSome atoms or residues will be missing in the data structure." 231 else: 232 # exceptions are fatal - raise again with new message (including line nr) 233 raise PDBConstructionException, message
234 235 236 if __name__=="__main__": 237 238 import sys 239 240 p=PDBParser(PERMISSIVE=1) 241 242 s=p.get_structure("scr", sys.argv[1]) 243 244 for m in s: 245 p=m.get_parent() 246 assert(p is s) 247 for c in m: 248 p=c.get_parent() 249 assert(p is m) 250 for r in c: 251 print r 252 p=r.get_parent() 253 assert(p is c) 254 for a in r: 255 p=a.get_parent() 256 if not p is r: 257 print p, r 258