1
2
3
4
5
6
7 import warnings
8 import numpy
9
10
11 from StructureBuilder import StructureBuilder
12 from PDBExceptions import PDBConstructionException, PDBConstructionWarning
13 from parse_pdb_header import _parse_pdb_header_list
14
15 __doc__="Parser for PDB files."
16
17
18
19
20
22 """
23 Parse a PDB file and return a Structure object.
24 """
25
26 - def __init__(self, PERMISSIVE=1, get_header=0, structure_builder=None):
27 """
28 The PDB parser call a number of standard methods in an aggregated
29 StructureBuilder object. Normally this object is instanciated by the
30 PDBParser object itself, but if the user provides his own StructureBuilder
31 object, the latter is used instead.
32
33 Arguments:
34 o PERMISSIVE - int, if this is 0 exceptions in constructing the
35 SMCRA data structure are fatal. If 1 (DEFAULT), the exceptions are
36 caught, but some residues or atoms will be missing. THESE EXCEPTIONS
37 ARE DUE TO PROBLEMS IN THE PDB FILE!.
38 o structure_builder - an optional user implemented StructureBuilder class.
39 """
40 if structure_builder!=None:
41 self.structure_builder=structure_builder
42 else:
43 self.structure_builder=StructureBuilder()
44 self.header=None
45 self.trailer=None
46 self.line_counter=0
47 self.PERMISSIVE=PERMISSIVE
48
49
50
52 """Return the structure.
53
54 Arguments:
55 o id - string, the id that will be used for the structure
56 o file - name of the PDB file OR an open filehandle
57 """
58 self.header=None
59 self.trailer=None
60
61 self.structure_builder.init_structure(id)
62 if isinstance(file, basestring):
63 file=open(file)
64 self._parse(file.readlines())
65 self.structure_builder.set_header(self.header)
66
67 return self.structure_builder.get_structure()
68
70 "Return the header."
71 return self.header
72
74 "Return the trailer."
75 return self.trailer
76
77
78
79 - def _parse(self, header_coords_trailer):
85
87 "Get the header of the PDB file, return the rest."
88 structure_builder=self.structure_builder
89 for i in range(0, len(header_coords_trailer)):
90 structure_builder.set_line_counter(i+1)
91 line=header_coords_trailer[i]
92 record_type=line[0:6]
93 if(record_type=='ATOM ' or record_type=='HETATM' or record_type=='MODEL '):
94 break
95 header=header_coords_trailer[0:i]
96
97 self.line_counter=i
98 coords_trailer=header_coords_trailer[i:]
99 header_dict=_parse_pdb_header_list(header)
100 return header_dict, coords_trailer
101
103 "Parse the atomic data in the PDB file."
104 local_line_counter=0
105 structure_builder=self.structure_builder
106 current_model_id=0
107
108 model_open=0
109 current_chain_id=None
110 current_segid=None
111 current_residue_id=None
112 current_resname=None
113 for i in range(0, len(coords_trailer)):
114 line=coords_trailer[i]
115 record_type=line[0:6]
116 global_line_counter=self.line_counter+local_line_counter+1
117 structure_builder.set_line_counter(global_line_counter)
118 if(record_type=='ATOM ' or record_type=='HETATM'):
119
120 if not model_open:
121 structure_builder.init_model(current_model_id)
122 current_model_id+=1
123 model_open=1
124 fullname=line[12:16]
125
126 split_list=fullname.split()
127 if len(split_list)!=1:
128
129
130 name=fullname
131 else:
132
133 name=split_list[0]
134 altloc=line[16:17]
135 resname=line[17:20]
136 chainid=line[21:22]
137 try:
138 serial_number=int(line[6:11])
139 except:
140 serial_number=0
141 resseq=int(line[22:26].split()[0])
142 icode=line[26:27]
143 if record_type=='HETATM':
144 if resname=="HOH" or resname=="WAT":
145 hetero_flag="W"
146 else:
147 hetero_flag="H"
148 else:
149 hetero_flag=" "
150 residue_id=(hetero_flag, resseq, icode)
151
152 try :
153 x=float(line[30:38])
154 y=float(line[38:46])
155 z=float(line[46:54])
156 except :
157
158
159 raise PDBContructionError("Invalid or missing coordinate(s) at line %i." \
160 % global_line_counter)
161 coord=numpy.array((x, y, z), 'f')
162
163 try :
164 occupancy=float(line[54:60])
165 except :
166 self._handle_PDB_exception("Invalid or missing occupancy",
167 global_line_counter)
168 occupancy = 0.0
169 try :
170 bfactor=float(line[60:66])
171 except :
172 self._handle_PDB_exception("Invalid or missing B factor",
173 global_line_counter)
174 bfactor = 0.0
175 segid=line[72:76]
176 if current_segid!=segid:
177 current_segid=segid
178 structure_builder.init_seg(current_segid)
179 if current_chain_id!=chainid:
180 current_chain_id=chainid
181 structure_builder.init_chain(current_chain_id)
182 current_residue_id=residue_id
183 current_resname=resname
184 try:
185 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
186 except PDBConstructionException, message:
187 self._handle_PDB_exception(message, global_line_counter)
188 elif current_residue_id!=residue_id or current_resname!=resname:
189 current_residue_id=residue_id
190 current_resname=resname
191 try:
192 structure_builder.init_residue(resname, hetero_flag, resseq, icode)
193 except PDBConstructionException, message:
194 self._handle_PDB_exception(message, global_line_counter)
195
196 try:
197 structure_builder.init_atom(name, coord, bfactor, occupancy, altloc, fullname, serial_number)
198 except PDBConstructionException, message:
199 self._handle_PDB_exception(message, global_line_counter)
200 elif(record_type=='ANISOU'):
201 anisou=map(float, (line[28:35], line[35:42], line[43:49], line[49:56], line[56:63], line[63:70]))
202
203 anisou_array=(numpy.array(anisou, 'f')/10000.0).astype('f')
204 structure_builder.set_anisou(anisou_array)
205 elif(record_type=='MODEL '):
206 structure_builder.init_model(current_model_id)
207 current_model_id+=1
208 model_open=1
209 current_chain_id=None
210 current_residue_id=None
211 elif(record_type=='END ' or record_type=='CONECT'):
212
213 self.line_counter=self.line_counter+local_line_counter
214 return coords_trailer[local_line_counter:]
215 elif(record_type=='ENDMDL'):
216 model_open=0
217 current_chain_id=None
218 current_residue_id=None
219 elif(record_type=='SIGUIJ'):
220
221 siguij=map(float, (line[28:35], line[35:42], line[42:49], line[49:56], line[56:63], line[63:70]))
222
223 siguij_array=(numpy.array(siguij, 'f')/10000.0).astype('f')
224 structure_builder.set_siguij(siguij_array)
225 elif(record_type=='SIGATM'):
226
227 sigatm=map(float, (line[30:38], line[38:45], line[46:54], line[54:60], line[60:66]))
228 sigatm_array=numpy.array(sigatm, 'f')
229 structure_builder.set_sigatm(sigatm_array)
230 local_line_counter=local_line_counter+1
231
232 self.line_counter=self.line_counter+local_line_counter
233 return []
234
236 """
237 This method catches an exception that occurs in the StructureBuilder
238 object (if PERMISSIVE==1), or raises it again, this time adding the
239 PDB line number to the error message.
240 """
241 message="%s at line %i." % (message, line_counter)
242 if self.PERMISSIVE:
243
244 warnings.warn("PDBConstructionException: %s\n"
245 "Exception ignored.\n"
246 "Some atoms or residues may be missing in the data structure."
247 % message, PDBConstructionWarning)
248 else:
249
250 raise PDBConstructionException(message)
251
252
253 if __name__=="__main__":
254
255 import sys
256
257 p=PDBParser(PERMISSIVE=1)
258
259 filename = sys.argv[1]
260 s=p.get_structure("scr", filename)
261
262 for m in s:
263 p=m.get_parent()
264 assert(p is s)
265 for c in m:
266 p=c.get_parent()
267 assert(p is m)
268 for r in c:
269 print r
270 p=r.get_parent()
271 assert(p is c)
272 for a in r:
273 p=a.get_parent()
274 if not p is r:
275 print p, r
276