1
2
3
4
5
6 import os
7 import numpy
8
9 from Bio.SVDSuperimposer import SVDSuperimposer
10 from Bio.PDB import *
11
12 __doc__="""
13 Classify protein backbone structure according to Kolodny et al's fragment
14 libraries. It can be regarded as a form of objective secondary structure
15 classification. Only fragments of length 5 or 7 are supported (ie. there is a
16 'central' residue).
17
18 Full reference:
19
20 Kolodny R, Koehl P, Guibas L, Levitt M.
21 Small libraries of protein fragments model native protein structures accurately.
22 J Mol Biol. 2002 323(2):297-307.
23
24 The definition files of the fragments can be obtained from:
25
26 U{http://csb.stanford.edu/~rachel/fragments/}
27
28 You need these files to use this module.
29
30 The following example uses the library with 10 fragments of length 5.
31 The library files can be found in directory 'fragment_data'.
32
33 >>> model=structure[0]
34 >>> fm=FragmentMapper(lsize=10, flength=5, dir="fragment_data")
35 >>> fm.map(model)
36 >>> fragment=fm[residue]
37 """
38
39
40
41
42
43 _FRAGMENT_FILE="lib_%s_z_%s.txt"
44
45
47 """
48 Read a fragment spec file (available from
49 U{http://csb.stanford.edu/rachel/fragments/}
50 and return a list of Fragment objects.
51
52 @param size: number of fragments in the library
53 @type size: int
54
55 @param length: length of the fragments
56 @type length: int
57
58 @param dir: directory where the fragment spec files can be found
59 @type dir: string
60 """
61 filename=(dir+"/"+_FRAGMENT_FILE) % (size, length)
62 fp=open(filename, "r")
63 flist=[]
64
65 fid=0
66 for l in fp.readlines():
67
68 if l[0]=="*" or l[0]=="\n":
69 continue
70 sl=l.split()
71 if sl[1]=="------":
72
73 f=Fragment(length, fid)
74 flist.append(f)
75
76 fid+=1
77 continue
78
79 coord=numpy.array(map(float, sl[0:3]))
80
81 f.add_residue("XXX", coord)
82 fp.close()
83 return flist
84
85
87 """
88 Represent a polypeptide C-alpha fragment.
89 """
91 """
92 @param length: length of the fragment
93 @type length: int
94
95 @param fid: id for the fragment
96 @type fid: int
97 """
98
99 self.length=length
100
101 self.counter=0
102 self.resname_list=[]
103
104 self.coords_ca=numpy.zeros((length, 3), "d")
105 self.fid=fid
106
108 """
109 @return: the residue names
110 @rtype: [string, string,...]
111 """
112 return self.resname_list
113
115 """
116 @return: id for the fragment
117 @rtype: int
118 """
119 return self.fid
120
122 """
123 @return: the CA coords in the fragment
124 @rtype: Numeric (Nx3) array
125 """
126 return self.coords_ca
127
129 """
130 @param resname: residue name (eg. GLY).
131 @type resname: string
132
133 @param ca_coord: the c-alpha coorinates of the residues
134 @type ca_coord: Numeric array with length 3
135 """
136 if self.counter>=self.length:
137 raise PDBException("Fragment boundary exceeded.")
138 self.resname_list.append(resname)
139 self.coords_ca[self.counter]=ca_coord
140 self.counter=self.counter+1
141
143 """
144 @return: length of fragment
145 @rtype: int
146 """
147 return self.length
148
150 """
151 Return rmsd between two fragments.
152
153 Example:
154 >>> rmsd=fragment1-fragment2
155
156 @return: rmsd between fragments
157 @rtype: float
158 """
159 sup=SVDSuperimposer()
160 sup.set(self.coords_ca, other.coords_ca)
161 sup.run()
162 return sup.get_rms()
163
165 """
166 Returns <Fragment length=L id=ID> where L=length of fragment
167 and ID the identifier (rank in the library).
168 """
169 return "<Fragment length=%i id=%i>" % (self.length, self.fid)
170
171
173 """
174 Dice up a peptide in fragments of length "length".
175
176 @param pp: a list of residues (part of one peptide)
177 @type pp: [L{Residue}, L{Residue}, ...]
178
179 @param length: fragment length
180 @type length: int
181 """
182 frag_list=[]
183 for i in range(0, len(pp)-length+1):
184 f=Fragment(length, -1)
185 for j in range(0, length):
186 residue=pp[i+j]
187 resname=residue.get_resname()
188 if residue.has_id("CA"):
189 ca=residue["CA"]
190 else:
191 raise PDBException("CHAINBREAK")
192 if ca.is_disordered():
193 raise PDBException("CHAINBREAK")
194 ca_coord=ca.get_coord()
195 f.add_residue(resname, ca_coord)
196 frag_list.append(f)
197 return frag_list
198
199
201 """
202 Map all frgaments in flist to the closest
203 (in RMSD) fragment in reflist.
204
205 Returns a list of reflist indices.
206
207 @param flist: list of protein fragments
208 @type flist: [L{Fragment}, L{Fragment}, ...]
209
210 @param reflist: list of reference (ie. library) fragments
211 @type reflist: [L{Fragment}, L{Fragment}, ...]
212 """
213 mapped=[]
214 for f in flist:
215 rank=[]
216 for i in range(0, len(reflist)):
217 rf=reflist[i]
218 rms=f-rf
219 rank.append((rms, rf))
220 rank.sort()
221 fragment=rank[0][1]
222 mapped.append(fragment)
223 return mapped
224
225
227 """
228 Map polypeptides in a model to lists of representative fragments.
229 """
230 - def __init__(self, model, lsize=20, flength=5, fdir="."):
231 """
232 @param model: the model that will be mapped
233 @type model: L{Model}
234
235 @param lsize: number of fragments in the library
236 @type lsize: int
237
238 @param flength: length of fragments in the library
239 @type flength: int
240
241 @param fdir: directory where the definition files are
242 found (default=".")
243 @type fdir: string
244 """
245 if flength==5:
246 self.edge=2
247 elif flength==7:
248 self.edge=3
249 else:
250 raise PDBException("Fragment length should be 5 or 7.")
251 self.flength=flength
252 self.lsize=lsize
253 self.reflist=_read_fragments(lsize, flength, fdir)
254 self.model=model
255 self.fd=self._map(self.model)
256
257 - def _map(self, model):
258 """
259 @param model: the model that will be mapped
260 @type model: L{Model}
261 """
262 ppb=PPBuilder()
263 ppl=ppb.build_peptides(model)
264 fd={}
265 for pp in ppl:
266 try:
267
268 flist=_make_fragment_list(pp, self.flength)
269
270 mflist=_map_fragment_list(flist, self.reflist)
271 for i in range(0, len(pp)):
272 res=pp[i]
273 if i<self.edge:
274
275 continue
276 elif i>=(len(pp)-self.edge):
277
278 continue
279 else:
280
281 index=i-self.edge
282 assert(index>=0)
283 fd[res]=mflist[index]
284 except "CHAINBREAK":
285
286 pass
287 return fd
288
290 """
291 @type res: L{Residue}
292 """
293 return self.fd.has_key(res)
294
296 """
297 @type res: L{Residue}
298
299 @return: fragment classification
300 @rtype: L{Fragment}
301 """
302 return self.fd[res]
303
304
305 if __name__=="__main__":
306
307 import sys
308
309 p=PDBParser()
310 s=p.get_structure("X", sys.argv[1])
311
312 m=s[0]
313 fm=FragmentMapper(m, 10, 5, "levitt_data")
314
315
316 for r in Selection.unfold_entities(m, "R"):
317
318 print r,
319 if fm.has_key(r):
320 print fm[r]
321 else:
322 print
323