1
2
3
4
5
6 from types import StringType
7
8 from Bio.Alphabet import ProteinAlphabet
9 from Bio.Seq import Seq
10 from Bio.SCOP.Raf import to_one_letter_code
11 from Bio.PDB.PDBExceptions import PDBException
12 from Bio.PDB.Residue import Residue, DisorderedResidue
13 from Vector import calc_dihedral, calc_angle
14
15 __doc__="""
16 Polypeptide related classes (construction and representation).
17
18 Example:
19
20 >>> ppb=PPBuilder()
21 >>> for pp in ppb.build_peptides(structure):
22 >>> print pp.get_sequence()
23 """
24
25 standard_aa_names=["ALA", "CYS", "ASP", "GLU", "PHE", "GLY", "HIS", "ILE", "LYS",
26 "LEU", "MET", "ASN", "PRO", "GLN", "ARG", "SER", "THR", "VAL",
27 "TRP", "TYR"]
28
29
30 aa1="ACDEFGHIKLMNPQRSTVWY"
31 aa3=standard_aa_names
32
33 d1_to_index={}
34 dindex_to_1={}
35 d3_to_index={}
36 dindex_to_3={}
37
38
39 for i in range(0, 20):
40 n1=aa1[i]
41 n3=aa3[i]
42 d1_to_index[n1]=i
43 dindex_to_1[i]=n1
44 d3_to_index[n3]=i
45 dindex_to_3[i]=n3
46
48 """
49 Index to corresponding one letter amino acid name.
50 For example: 0 to A.
51 """
52 return dindex_to_1[index]
53
55 """
56 One letter code to index.
57 For example: A to 0.
58 """
59 return d1_to_index[s]
60
62 """
63 Index to corresponding three letter amino acid name.
64 For example: 0 to ALA.
65 """
66 return dindex_to_3[i]
67
69 """
70 Three letter code to index.
71 For example: ALA to 0.
72 """
73 return d3_to_index[s]
74
76 """
77 Three letter code to one letter code.
78 For example: ALA to A.
79 """
80 i=d3_to_index[s]
81 return dindex_to_1[i]
82
84 """
85 One letter code to three letter code.
86 For example: A to ALA.
87 """
88 i=d1_to_index[s]
89 return dindex_to_3[i]
90
91 -def is_aa(residue, standard=0):
92 """
93 Return 1 if residue object/string is an amino acid.
94
95 @param residue: a L{Residue} object OR a three letter amino acid code
96 @type residue: L{Residue} or string
97
98 @param standard: flag to check for the 20 AA (default false)
99 @type standard: boolean
100 """
101 if not type(residue)==StringType:
102 residue=residue.get_resname()
103 residue=residue.upper()
104 if standard:
105 return d3_to_index.has_key(residue)
106 else:
107 return to_one_letter_code.has_key(residue)
108
109
111 """
112 A polypeptide is simply a list of L{Residue} objects.
113 """
115 """
116 @return: the list of C-alpha atoms
117 @rtype: [L{Atom}, L{Atom}, ...]
118 """
119 ca_list=[]
120 for res in self:
121 ca=res["CA"]
122 ca_list.append(ca)
123 return ca_list
124
126 """
127 Return the list of phi/psi dihedral angles
128 """
129 ppl=[]
130 lng=len(self)
131 for i in range(0, lng):
132 res=self[i]
133 try:
134 n=res['N'].get_vector()
135 ca=res['CA'].get_vector()
136 c=res['C'].get_vector()
137 except:
138
139
140 ppl.append((None, None))
141 res.xtra["PHI"]=None
142 res.xtra["PSI"]=None
143 continue
144
145 if i>0:
146 rp=self[i-1]
147 try:
148 cp=rp['C'].get_vector()
149 phi=calc_dihedral(cp, n, ca, c)
150 except:
151 phi=None
152 else:
153
154 phi=None
155
156 if i<(lng-1):
157 rn=self[i+1]
158 try:
159 nn=rn['N'].get_vector()
160 psi=calc_dihedral(n, ca, c, nn)
161 except:
162 psi=None
163 else:
164
165 psi=None
166 ppl.append((phi, psi))
167
168 res.xtra["PHI"]=phi
169 res.xtra["PSI"]=psi
170 return ppl
171
173 """
174 Return list of tau torsions angles for all 4 consecutive
175 Calpha atoms.
176 """
177 ca_list=self.get_ca_list()
178 tau_list=[]
179 for i in range(0, len(ca_list)-3):
180 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2], ca_list[i+3]]
181 vector_list=map(lambda a: a.get_vector(), atom_list)
182 v1, v2, v3, v4=vector_list
183 tau=calc_dihedral(v1, v2, v3, v4)
184 tau_list.append(tau)
185
186 res=ca_list[i+2].get_parent()
187 res.xtra["TAU"]=tau
188 return tau_list
189
191 """
192 Return list of theta angles for all 3 consecutive
193 Calpha atoms.
194 """
195 theta_list=[]
196 ca_list=self.get_ca_list()
197 for i in range(0, len(ca_list)-2):
198 atom_list=[ca_list[i], ca_list[i+1], ca_list[i+2]]
199 vector_list=map(lambda a: a.get_vector(), atom_list)
200 v1, v2, v3=vector_list
201 theta=calc_angle(v1, v2, v3)
202 theta_list.append(theta)
203
204 res=ca_list[i+1].get_parent()
205 res.xtra["THETA"]=theta
206 return theta_list
207
225
227 """
228 Return <Polypeptide start=START end=END>, where START
229 and END are sequence identifiers of the outer residues.
230 """
231 start=self[0].get_id()[1]
232 end=self[-1].get_id()[1]
233 s="<Polypeptide start=%s end=%s>" % (start, end)
234 return s
235
237 """
238 Base class to extract polypeptides.
239 It checks if two consecutive residues in a chain
240 are connected. The connectivity test is implemented by a
241 subclass.
242 """
244 """
245 @param radius: distance
246 @type radius: float
247 """
248 self.radius=radius
249
251 "Check if the residue is an amino acid."
252 if is_aa(residue):
253
254 return 1
255 else:
256 return 0
257
259 """
260 Build and return a list of Polypeptide objects.
261
262 @param entity: polypeptides are searched for in this object
263 @type entity: L{Structure}, L{Model} or L{Chain}
264
265 @param aa_only: if 1, the residue needs to be a standard AA
266 @type aa_only: int
267 """
268 is_connected=self._is_connected
269 accept=self._accept
270 level=entity.get_level()
271
272 if level=="S":
273 model=entity[0]
274 chain_list=model.get_list()
275 elif level=="M":
276 chain_list=entity.get_list()
277 elif level=="C":
278 chain_list=[entity]
279 else:
280 raise PDBException("Entity should be Structure, Model or Chain.")
281 pp_list=[]
282 for chain in chain_list:
283 chain_it=iter(chain)
284 prev=chain_it.next()
285 pp=None
286 for next in chain_it:
287 if aa_only and not accept(prev):
288 prev=next
289 continue
290 if is_connected(prev, next):
291 if pp is None:
292 pp=Polypeptide()
293 pp.append(prev)
294 pp_list.append(pp)
295 pp.append(next)
296 else:
297 pp=None
298 prev=next
299 return pp_list
300
301
303 """
304 Use CA--CA distance to find polypeptides.
305 """
308
329
330
332 """
333 Use C--N distance to find polypeptides.
334 """
337
372
374 "Return 1 if distance between atoms<radius"
375 if (c-n)<self.radius:
376 return 1
377 else:
378 return 0
379
380
381 if __name__=="__main__":
382
383 import sys
384
385 from Bio.PDB.PDBParser import PDBParser
386
387 p=PDBParser(PERMISSIVE=1)
388
389 s=p.get_structure("scr", sys.argv[1])
390
391 ppb=PPBuilder()
392
393 print "C-N"
394 for pp in ppb.build_peptides(s):
395 print pp.get_sequence()
396 for pp in ppb.build_peptides(s[0]):
397 print pp.get_sequence()
398 for pp in ppb.build_peptides(s[0]["A"]):
399 print pp.get_sequence()
400
401 for pp in ppb.build_peptides(s):
402 for phi, psi in pp.get_phi_psi_list():
403 print phi, psi
404
405 ppb=CaPPBuilder()
406
407 print "CA-CA"
408 for pp in ppb.build_peptides(s):
409 print pp.get_sequence()
410 for pp in ppb.build_peptides(s[0]):
411 print pp.get_sequence()
412 for pp in ppb.build_peptides(s[0]["A"]):
413 print pp.get_sequence()
414