1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36 __doc__="Access the PDB over the internet (for example to download structures)."
37
38
import os
import re
import shlex
import shutil
import subprocess
import urllib
import warnings

try:
    from urllib.request import urlopen as _urlopen  # Python 3
except ImportError:
    from urllib import urlopen as _urlopen  # Python 2
42
class PDBList:
    """Quick access to the structure lists on the PDB server or its mirrors.

    The structure lists contain four-letter PDB codes, indicating that
    structures are new, have been modified or are obsolete. The lists are
    released on a weekly basis.

    The class also provides a method to retrieve PDB files from the server.
    To use it properly, prepare a directory /pdb or the like, where PDB
    files are stored.

    If you want to use this module from inside a proxy, add the proxy
    variable to your environment, e.g. in Unix:
    export HTTP_PROXY='http://realproxy.charite.de:888'
    (This can also be added to ~/.bashrc)
    """

    # NOTE(review): the reviewed text had lost the `class` line and most
    # `def` lines. Method names were restored from their call sites in this
    # file and parameter names from the method bodies; confirm them against
    # the pristine file before merging.

    PDB_REF = """
    The Protein Data Bank: a computer-based archival file for macromolecular structures.
    F.C.Bernstein, T.F.Koetzle, G.J.B.Williams, E.F.Meyer Jr, M.D.Brice, J.R.Rodgers, O.Kennard, T.Shimanouchi, M.Tasumi
    J. Mol. Biol. 112 pp. 535-542 (1977)
    http://www.pdb.org/.
    """

    alternative_download_url = "http://www.rcsb.org/pdb/files/"

    def __init__(self, server='ftp://ftp.wwpdb.org', pdb=None,
                 obsolete_pdb=None):
        """Initialize the class with the default server or a custom one.

        @param server: base URL of the PDB server or mirror
        @type server: string

        @param pdb: root of the local PDB file tree (default: the working
            directory at the time of the call)
        @type pdb: string

        @param obsolete_pdb: root of the local tree for obsolete entries
            (default: an 'obsolete' directory below the local tree, which
            is created if missing)
        @type obsolete_pdb: string
        """
        # remote pdb server
        self.pdb_server = server

        # local pdb file tree; resolved here rather than in the signature so
        # the default follows the current directory instead of the one that
        # was current when the module was imported
        self.local_pdb = pdb if pdb else os.getcwd()

        # local file tree for obsolete pdb files
        if obsolete_pdb:
            self.obsolete_pdb = obsolete_pdb
        else:
            self.obsolete_pdb = os.path.join(self.local_pdb, 'obsolete')
            if not os.access(self.obsolete_pdb, os.F_OK):
                os.makedirs(self.obsolete_pdb)

        # variables for command-line options
        self.overwrite = 0
        self.flat_tree = 0

    @staticmethod
    def _read_lines(url):
        """Return the document at url as a list of text lines.

        The connection is always closed. Python 3 delivers bytes, which are
        decoded so that callers can slice and compare against plain strings.
        """
        handle = _urlopen(url)
        try:
            raw_lines = handle.readlines()
        finally:
            handle.close()
        return [line if isinstance(line, str) else line.decode('latin-1')
                for line in raw_lines]

    @staticmethod
    def _write_code_list(listfile, entries):
        """Write one PDB code per line to listfile and close it."""
        with open(listfile, 'w') as handle:
            handle.writelines([entry + '\n' for entry in entries])

    def get_status_list(self, url):
        """Retrieve a list of pdb codes in the weekly pdb status file
        from the given URL. Used by get_recent_changes.

        Typical contents of the list files parsed by this method;
        -rw-r--r--   1 rcsb     rcsb      330156 Oct 14  2003 pdb1cyq.ent
        -rw-r--r--   1 rcsb     rcsb      333639 Oct 14  2003 pdb1cz0.ent

        @param url: location of the status file
        @type url: string

        @return: the four-letter codes cut from every '*.ent' file name
        @rtype: list of strings
        """
        codes = []
        for line in self._read_lines(url):
            fields = line.split()
            # blank lines have no fields at all; skip them instead of
            # failing on fields[-1]
            if fields and fields[-1][-4:] == '.ent':
                codes.append(fields[-1][3:7])
        return codes

    def get_recent_changes(self):
        """Return three lists of the newest weekly files (added,mod,obsolete).

        Reads the directory listing with changed entries from the PDB server
        and returns a list of three lists of PDB codes: new, modified and
        obsolete entries from the most recent list. The directory with the
        largest numerical name is used.
        Returns None if something goes wrong while evaluating the listing.

        Contents of the data/status dir (20031013 would be used);
        drwxrwxr-x   2 1002     sysadmin     512 Oct  6 18:28 20031006
        drwxrwxr-x   2 1002     sysadmin     512 Oct 14 02:14 20031013
        -rw-r--r--   1 1002     sysadmin    1327 Mar 12  2001 README
        """
        status_url = self.pdb_server + '/pub/pdb/data/status/'
        # a failure to read the listing itself propagates to the caller
        listing = self._read_lines(status_url)

        try:
            dated = []
            for line in listing:
                fields = line.split()
                if fields and fields[-1].isdigit():
                    dated.append(fields[-1])
            # pick the numerically largest name instead of trusting the
            # order in which the server lists the directories
            recent = max(dated, key=int)

            path = status_url + '%s/' % recent
            added = self.get_status_list(path + 'added.pdb')
            modified = self.get_status_list(path + 'modified.pdb')
            obsolete = self.get_status_list(path + 'obsolete.pdb')
            return [added, modified, obsolete]
        except Exception:
            # documented best-effort contract; KeyboardInterrupt and
            # SystemExit are deliberately not swallowed
            return None

    def get_all_entries(self):
        """Retrieve a big file containing all the
        PDB entries and some annotation to them.
        Returns a list of PDB codes in the index file.
        """
        warnings.warn("retrieving index file. Takes about 5 MB.")
        lines = self._read_lines(
            self.pdb_server + '/pub/pdb/derived_data/index/entries.idx')
        # the first two lines are skipped, as are lines too short to
        # carry a code
        return [line[:4] for line in lines[2:] if len(line) > 4]

    def get_all_obsolete(self):
        """Return a list of all obsolete entries ever in the PDB.

        Gets and parses the file from the PDB server in the format
        (the first pdb_code column, characters 21-24, is the one used).
         LIST OF OBSOLETE COORDINATE ENTRIES AND SUCCESSORS
        OBSLTE     30-SEP-03 1Q1D     1QZR
        OBSLTE     26-SEP-03 1DYV     1UN2

        @return: lower-case codes of all 'OBSLTE' records
        @rtype: list of strings
        """
        lines = self._read_lines(
            self.pdb_server + '/pub/pdb/data/status/obsolete.dat')
        return [line[21:25].lower() for line in lines
                if line[:6] == 'OBSLTE']

    def retrieve_pdb_file(self, pdb_code, obsolete=0, compression='.gz',
                          uncompress="gunzip", pdir=None):
        """Retrieve a PDB structure file from the PDB server and
        store it in a local file tree.

        If obsolete is 1, the file will be by default saved in a special
        file tree. The compression should be '.Z' or '.gz'. 'uncompress' is
        the command called to uncompress the files; it is run without a
        shell, with the downloaded file name appended as last argument.

        An existing local file is kept (and a RuntimeWarning issued) unless
        self.overwrite is set. A failing uncompress command is reported
        with a RuntimeWarning, not an exception.

        @param pdir: put the file in this directory (default: create a PDB-style directory tree)
        @type pdir: string

        @return: filename
        @rtype: string
        """
        code = pdb_code.lower()
        archive_name = "pdb%s.ent%s" % (code, compression)
        if obsolete:
            remote_dir = '/pub/pdb/data/structures/obsolete/pdb/%s/'
        else:
            remote_dir = '/pub/pdb/data/structures/divided/pdb/%s/'
        url = self.pdb_server + remote_dir % code[1:3] + archive_name

        # in which dir to put the pdb file?
        if pdir is not None:
            # explicit directory requested by the caller
            path = pdir
        else:
            path = self.obsolete_pdb if obsolete else self.local_pdb
            if not self.flat_tree:
                # PDB-style tree: one subdirectory per middle two characters
                path = os.path.join(path, code[1:3])

        if not os.access(path, os.F_OK):
            os.makedirs(path)

        filename = os.path.join(path, archive_name)
        # the final uncompressed file
        final_file = os.path.join(path, "pdb%s.ent" % code)

        # skip download if the file already exists
        if not self.overwrite and os.path.exists(final_file):
            warnings.warn("file exists, not retrieved %s" % final_file,
                          RuntimeWarning)
            return final_file

        # retrieve the file, streaming it to disk
        warnings.warn('retrieving %s' % url)
        remote = _urlopen(url)
        try:
            with open(filename, 'wb') as archive:
                shutil.copyfileobj(remote, archive)
        finally:
            remote.close()

        # In overwrite mode a stale copy may still be present; the default
        # gunzip does not replace an existing output file, so remove it
        # first - but only now that the download has succeeded.
        if os.path.exists(final_file):
            os.remove(final_file)

        # uncompress the file; an argument list (no shell) keeps paths with
        # spaces or shell metacharacters intact
        command = shlex.split(uncompress) + [filename]
        try:
            failed = subprocess.call(command) != 0
        except OSError:
            # command missing or not executable
            failed = True
        if failed:
            warnings.warn("could not uncompress %s with %s"
                          % (filename, uncompress), RuntimeWarning)

        return final_file

    def update_pdb(self):
        """
        I guess this is the 'most wanted' function from this module.
        It gets the weekly lists of new and modified pdb entries and
        automatically downloads the according PDB files.
        Local copies of entries that became obsolete are moved to the
        obsolete tree. You can call this module as a weekly cronjob.
        """
        changes = self.get_recent_changes()
        if changes is None:
            warnings.warn('could not read the weekly status lists',
                          RuntimeWarning)
            return
        new, modified, obsolete = changes

        for pdb_code in new + modified:
            try:
                warnings.warn('retrieving %s' % pdb_code)
                self.retrieve_pdb_file(pdb_code)
            except Exception:
                # one bad entry must not stop the weekly update
                warnings.warn('error %s' % pdb_code, RuntimeWarning)

        # move the obsolete files to a special folder
        for pdb_code in obsolete:
            name = 'pdb%s.ent' % pdb_code
            if self.flat_tree:
                old_dir = self.local_pdb
                new_dir = self.obsolete_pdb
            else:
                old_dir = os.path.join(self.local_pdb, pdb_code[1:3])
                new_dir = os.path.join(self.obsolete_pdb, pdb_code[1:3])
            old_file = os.path.join(old_dir, name)
            if not os.path.exists(old_file):
                # entry was never part of the local copy; nothing to move
                continue
            if not os.path.isdir(new_dir):
                os.makedirs(new_dir)
            shutil.move(old_file, os.path.join(new_dir, name))

    def download_entire_pdb(self, listfile=None):
        """Retrieve all PDB entries not present in the local PDB copy.
        Writes a list file containing all PDB codes (optional, if listfile is given).
        """
        entries = self.get_all_entries()
        for pdb_code in entries:
            self.retrieve_pdb_file(pdb_code)

        # write the list
        if listfile:
            self._write_code_list(listfile, entries)

    def download_obsolete_entries(self, listfile=None):
        """Retrieve all obsolete PDB entries not present in the local obsolete PDB copy.
        Writes a list file containing all PDB codes (optional, if listfile is given).
        """
        entries = self.get_all_obsolete()
        for pdb_code in entries:
            self.retrieve_pdb_file(pdb_code, obsolete=1)

        # write the list
        if listfile:
            self._write_code_list(listfile, entries)

    # NOTE(review): neither the name nor the default of this method is
    # visible in the reviewed text (only the use of `savefile` is); both
    # are assumptions - confirm against the pristine file.
    def get_seqres_file(self, savefile='pdb_seqres.txt'):
        """Retrieve a (big) file containing all the sequences
        of PDB entries and write it to a file.

        @param savefile: name of the local file to write
        @type savefile: string
        """
        warnings.warn("retrieving sequence file. Takes about 15 MB.")
        lines = self._read_lines(
            self.pdb_server + '/pub/pdb/derived_data/pdb_seqres.txt')
        with open(savefile, 'w') as handle:
            handle.writelines(lines)
321
322
323
if __name__ == '__main__':
    # Command-line front end: prints the usage text, builds a PDBList for
    # the requested directory and dispatches on the first argument.

    import sys

    doc = """PDBList.py
    (c) Kristian Rother 2003, Contributed to BioPython

    Usage:
    PDBList.py update <pdb_path> [options]   - write weekly PDB updates to
                                               local pdb tree.
    PDBList.py all    <pdb_path> [options]   - write all PDB entries to
                                               local pdb tree.
    PDBList.py obsol  <pdb_path> [options]   - write all obsolete PDB
                                               entries to local pdb tree.
    PDBList.py <PDB-ID> <pdb_path> [options] - retrieve single structure

    Options:
       -d   A single directory will be used as <pdb_path>, not a tree.
       -o   Overwrite existing structure files.
    """
    # the usage text is shown on every run, also when a command follows;
    # the parenthesised form works as a statement and as a function call
    print(doc)

    if len(sys.argv) > 2:
        pdb_path = sys.argv[2]
        pl = PDBList(pdb=pdb_path)
        # anything after <pdb_path> is an option; unknown ones are ignored
        for option in sys.argv[3:]:
            if option == '-d':
                pl.flat_tree = 1
            elif option == '-o':
                pl.overwrite = 1
    else:
        # no directory given: use the current one as a flat directory
        pdb_path = os.getcwd()
        pl = PDBList()
        pl.flat_tree = 1

    if len(sys.argv) > 1:
        command = sys.argv[1]
        if command == 'update':
            # update PDB
            print("updating local PDB at " + pdb_path)
            pl.update_pdb()

        elif command == 'all':
            # get the entire PDB
            pl.download_entire_pdb()

        elif command == 'obsol':
            # get all obsolete entries. The argument of this method is the
            # name of an optional list file, so the PDB directory must not
            # be passed: opening a directory for writing fails.
            pl.download_obsolete_entries()

        elif re.search(r'^\d...$', command):
            # get single PDB entry (a digit followed by three characters)
            pl.retrieve_pdb_file(command, pdir=pdb_path)
376