1
2
3
4
5
6
7 """
8 This file implements a SOAP interface into the Bibliographic database of
9 the European Bioinformatics Institute. It is a low-level interface and is
10 intended to be used by higher-level objects to build object representations
11 from data retrieved by this interface. This file needs a version of the
12 pywebsvcs package LATER than 0.97 available from www.sourceforge.net.
13 """
14
15 import warnings
16 warnings.warn("Bio.biblio is deprecated because it seems to be out of date, "
17 "and no one came forward saying they use this module. If you "
18 "use Bio.biblio, please join the biopython mailing list and "
19 "email us.",
20 DeprecationWarning)
21
22 import sys
23 import SOAP
24 import copy
25
26
27 SOAP.Config.BuildWithNoType = 1
28 SOAP.Config.BuildWithNoNamespacePrefix = 1
29
30 namespace = 'http://industry.ebi.ac.uk/openBQS'
31
"""
This class provides low-level access to the EBI Bibliographics services exported
through SOAP. There exists an almost 1-to-1 mapping between the methods and the
RPCs available on the SOAP server.
"""
38
47
49 if cid == -1:
50 return self.server.getBibRefCount()
51 else:
52 return self.server.getBibRefCount(cid)
53
54 - def find(self, cid, keywords, attrs, criteria):
59
61 if cid == -1:
62 raise 'no collection id'
63 self.server.resetRetrieval(cid)
64
66 if cid == -1:
67 raise 'no collection id'
68 return self.server.hasNext(cid)
69
71 if cid == -1:
72 raise 'no collection id'
73 return self.server.getNext(cid)
74
76 if cid == -1:
77 raise 'no collection id'
78 if cnt <= 0:
79 raise 'invalid count' + cnt
80 return self.server.getMore(cid, cnt)
81
83 if cid == -1:
84 raise 'no collection id (large result safeguard)'
85 return self.server.getAllIDs(cid)
86
88 if cid == -1:
89 raise 'no collection id (large result safeguard)'
90 return self.server.getAllBibRefs(cid)
91
93 return self.server.getById(id)
94
96 if cid == -1:
97 raise 'no collection id'
98 return self.server.exists(cid)
99
101 if cid == -1:
102 raise 'no collection id'
103 self.server.destroy(cid)
104
106 return self.server.getAllVocabularyNames()
107
109 return self.server.getAllValues(vocab)
110
def get_entry_description(self, vocab, entry):
    """Ask the server for the description of one vocabulary entry.

    vocab -- name of the vocabulary
    entry -- name of the entry within that vocabulary

    Returns whatever the server's getEntryDescription call returns.
    """
    describe = self.server.getEntryDescription
    return describe(vocab, entry)
113
115 return self.server.contains(vocab, entry)
116
117
"""
This class attempts to hide the concept of a collection id from users. Each
find action's results are grouped in the server under a unique collection id.
This id could be used in subsequent calls to refine its content further by
entering more specific search criteria. It can also return a new collection
by using the subcollection method. Each collection has its own current
collection id, as returned by the SOAP server through the lower-level Biblio
class's services, and it takes care of freeing this collection in the server
upon destruction.
"""
129
133
136
138 if self.cid == -1:
139 raise 'no collection id (use find)'
140 return self.cid
141
144
145 - def refine(self, keywords, attrs, criteria):
150
155
158
161
163 if self.cid == -1:
164 return 0
165 return self.biblio.exists(self.cid)
166
168 if self.cid == -1:
169 return
170 self.biblio.destroy(self.cid)
171
def checkargv(idx, msg):
    """Make sure sys.argv has an element at position ``idx``.

    idx -- position in sys.argv at which the value of an option is expected
    msg -- name of the option; only used to build the error message

    Raises ValueError when sys.argv is too short to contain that position.
    """
    # The previous test (idx-1 > len(sys.argv)) could never be true for the
    # positions passed by main(), so a missing value surfaced as a bare
    # IndexError further down instead of this message.
    if idx >= len(sys.argv):
        raise ValueError('argument expected at position %d for option %s'
                         % (idx, msg))


def _option_index(option, start=0):
    """Return the position of ``option`` in sys.argv[start:], or -1 if absent."""
    try:
        return sys.argv.index(option, start)
    except ValueError:
        return -1


def _open_output(filename):
    """Open ``filename`` for writing; report failure and return None instead."""
    try:
        return open(filename, 'w')
    except IOError:
        print('failed to open %s.' % filename)
        return None


def main():
    """Command-line utility built on the classes in this file to query the BQS.

    All options are read from sys.argv; the usage text printed for -h lists
    them. The function exits through sys.exit() after printing the usage
    text, and also when no '-' separator (which introduces the -find clauses)
    is present on the command line.

    Raises ValueError (via checkargv) when an option is missing its value.
    """
    # default server location; may be overridden with -l
    serverurl = 'http://industry.ebi.ac.uk/soap/openBQS'

    # help requested: show the usage text and stop
    if _option_index('-h') != -1:
        print("""
usage: biblio.py [options] [- [finds]]
where options may be:
-l <server URL> to change the server URL
(the default URL is %s)
-g <citation ID> to get the XML version of a citation
-c to obtain the size of a citation collection with each refinement
-a to retrieve the citations in a collection instead of showing only
their citation id's
-f <prefix> to specify the location whereto dump citations (implies -a)
found in a collection
-o get citations one-by-one i.e. each will end up in its own file if used
in conjunction with -f
-Vn to get the vocabulary names in the database
-Vv <vocabulary> to get all antries for vocabulary
-Vd <vocabulary> <entry> to get description for vocabulary entry
-Ve <vocabulary> <entry> to determine whether vocabulary entry exist
and finds are any number of successive occurrences of the following:
-find <keyword> [-attr <attribute>]
where each new find occurrence refines the result of the previous
examples of using this script is:
biblio.py -l http://192.168.0.163:8123 -g 21322295
biblio.py -g 21322295
biblio.py -a - -find study -find gene
biblio.py -f genestudies - -find study -find gene
biblio.py -f brazma - -find brazma -attr author
biblio.py -Vn
biblio.py -Vv MEDLINE/Person/properties
biblio.py -Vd MEDLINE/Person/properties LAST_NAME
biblio.py -Ve MEDLINE/Person/properties LAST_NAME
""" % serverurl)
        # sys.exit was previously referenced without being called, so the
        # script kept running after showing the help
        sys.exit(0)

    # server URL
    pos = _option_index('-l')
    if pos != -1:
        checkargv(pos + 1, '-l')
        serverurl = sys.argv[pos + 1]
    server = Biblio(serverurl)

    # single citation by id
    pos = _option_index('-g')
    if pos != -1:
        checkargv(pos + 1, '-g')
        print(server.get_by_id(sys.argv[pos + 1]))

    # collection sizes
    showsize = _option_index('-c') != -1
    if showsize:
        print('total number of citations -> %s' % (server.get_count(),))

    # retrieve citations instead of only listing their ids
    fetch = _option_index('-a') != -1

    # dump citations to files; implies -a
    prefix = None
    pos = _option_index('-f')
    if pos != -1:
        checkargv(pos + 1, '-f')
        prefix = sys.argv[pos + 1]
        fetch = True

    # one file per citation; -o takes no value, so nothing to validate
    indiv = _option_index('-o') != -1

    # vocabulary names; -Vn takes no value either
    if _option_index('-Vn') != -1:
        vocab = server.get_vocabulary_names()
        if len(vocab) > 0:
            print('the vocabulary names are:')
        else:
            print('there is no names in the vocabulary.')
        for name in vocab:
            print(name)

    # entries of one vocabulary
    pos = _option_index('-Vv')
    if pos != -1:
        checkargv(pos + 1, '-Vv')
        values = server.get_all_values(sys.argv[pos + 1])
        if len(values) > 0:
            print('the vocabulary entries for %s are:' % sys.argv[pos + 1])
        else:
            print('there is no entries in the vocabulary %s.'
                  % sys.argv[pos + 1])
        for value in values:
            print(value)

    # description of one vocabulary entry
    pos = _option_index('-Vd')
    if pos != -1:
        checkargv(pos + 1, '-Vd name')
        checkargv(pos + 2, '-Vd entry')
        print(server.get_entry_description(sys.argv[pos + 1],
                                           sys.argv[pos + 2]))

    # existence of one vocabulary entry
    pos = _option_index('-Ve')
    if pos != -1:
        checkargv(pos + 1, '-Ve name')
        checkargv(pos + 2, '-Ve entry')
        if server.contains(sys.argv[pos + 1], sys.argv[pos + 2]):
            print('entry %s::%s exists.'
                  % (sys.argv[pos + 1], sys.argv[pos + 2]))
        else:
            print("entry %s::%s doesn't exists."
                  % (sys.argv[pos + 1], sys.argv[pos + 2]))

    # the finds follow a lone '-'; without it there is nothing left to do
    base = _option_index('-')
    if base == -1:
        sys.exit(0)

    # each -find refines the collection produced by the previous one
    collection = BiblioCollection(server)
    while True:
        pos = _option_index('-find', base)
        if pos == -1:
            break
        checkargv(pos + 1, '-find')
        keys = sys.argv[pos + 1]
        attrs = ''
        # an optional "-attr <attribute>" may directly follow the keyword
        if len(sys.argv[pos + 1:]) > 1 and sys.argv[pos + 2] == '-attr':
            checkargv(pos + 3, '-attr')
            attrs = sys.argv[pos + 3]
        if fetch:
            collection.refine(keys, attrs, '')
        else:
            # NOTE(review): the reporting below is assumed to belong to the
            # non-fetch branch only; the original indentation was not
            # available when this block was rewritten -- confirm.
            print('search with: %s %s' % (keys, attrs))
            collection.refine(keys, attrs, '')
            print('collection -> %s' % (collection.get_collection_id(),))
            if showsize:
                print('collection size is -> %s' % (collection.get_count(),))
            ids = collection.get_all_ids()
            if len(ids) > 0:
                print('citations in collection ->')
            else:
                print('no citations in collection.')
            for cit_id in ids:
                print(cit_id)
        base = pos + 1

    if not fetch:
        return

    if prefix is None:
        # no file prefix: write the citations to standard output
        print(collection.get_all())
    elif indiv:
        # one file per citation, named <prefix>-<id>.xml
        for cit_id in collection.get_all_ids():
            print('saving %s ...' % cit_id)
            out = _open_output(prefix + '-' + cit_id + '.xml')
            if out is not None:
                try:
                    out.write(server.get_by_id(cit_id))
                finally:
                    out.close()
    else:
        # all citations in a single file named <prefix>.xml
        out = _open_output(prefix + '.xml')
        if out is not None:
            try:
                out.write(collection.get_all())
            finally:
                out.close()
406
# run the command-line utility only when this file is executed as a script
if __name__ == '__main__':
    main()
409