1 """various EUtils datatypes"""
2
3 from __future__ import generators
4
5 import re, types
6
8 """Base class for all EUtils-specific errors
9
10 Contains a single error string -- use str(err) to get it.
11 """
12 pass
13
15 """Used when the ESearch XML says there is an ERROR
16
17 The main error is in err.errmsg but more information
18 may be available in err.errors or err.warnings. Eg,
19 the error message is often "Can't run executor" but
20 you can get more information from the list of errors.
21
22 """
23 - def __init__(self, errmsg, errors = None, warnings = None):
33 return "%s(%r, %r, %r)" % (self.__class__.__name__,
34 self.errmsg, self.errors, self.warnings)
36 s = self.errmsg
37 if self.errors:
38 s = s + "; ERRORS: " + ", ".join(map(str, self.errors))
39 if self.warnings:
40 s = s + "; WARNINGS: " + ", ".join(map(str, self.warnings))
41 return s.encode("latin1")
42
43
44
45
47 """Store a list of identifiers for a database
48
49 This is used as input for the '*_using_dbids' functions.
50
51 Constructed with the database name and list of identifier strings.
52
53 """
55 """db, ids
56
57 'db' -- the database for those identifiers
58 'ids' -- a list of identifiers for the given database
59 """
60 self.db = db
61 self.ids = ids
63 """number of identifiers"""
64 return len(self.ids)
66 """get an identifier or a subset of the DBIds"""
67 if isinstance(i, types.SliceType):
68
69
70 step = i.step
71 start = i.start
72 if start is None: start = 0
73 stop = i.stop
74 if stop is None: stop = len(self.ids)
75 if step is None:
76 return self.__class__(self.db, self.ids[start:stop])
77 else:
78 return self.__class__(self.db, self.ids[start:stop:step])
79
80
81 return self.ids[i]
83 """Get a DBIds containing the item at position i
84
85 Can't use dbids[i] since that returns only the identifier.
86 This returns a DBIds, which can be used for another request.
87 """
88 return self.__class__(self.db, [self.ids[i]])
89
91 """Iterate over the list of identifiers"""
92 return iter(self.ids)
94 return "DBIds(%r, %r)" % (self.db, self.ids)
96 """does this DBIds equal the other?
97
98 The database names must match, but the identifiers
99 themselves can be in any order.
100 """
101 if self.ids == other.ids:
102 return self.db == other.db
103 if self.db != other.db:
104 return 0
105
106
107
108 d1 = {}
109 for x in self.ids:
110 d1[x] = 0
111 d2 = {}
112 for x in other.ids:
113 d2[x] = 0
114 return d1 == d2
116 """check if this isn't equal to the other DBIds"""
117 return not self == other
118
120 """DBIds of the identifiers in this set which aren't in the other"""
121 if self.db != other.db:
122 raise TypeError("Different databases: %r and %r" % (
123 self.db, other.db))
124 other_d = {}
125 for x in other.ids:
126 other_d[x] = 0
127 new_ids = [x for x in self.ids if x not in other_d]
128 return DBIds(self.db, new_ids)
129
131 """Restrict a search to matches in the last N days
132
133 Eg, to see what's been published in PubMed about rabies
134 in the last 20 days.
135
136 client.search("rabies", daterange = WithinNDays(20, "pdat"))
137 """
def __init__(self, ndays, datetype = None):
    """ndays, datetype = None

    'ndays' -- within this many days of now (the 'reldate' field
               of a search)
    'datetype' -- the date field to use (defaults to Entrez date,
               which is "edat")

    Both values are stored unchanged on the instance; no validation
    is done here.
    """
    self.ndays = ndays
    self.datetype = datetype
149 """returns the fields to add to the EUtils query
150
151 This is an internal implementation feature you can ignore.
152 """
153 return {"reldate": self.ndays,
154 "datetype": self.datetype}
155
156
# Matcher for 'YYYY', 'YYYY/MM' or 'YYYY/MM/DD' date strings.
# Anchored with \Z rather than $, because $ also matches just before a
# trailing newline and would let "1995\n" through as a valid date.
_date_re_match = re.compile(r"\d{4}(/\d\d(/\d\d)?)?\Z").match
158
160 """Restrict a search to matches within a date range
161
162 Some examples:
163 matches between 1995 and 2000 -- DateRange("1995", "1999/12/31")
164 matches before 1990 -- DateRange(maxdate = "1990/01/01")
165 matches in 2002 or later -- DateRange(mindate = "2002/01/01")
166 matches in June or July of 2001 -- DateRange("2001/06", "2001/07")
167
168 """
def __init__(self, mindate = None, maxdate = None, datetype = None):
    """mindate = None, maxdate = None, datetype = None

    'mindate' -- matches must be on or after this date
    'maxdate' -- matches must be on or before this date
    'datetype' -- the date field to use for the search (defaults
              to Entrez date, which is "edat")

    At least one of mindate or maxdate must be specified.
    If mindate is omitted, all results on or before maxdate are returned.
    If maxdate is omitted, all results on or after mindate are returned.

    Dates must be formatted as 'YYYY/MM/DD', 'YYYY/MM', or 'YYYY'.

    Raises TypeError if both dates are None, or if a supplied date
    does not match the expected format.
    """
    if mindate is None and maxdate is None:
        raise TypeError("Must specify at least one of mindate or maxdate")

    # Check mindate before maxdate so that, when both are malformed,
    # the error names mindate.
    for name, value in (("mindate", mindate), ("maxdate", maxdate)):
        if value is not None and _date_re_match(value) is None:
            raise TypeError(
                "%s is not in YYYY/MM/DD format (month and "
                "day are optional): %r" % (name, value))

    self.mindate = mindate
    self.maxdate = maxdate
    self.datetype = datetype
198
200 """returns the fields to add to the EUtils query
201
202 This is an internal implementation feature you can ignore.
203 """
204 return {"mindate": str(self.mindate),
205 "maxdate": str(self.maxdate),
206 "datetype": self.datetype}
207
208
209
211 """Base class for the Expression given in the eSearch output
212
213 NCBI does some processing on the request. They return the
214 translated expression as part of the search results. To get the
215 expression as an Entrez string, use str(expression).
216
217 iter(expression) traverses the expression tree in postfix order.
218 """
220 """intersection of two expressions"""
221 return And(self, other)
223 """union of two expressions"""
224 return Or(self, other)
226 """Traverse the tree in postfix order"""
227 raise NotImplementedError
228
class Term(Expression):
    """Information about an Expression Term, which is the leaf node

    The fields are:
      term -- a word from the search term
      field -- the field searched by this term
      count -- the number of records matching this word
      explode -- stored as given (its meaning is not documented here)
    """
    def __init__(self, term, field, count, explode):
        self.term = term
        self.field = field
        self.count = count
        self.explode = explode

    def __str__(self):
        # NOTE(review): the leaf's string form is assumed to be the raw
        # term text, so that composite nodes can interpolate it with %s
        # -- confirm against the parser that builds Term objects.
        return self.term

    def __iter__(self):
        """Traverse the tree in postfix order"""
        # A leaf has no children, so the traversal is just the leaf.
        yield self
248
250 """Base class for binary expressions. Has a left and a right child"""
252 self.left = left
253 self.right = right
255 """Traverse the tree in postfix order"""
256 for x in self.left:
257 yield x
258 for x in self.right:
259 yield x
260 yield self
261
262
263
264
class And(BinaryOp):
    """intersection of two subexpressions"""
    def __str__(self):
        # NOTE(review): method header restored as __str__ -- confirm.
        # Parenthesized so nesting stays unambiguous.
        return "(%s AND %s)" % (self.left, self.right)
269
# NOTE(review): class header restored; the name comes from the
# Or(self, other) call in the Expression base and the BinaryOp parent
# is assumed from the sibling And/Not classes -- confirm.
class Or(BinaryOp):
    """union of two subexpressions"""
    def __str__(self):
        # Parenthesized so nesting stays unambiguous.
        return "(%s OR %s)" % (self.left, self.right)
274
275
class Not(BinaryOp):
    """the set of the left child without elements from the right child

    This is used for something like "poliovirus NOT polio"
    """
    def __str__(self):
        # NOTE(review): method header restored as __str__ -- confirm.
        # Parenthesized so nesting stays unambiguous.
        return "(%s NOT %s)" % (self.left, self.right)
283
285 """Used to store a date range"""
287 if left.field != right.field:
288 raise TypeError("dates must have the same field: %r and %r" %
289 (left.field, right.field))
290 BinaryOp.__init__(self, left, right)
291
293 i = self.left.term.rfind("[")
294 if i == -1:
295 i = len(self.left.term)
296 x = self.left.term[:i]
297
298 i = self.right.term.rfind("[")
299 if i == -1:
300 i = len(self.right.term)
301 y = self.right.term[:i]
302
303 return "%s:%s[%s]" % (x, y, self.left.field)
304
305
306
308 """Store results from a database search
309
310 Attributes are:
311 count -- total number of matches to the query
312 retmax -- total number of identifiers requested
313 retstart -- a search can return a portion of the total
314 number of results. retstart is the offset into this list
315 ids -- matching identifiers (may be a subset of the full list)
316 translation_set -- dict mapping an input name to the canonical
317 form preferred by NCBI
318 expression -- the full query as understood by NCBI
319 webenv -- the WebEnv string (if use_history is set)
320 query_key -- the query_key (if use_history is set)
321 errors -- list of Problems in the ErrorList
322 warnings -- list of Problems in the WarningList
323 timestamp -- timestamp (from time.time()) when this record
324 was received from the server.
325
326 Returns a list of identifiers instead of a DBIds because the output
327 from NCBI's eSearch doesn't include the database name.
328 """
def __init__(self,
             count, retmax, retstart, ids,
             translation_set, expression,
             webenv, query_key, errors,
             warnings, timestamp):
    """Store every argument on the instance under the same name.

    No copying or validation is done; the objects passed in are
    kept as-is.
    """
    # Paging information and the identifiers themselves
    self.count = count
    self.retmax = retmax
    self.retstart = retstart
    self.ids = ids
    # The query as reported back in the search results
    self.translation_set = translation_set
    self.expression = expression
    # History handles
    self.webenv = webenv
    self.query_key = query_key
    # Diagnostics and bookkeeping
    self.errors = errors
    self.warnings = warnings
    self.timestamp = timestamp
345
347 """Store the results of a Post
348
349 Attributes are:
350 webenv -- the WebEnv string
351 query_key -- the query_key
352 timestamp -- timestamp (from time.time()) when this record
353 was received from the server.
354 """
def __init__(self, webenv, query_key, invalid_ids, timestamp):
    """Store every argument on the instance under the same name."""
    self.webenv = webenv
    self.query_key = query_key
    self.invalid_ids = invalid_ids
    self.timestamp = timestamp
360
362 """Store information from calling eSummary
363
364 Attributes are:
365 id -- the identifier string for this record
366 dataitems -- an OrderedDictList containing the parsed Item
367 elements for this Summary.
368 """
370 self.id = id
371 self.dataitems = dataitems
373 return "Summary(%r, %r)" % (self.id, self.dataitems)
375 return "<Summary id=%s, %s>" % (self.id, self.dataitems)
376
377
379 """Allow simple Date storage
380
381 Parameters and attributes are 'year', 'month', and 'day'
382 """
384 self.year = year
385 self.month = month
386 self.day = day
388 return "%s(%r, %r, %r)" % (self.__class__.__name__,
389 self.year, self.month, self.day)
391 return "%4d/%02d/%02d" % (self.year, self.month, self.day)
393 """Return the 9-tuple needed by various time functions"""
394
395
396 return (self.year, self.month, self.day, 0, 0, 0, 0, 0, -1)
398 """Are these two times equal?"""
399 return (self.year == other.year and
400 self.month == other.month and
401 self.day == other.day)
403 """Are these two times dissimilar?"""
404 return not self == other
405
406
407
408
409
410
411
412
414 """Base class for Search Errors or Warnings
415
416 A problem has:
417 text -- the text of the problem
418 severity -- either Problem.ERROR or Problem.WARNING
419 category -- how NCBI categorizes this problem
420 """
421 ERROR = "ERROR"
422 WARNING = "WARNING"
430 return not self == other
432 return "%s(%r)" % (self.__class__.__name__, self.text)
434 return str(self.text)
435
438
441
444
448
451
454
457
def _build_problem_mapping():
    """Internal: make a map from category name (in XML) to the right class"""
    mapping = {}
    for v in globals().values():
        try:
            # Keep only Problem subclasses that declare a category.
            if issubclass(v, Problem) and hasattr(v, "category"):
                mapping[v.category] = v
        except TypeError:
            # issubclass() raises TypeError for module globals that are
            # not classes; those are deliberately skipped.
            pass
    return mapping

# Built once at import time, after the Problem classes are defined.
problem_category_mapping = _build_problem_mapping()
470
471
472
474 """Store neighbor Link information for a given record
475
476 Attributes are:
477 id -- the identifier used as the input for the neighbor request
478 score -- the amount of similarity, high numbers are better
479 """
486 return not self == other
488 return "Link(%r, %r)" % (self.id, self.score)
489
491 """Store results from an lcheck link
492
493 Attributes are:
494 id -- the id of the requested record
495 has_linkout -- boolean, either it does or doesn't
496 has_neighbor -- boolean, either it does or doesn't
497 """
def __init__(self, id, has_linkout = 0, has_neighbor = 0):
    """Store the record id and its two flags.

    Both flags default to 0 (false) when not given.
    """
    self.id = id
    self.has_linkout = has_linkout
    self.has_neighbor = has_neighbor
503 return (self.id == other.id and
504 self.has_linkout == other.has_linkout and
505 self.has_neighbor == other.has_neighbor)
507 return not self == other
509 return "IdCheck(%r, %r, %r)" % (self.id, self.has_linkout, self.has_neighbor)
510
512 """Used in eLink with cmd == neighbor
513
514 Attributes are:
515 dbto -- the links are TO this database name
516 linkname -- the name for this set (eg, "pubmed_protein")
517 links -- list of Links, one per matching record (includes score)
518 List order is the same as the XML, which is ordered from
519 most likely to least. The identifier is from 'dbto'
520 info -- ignored; this is only used as a warning when there is
521 an empty list
522
523 You can also use
524 dbids -- get a DBIds of dbto and the identifiers in each Link
525 """
def __init__(self, dbto, linkname, links = None, info = None):
    """dbto, linkname, links = None, info = None

    'links' defaults to a new empty list, but only when 'info' is
    given. 'info' is checked for presence and is not stored on the
    instance.

    Raises TypeError if both 'links' and 'info' are None.
    """
    if links is None:
        if info is None:
            raise TypeError("At least one of 'links' and 'info' must be set")
        # A fresh list per instance, never a shared default.
        links = []
    self.dbto = dbto
    self.linkname = linkname
    self.links = links
534
537 dbids = property(_get_dbids)
538
540 return (self.dbto == other.dbto and
541 self.linkname == other.linkname and
542 self.links == other.links)
544 return not self == other
546 return "LinkSetDb(%r, %r, %r)" % (self.dbto, self.linkname, self.links)
547
549 """Results from an eLink neighbor search
550
551 Attributes are:
552 dbids -- the DBIds of the *REQUESTED* identifiers
553 linksetdbs -- an OrderedMultiDict of LinkSetDb objects
554
555 """
557 self.dbids = dbids
558 self.linksetdbs = linksetdbs
560 return (self.dbids == other.dbids and
561 self.linksetdbs == other.linksetdbs)
563 return not self == other
564
566 return "NeighborLinkSet(%r, %r)" % (self.dbids, self.linksetdbs)
567
568
570 """Results from 'ncheck' and 'lcheck' searches
571
572 This is used to check if a set of records has neighbors
573 or links.
574
575 Attributes are:
576 dbfrom -- the database containing those records
577 idchecks -- list of IdCheck objects, one per id
578
579 dbids -- the DBIds made from dbfrom and the idchecks
580 """
582 self.dbfrom = dbfrom
583 self.idchecks = idchecks
584
586 return DBIds(self.dbfrom, [idcheck.id for idcheck in self.idchecks])
587 dbids = property(_get_dbids)
588
590 return (self.dbfrom == other.dbfrom and
591 self.idchecks == other.idchecks)
593 return not self == other
595 return "CheckLinkSet(%r, %r)" % (self.dbfrom, self.idchecks)
596
597
598
600 """The Provider, as listed in 'llinks' (LinkOut)
601
602 Attributes are:
603 name -- name of the provider
604 name_abbr -- an abbreviated name for the provider
605 id -- a unique id for the provider
606 url -- where to go for more information about the provider
607 icon_url -- a small image to use for the provider
608
609 """
def __init__(self, name, name_abbr, id,
             url = None, icon_url = None):
    """Store every argument on the instance under the same name.

    'url' and 'icon_url' are optional and default to None.
    """
    self.name = name
    self.name_abbr = name_abbr
    self.id = id
    self.url = url
    self.icon_url = icon_url
618 return (self.name == other.name and
619 self.name_abbr == other.name_abbr and
620 self.id == other.id and
621 self.url == other.url and
622 self.icon_url == other.icon_url)
624 return not self == other
626 return "Provider(%r, %r, %r, %r, %r)" % (
627 self.name, self.name_abbr, self.id, self.url, self.icon_url)
628
629
631 """The ObjUrl containing LinkOut information for a record
632
633 Attributes are:
634 subject_types -- list of strings describing this link (0 or more)
635 provider -- a Provider instance
636 linkname -- a name used to categorize this link (optional)
637 attributes -- list of attributes (text strings), (0 or more)
638 url -- URL of the link (optional)
639 iconurl -- URL containing image for this link (optional)
640 """
def __init__(self, subject_types, provider,
             linkname = None, url = None, attributes = None):
    """subject_types, provider, linkname = None, url = None, attributes = None

    'subject_types' must be a list (checked with an assertion).
    'attributes' defaults to a new empty list for each instance.
    """
    assert isinstance(subject_types, list), \
        "subject_types must be a list, not %r" % (subject_types,)
    self.subject_types = subject_types
    self.provider = provider
    self.linkname = linkname
    if attributes is None:
        # A fresh list per instance, never a shared default.
        attributes = []
    self.url = url
    self.attributes = attributes
652 return (self.linkname == other.linkname and
653 self.subject_types == other.subject_types and
654 self.url == other.url and
655 self.attributes == other.attributes and
656 self.provider == other.provider)
658 return not self == other
660 return "ObjUrl(%r, %r, %r, %r, %r)" % (
661 self.subject_types, self.provider, self.linkname,
662 self.url, self.attributes)
663
665 """Set of ObjUrls for the record with the given 'id'"""
667 self.id = id
668 self.objurls = objurls
670 return (self.id == other.id and
671 self.objurls == other.objurls)
673 return not self == other
675 return "IdUrlSet(%r, %r)" % (self.id, self.objurls)
676
678 """Results of an 'llink' (LinkOut) search
679
680 Finds links from records in a given database to external
681 resources.
682
683 Fields are:
684 dbfrom -- the database in which search started
685 idurlset -- a list of IdUrlSet, one for each identifier
686 """
687
689 self.dbfrom = dbfrom
690 self.idurlset = idurlset
692 return (self.dbfrom == other.dbfrom and
693 self.idurlset == other.idurlset)
695 return not self == other
697 return "LinksLinkSet(%r, %r)" % (self.dbfrom, self.idurlset)
698