Package Bio :: Module EZRetrieve
[hide private]
[frames | no frames]

Source Code for Module Bio.EZRetrieve

 1  """This module contains code to access EZRetrieve (DEPRECATED). 
 2   
 3  This module is now deprecated, and will be removed in a future release of 
 4  Biopython. 
 5   
 6  This is a very simple interface to the EZRetrieve website described in: 
 7   
 8  Zhang, H., Ramanathan, Y., Soteropoulos, P., Recce, M., and Tolias, P.P. (2002). 
 9  EZ-Retrieve: A web-server for batch retrieval of coordinate-specified human 
10  DNA sequences and underscoring putative transcription factor-binding sites. 
11  Nucl. Acids. Res. 2002 30: e121. 
12  http://dx.doi.org/10.1093/nar/gnf120 
13   
14  Functions: 
15  retrieve_single  Retrieve a single sequence from EZRetrieve. 
16  parse_single     Parse the results from EZRetrieve into FASTA format. 
17  """ 
18   
19  import warnings 
# Importing this module is enough to trigger the deprecation notice below.
warnings.warn(
    "Bio.EZRetrieve is deprecated, and will be removed in a future "
    "release of Biopython.  If you want to continue to use this "
    "code, please get in contact with the Biopython developers "
    "via the mailing lists to avoid its permanent removal from "
    "Biopython.",
    DeprecationWarning,
)
25   
def retrieve_single(id, from_, to, retrieve_by=None, organism=None,
                    parse_results=1):
    """Retrieve a single sequence from the EZRetrieve web server.

    Arguments:
    id             Identifier to look up; submitted as str(id).
    from_, to      Values for the "from" and "to" form fields, submitted
                   as strings (presumably sequence coordinates).
    retrieve_by    Kind of identifier: "GenBank" (the default), "UniGene",
                   "LocusLink" or "IMAGE"; matched case-insensitively.
    organism       "Hs" (the default), "Mm" or "Rn".
    parse_results  If true (the default) the response is passed through
                   parse_single and the resulting string is returned;
                   otherwise the raw response body is returned unchanged.

    Raises AssertionError if organism or retrieve_by is not one of the
    values listed above.  Both are checked before any network access.
    """
    CGI = "http://siriusb.umdnj.edu:18080/EZRetrieve/single_r_run.jsp"
    org2value = {"Hs": "0", "Mm": "1", "Rn": "2"}
    acctype2value = {"genbank": 0, "unigene": 1, "locuslink": 2, "image": 3}

    # Explicit raises rather than ``assert`` so that the checks still run
    # under ``python -O``; the exception type is unchanged for callers.
    organism = organism or "Hs"
    if organism not in org2value:
        raise AssertionError("Unknown organism %r, expected one of: %s"
                             % (organism, ", ".join(sorted(org2value))))

    retrieve_by = (retrieve_by or "GenBank").lower()
    if retrieve_by not in acctype2value:
        raise AssertionError("Unknown retrieve_by %r, expected one of: %s"
                             % (retrieve_by,
                                ", ".join(sorted(acctype2value))))

    # Local import, as in the original; fall back to the Python 3 locations
    # of the same two functions when the Python 2 names are unavailable.
    try:
        from urllib import urlencode, urlopen
    except ImportError:
        from urllib.parse import urlencode
        from urllib.request import urlopen

    params = {
        "input": str(id),
        "from": str(from_),
        "to": str(to),
        "org": org2value[organism],
        "AccType": acctype2value[retrieve_by],
    }
    options = urlencode(params)
    if not isinstance(options, bytes):
        # Python 3's urlopen requires POST data as bytes; urlencode output
        # is percent-escaped, so ASCII is sufficient.
        options = options.encode("ascii")

    handle = urlopen(CGI, options)
    try:
        if parse_results:
            return parse_single(handle)
        return handle.read()
    finally:
        # Always release the connection, including when parsing fails.
        handle.close()
54
def parse_single(handle):
    """Return a FASTA-formatted string for the sequence.

    handle is a file-like object whose read() returns the HTML page
    produced by EZRetrieve.

    Raises AssertionError if the page contains an "Error: " message (the
    message text is used as the exception text) or if no sequence block,
    marked by "<b>>", can be found.
    """
    import re

    results = handle.read()
    if str is not bytes and isinstance(results, bytes):
        # Python 3 network handles yield bytes, but the searches below
        # need text.
        # NOTE(review): the page encoding is not known from this code;
        # latin-1 is used because it cannot fail -- confirm.
        results = results.decode("latin-1")
    # Lower-cased copy so that the HTML tags are matched case-insensitively
    # while slices are still taken from the original text.
    lresults = results.lower()

    start = results.find("Error: ")
    if start >= 0:
        end = lresults.find("<br>", start)
        if end < 0:
            # No terminating tag: report the rest of the page instead of
            # failing with an unrelated ValueError.
            end = len(results)
        raise AssertionError(results[start:end].strip())

    start = lresults.find("<b>>")
    if start < 0:
        raise AssertionError("Couldn't find sequence.")
    end = lresults.find("<br><br>", start)
    if end < 0:
        # Without this, the -1 from find() would silently chop the last
        # character off the sequence.
        end = len(results)

    seqdata = results[start:end]
    seqdata = re.sub(r"<[^>]*>", "", seqdata)   # drop HTML tags
    seqdata = re.sub(r"\s+", r"\n", seqdata)    # one line break per gap
    return seqdata.strip() + "\n"
79