Package Bio :: Package LocusLink
[hide private]
[frames] | no frames]

Source Code for Package Bio.LocusLink

  1  from Bio import File 
  2  import Martel 
  3  from Martel.Dispatch import Dispatcher 
  4  from Martel import RecordReader 
  5  from locus_format import locus_record 
  6   
  7  """Parser for NCBI's LocusLink, curated sequence and descriptive information  
  8  about genetic loci. 
  9   
 10  The LocusLink site is: 
 11  http://www.ncbi.nlm.nih.gov/LocusLink/ 
 12  """ 
 13   
 14  import warnings 
 15  warnings.warn("Bio.LocusLink was deprecated, as NCBI's LocusLink was superceded by Entrez Gene. If you still need this module, please get in touch with the Biopython developers (biopython-dev@biopython.org) to avoid permanent removal of this module", DeprecationWarning) 
 16   
class Record(dict):
    """Dictionary holding the fields of one LocusLink entry.

    Keys are field names.  ``str(record)`` renders the content as a
    key-sorted listing in which nested values are indented by depth.
    """

    def __init__(self):
        dict.__init__(self)

    def __str__(self):
        """Return each key (in sorted order) followed by its formatted value."""
        parts = []
        for key in sorted(self.keys()):
            parts.append('%s:\n' % key)
            parts.append(self.print_item(self[key]))
            parts.append('\n')
        return ''.join(parts)

    def print_item(self, item, level=1):
        """Format one value, recursing into lists and dictionaries.

        Arguments:
        o item - The value to format.  Strings are written on one indented
        line (empty strings produce nothing), lists are formatted element
        by element, dictionaries are formatted key by key in sorted order,
        and anything else is written as str(item) without indentation.
        o level - Nesting depth; each level adds one unit of indentation.

        Returns the formatted text as a string.
        """
        # NOTE(review): the indent unit and the separator in '%s %s:\n' are
        # reproduced as a single space from the listing this was recovered
        # from; whitespace may have been collapsed there -- confirm the
        # intended width against the upstream file.
        indent = ' ' + ' ' * level

        if isinstance(item, str):
            if item == '':
                return ''
            return '%s%s\n' % (indent, item)

        if isinstance(item, list):
            parts = [self.print_item(subitem, level + 1) for subitem in item]
            parts.append('\n')
            return ''.join(parts)

        # A single isinstance check covers plain dicts and dict subclasses
        # (such as Record itself); a second exact-type check is unreachable.
        if isinstance(item, dict):
            parts = []
            for subkey in sorted(item.keys()):
                parts.append('%s %s:\n' % (indent, subkey))
                parts.append(self.print_item(item[subkey], level + 1))
            parts.append('\n')
            return ''.join(parts)

        return '%s\n' % str(item)
61
class Iterator:
    """Iterator interface to move over a file of LocusLink entries one at a time.
    """

    def __init__(self, handle, parser=None):
        """Initialize the iterator.

        Arguments:
        o handle - A handle with LocusLink entries to iterate through.
        o parser - An optional parser to pass the entries through before
        returning them. If None, then the raw entry will be returned.
        """
        self.handle = File.UndoHandle(handle)
        # Each record is taken to begin at a line starting with '>>'.
        self._reader = RecordReader.StartsWith(self.handle, '>>')
        self._parser = parser

    def next(self):
        """Return the next LocusLink record from the handle.

        Returns whatever the reader yields when no parser was supplied or
        when the reader yields an empty/None value (end of input);
        otherwise returns the parser's result for that record.
        """
        data = self._reader.next()
        # Previously every record was also written to a file named 'dump'
        # in the current directory (debugging residue).  That overwrote
        # user files and failed in read-only directories, so it is gone.
        if self._parser is not None and data:
            return self._parser.parse(File.StringHandle(data))
        return data

    def __iter__(self):
        # Call next() repeatedly until it returns the None sentinel.
        return iter(self.next, None)
95
96 -class _Scanner:
97 """Start up Martel to do the scanning of the file. 98 99 This initialzes the Martel based parser and connects it to a handler 100 that will generate events for a Feature Consumer. 101 """
102 - def __init__(self, debug_level = 0):
103 """Initialize the scanner by setting up our caches. 104 105 Creating the parser takes a long time, so we want to cache it 106 to reduce parsing time. 107 108 Arguments: 109 o debug - The level of debugging that the parser should 110 display. Level 0 is no debugging, Level 2 displays the most 111 debugging info (but is much slower). See Martel documentation 112 for more info on this. 113 """ 114 # a listing of all tags we are interested in scanning for 115 # in the MartelParser 116 self.interest_tags = [ "locus_line", "accnum_block", "phenotype_block", "db_block" ] 117 118 # make a parser that returns only the tags we are interested in 119 expression = Martel.select_names( locus_format.locus_record, self.interest_tags) 120 self._parser = expression.make_parser(debug_level )
121
122 - def feed(self, handle, consumer):
123 """Feeed a set of data into the scanner. 124 125 Arguments: 126 o handle - A handle with the information to parse. 127 o consumer - The consumer that should be informed of events. 128 """ 129 consumer.set_interest_tags( self.interest_tags ) 130 self._parser.setContentHandler( consumer ) 131 # self._parser.setErrorHandler(handle.ErrorHandler()) 132 133 self._parser.parseFile(handle)
134
def _split_key_value(line):
    """Split 'KEY: value' at the first colon; return both parts stripped.

    Raises IndexError when the line contains no colon.
    """
    cols = line.split(':', 1)
    return cols[0].strip(), cols[1].strip()


class _RecordConsumer(Dispatcher):
    """Create a LocusLink Record object from scanner generated information.

    The start_* handlers begin capturing character data for a tag and the
    matching end_* handlers store the captured text in self.data.
    """

    def __init__(self):
        Dispatcher.__init__(self)

    def startDocument(self):
        # A fresh Record for every parsed document.
        self.data = Record()

    def set_interest_tags(self, interest_tags):
        self.interest_tags = interest_tags

    def start_locus_line(self, line, attrs):
        self.save_characters()

    def end_locus_line(self, locus_record):
        """Store one 'KEY: value' line in the record.

        Lines whose key is 'BUTTON' are ignored.  The first value for a
        key is stored as a string; a second value turns the entry into a
        two-element list and further values are appended to that list.
        """
        key, newval = _split_key_value(self.get_characters())
        if key == 'BUTTON':
            return
        if key not in self.data:
            self.data[key] = newval
            return

        val = self.data[key]
        if isinstance(val, str):
            self.data[key] = [val, newval]
        elif isinstance(val, list):
            # In-place append; the list is already stored under key.
            val.append(newval)

    def start_accnum_block(self, line, attrs):
        self.save_characters()

    def end_accnum_block(self, locus_record):
        self.parse_block(self.get_characters(), 'ACCNUM')

    def start_phenotype_block(self, line, attrs):
        self.save_characters()

    def end_phenotype_block(self, locus_record):
        self.parse_block(self.get_characters(), 'PHENOTYPE')

    def start_db_block(self, line, attrs):
        self.save_characters()

    def end_db_block(self, locus_record):
        self.parse_block(self.get_characters(), 'DB')

    def parse_block(self, block, block_key):
        """Turn a multi-line block into a dict and file it under block_key.

        Arguments:
        o block - Text of the block; every line is split at its first
        colon into a key and a value (a later duplicate key overwrites
        an earlier one).
        o block_key - Key of self.data under which the resulting dict is
        collected; the value stored there is a list of such dicts.
        """
        entry = {}
        for line in block.splitlines():
            key, newval = _split_key_value(line)
            entry[key] = newval

        if block_key in self.data:
            self.data[block_key].append(entry)
        else:
            self.data[block_key] = [entry]
212
class RecordParser:
    """Parse LocusLink files into Record objects.
    """

    def __init__(self, debug_level=0):
        """Set up the parser.

        Arguments:
        o debug_level - Optional amount of debugging information Martel
        should print.  The default of 0 prints nothing (the fastest
        setting); it can be raised as high as two to see exactly where a
        parse fails.
        """
        scanner = _Scanner(debug_level)
        self._scanner = scanner

    def parse(self, handle):
        """Parse the specified handle and return the data the consumer built.

        A new consumer is created for every call and kept on the instance
        as self._consumer.
        """
        consumer = _RecordConsumer()
        self._consumer = consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
233