Package Martel :: Module IterParser
[hide private]
[frames | no frames]

Source Code for Module Martel.IterParser

  1  # Copyright 2000-2001, Dalke Scientific Software, LLC 
  2  # Distributed under the Biopython License Agreement (see the LICENSE file). 
  3   
  4  """Implement Martel parsers. 
  5   
  6  The classes in this module are used by other Martel modules and not 
  7  typically by external users. 
  8   
  9  There are two major parsers, 'Parser' and 'RecordParser.'  The first 
 10  is the standard one, which parses the file as one string in memory 
 11  then generates the SAX events.  The other reads a record at a time 
 12  using a RecordReader and generates events after each read.  The 
 13  generated event callbacks are identical. 
 14   
 15  At some level, both parsers use "_do_callback" to convert mxTextTools 
 16  tags into SAX events. 
 17   
 18  XXX finish this documentation 
 19   
 20  XXX need a better way to get closer to the likely error position when 
 21  parsing. 
 22   
 23  XXX need to implement Locator 
 24   
 25  """ 
 26  from __future__ import generators 
 27   
 28  import urllib, traceback, sys 
 29  from xml.sax import handler, saxutils 
 30  import Parser, RecordReader 
 31   
 32  try: 
 33      from cStringIO import StringIO 
 34  except ImportError: 
 35      from StringIO import StringIO 
 36   
 37   
class IterRecords:
    """Iterate over an input that consists only of records.

    A reader created by 'make_reader' hands back one record at a time.
    Each record is parsed with 'record_parser' and the content handler
    is yielded once per successfully parsed record.

    While iterating, 'start_position' and 'end_position' hold the
    character offsets of the current record within the input.
    """

    def __init__(self, record_parser, make_reader, reader_args, marker_tag):
        """Remember the parser, the reader factory and its arguments.

        record_parser -- parser applied to every record; this class calls
            its copy(), setContentHandler() and parseString() methods
        make_reader -- callable used as make_reader(fileobj, *reader_args)
        reader_args -- extra positional arguments for make_reader
        marker_tag -- stored unchanged; not otherwise used by this class
        """
        self.record_parser = record_parser
        self.make_reader = make_reader
        self.reader_args = reader_args
        self.marker_tag = marker_tag

    def copy(self):
        """Return a new IterRecords with its own copy of the record parser.

        The reader factory, reader arguments and marker tag are shared
        with the original.
        """
        return IterRecords(self.record_parser.copy(),
                           self.make_reader,
                           self.reader_args,
                           self.marker_tag)

    def iterate(self, source, cont_handler = None):
        """Iterate over the records of a SAX input source.

        'source' is normalised with saxutils.prepare_input_source; the
        character stream is used when there is one, otherwise the byte
        stream.  Returns the iterator produced by iterateFile.
        """
        source = saxutils.prepare_input_source(source)
        # Named 'infile' so the 'file' builtin is not shadowed.
        infile = source.getCharacterStream() or source.getByteStream()
        return self.iterateFile(infile, cont_handler)

    def iterateString(self, s, cont_handler = None):
        """Iterate over the records contained in the string 's'."""
        return self.iterateFile(StringIO(s), cont_handler)

    def iterateFile(self, fileobj, cont_handler = None):
        """Generate the content handler once for every record in 'fileobj'.

        When no content handler is given a LAX.LAX() instance is created.
        The same handler object is yielded after each record, so it must
        be inspected before asking for the next record.

        Raises Parser.ParserPositionException when the reader cannot
        produce a record, when a record fails to parse, or when unread
        data is left over after the last record.
        """
        # Set both offsets up front so they are always defined, even
        # when the input holds no records at all.
        self.start_position = self.end_position = 0
        if cont_handler is None:
            import LAX
            cont_handler = LAX.LAX()
        self.record_parser.setContentHandler(cont_handler)

        reader = self.make_reader(fileobj, *self.reader_args)
        while 1:
            try:
                rec = reader.next()
            except RecordReader.ReaderError:
                raise Parser.ParserPositionException(self.start_position)
            if rec is None:
                # The reader has no more records.
                break
            self.end_position = self.start_position + len(rec)
            try:
                self.record_parser.parseString(rec)
            except Parser.ParserPositionException:
                # sys.exc_info() is used instead of a binding clause so
                # this compiles under both Python 2 and Python 3.
                exc = sys.exc_info()[1]
                # Shift the position by the offset of this record before
                # re-raising (relies on the exception supporting '+=').
                exc += self.start_position
                raise

            yield cont_handler
            self.start_position = self.end_position

        # Anything still unread means the input was not fully consumed.
        fileobj, lookahead = reader.remainder()
        if lookahead or fileobj.read(1):
            raise Parser.ParserPositionException(self.start_position)
87 88
class IterHeaderFooter:
    """Iterate over the records of an input with optional header/footer.

    The header (if a header parser is given) is read and parsed first,
    then the content handler is yielded once per record, and finally the
    footer (if a footer parser is given) is read and parsed.  Only the
    record parser is connected to the content handler.

    While iterating, 'start_position' and 'end_position' hold character
    offsets into the input for the part being processed.
    """

    def __init__(self,
                 header_parser, make_header_reader, header_args,
                 record_parser, make_record_reader, record_args,
                 footer_parser, make_footer_reader, footer_args,
                 marker_tag):
        """Remember the parsers and reader factories for each section.

        header_parser, footer_parser -- parsers for the header and the
            footer; either may be None when that section is absent
        record_parser -- parser applied to every record
        make_*_reader -- callables used as
            make_reader(fileobj, *args, lookahead = text)
        header_args, record_args, footer_args -- extra positional
            arguments for the matching reader factory
        marker_tag -- stored unchanged; not otherwise used by this class
        """
        self.header_parser = header_parser
        self.make_header_reader = make_header_reader
        self.header_args = header_args

        self.record_parser = record_parser
        self.make_record_reader = make_record_reader
        self.record_args = record_args

        self.footer_parser = footer_parser
        self.make_footer_reader = make_footer_reader
        self.footer_args = footer_args

        self.marker_tag = marker_tag

    def copy(self):
        """Return a new IterHeaderFooter with copies of the parsers.

        Parsers that are None stay None.  Reader factories, reader
        arguments and the marker tag are shared with the original.
        """
        header_parser = self.header_parser
        if header_parser is not None:
            header_parser = header_parser.copy()
        record_parser = self.record_parser.copy()
        footer_parser = self.footer_parser
        if footer_parser is not None:
            footer_parser = footer_parser.copy()

        return IterHeaderFooter(
            header_parser, self.make_header_reader, self.header_args,
            record_parser, self.make_record_reader, self.record_args,
            footer_parser, self.make_footer_reader, self.footer_args,
            self.marker_tag)

    def iterate(self, source, cont_handler = None):
        """parse using the URL or file handle"""
        source = saxutils.prepare_input_source(source)
        # Named 'infile' so the 'file' builtin is not shadowed.
        infile = source.getCharacterStream() or source.getByteStream()
        return self.iterateFile(infile, cont_handler)

    def iterateString(self, s, cont_handler = None):
        """Iterate over the records contained in the string 's'."""
        return self.iterateFile(StringIO(s), cont_handler)

    def iterateFile(self, fileobj, cont_handler = None):
        """Generate the content handler once for every record in 'fileobj'.

        When no content handler is given a LAX.LAX() instance is created.
        The same handler object is yielded after each record, so it must
        be inspected before asking for the next record.

        Raises Parser.ParserPositionException when the header, a record
        (footer-less case) or the footer cannot be read or parsed, and
        Parser.ParserIncompleteException when data is left over after
        the footer.
        """
        self.start_position = self.end_position = 0
        if cont_handler is None:
            import LAX
            cont_handler = LAX.LAX()
        self.record_parser.setContentHandler(cont_handler)

        lookahead = ""

        # By construction, we never need events from the header
        # nor from the footer
        if self.header_parser is not None:
            reader = self.make_header_reader(fileobj, *self.header_args,
                                             **{"lookahead": lookahead})
            try:
                rec = reader.next()
            except RecordReader.ReaderError:
                raise Parser.ParserPositionException(self.start_position)
            if rec is None:
                # The reader found no header at all; report it the same
                # way as a missing footer instead of failing in len().
                raise Parser.ParserPositionException(self.start_position)
            self.end_position = self.start_position + len(rec)
            self.header_parser.parseString(rec)
            self.start_position = self.end_position
            fileobj, lookahead = reader.remainder()

        reader = self.make_record_reader(fileobj, *self.record_args,
                                         **{"lookahead": lookahead})

        if not self.footer_parser:
            # Without a footer every read or parse failure is an error.
            while 1:
                try:
                    rec = reader.next()
                except RecordReader.ReaderError:
                    raise Parser.ParserPositionException(self.start_position)
                if rec is None:
                    break
                self.end_position = self.start_position + len(rec)
                try:
                    self.record_parser.parseString(rec)
                except Parser.ParserPositionException:
                    # sys.exc_info() is used instead of a binding clause
                    # so this compiles under both Python 2 and Python 3.
                    exc = sys.exc_info()[1]
                    # Shift the position by the offset of this record
                    # (relies on the exception supporting '+=').
                    exc += self.start_position
                    raise
                yield cont_handler
                self.start_position = self.end_position
            # NOTE(review): no check for unread trailing data is made
            # here -- confirm whether that is intentional.
            return

        # This one is tedious
        while 1:
            try:
                rec = reader.next()
            except RecordReader.ReaderError:
                # we may have stumbled into the footer
                fileobj, lookahead = reader.remainder()
                break

            if not rec:
                # maybe there's a footer left
                fileobj, lookahead = reader.remainder()
                break

            try:
                self.record_parser.parseString(rec)
            except Parser.ParserException:
                # we may have tried to parse the footer; hand the text
                # back so the footer reader sees it again
                fileobj, lookahead = reader.remainder()
                lookahead = rec + lookahead
                break
            self.end_position = self.start_position + len(rec)
            yield cont_handler
            self.start_position = self.end_position

        # Try to read the footer
        reader = self.make_footer_reader(fileobj, *self.footer_args,
                                         **{"lookahead": lookahead})
        try:
            rec = reader.next()
        except RecordReader.ReaderError:
            raise Parser.ParserPositionException(self.start_position)

        if rec is None:
            # Couldn't read any footer
            raise Parser.ParserPositionException(self.start_position)

        try:
            self.footer_parser.parseString(rec)
        except Parser.ParserPositionException:
            exc = sys.exc_info()[1]
            # Shift the position by the offset of the footer
            exc += self.start_position
            raise
        self.end_position = self.start_position + len(rec)
        self.start_position = self.end_position

        # Anything still unread means the input was not fully consumed.
        fileobj, lookahead = reader.remainder()
        if lookahead or fileobj.read(1):
            raise Parser.ParserIncompleteException(self.start_position)
225