Package Martel :: Package test :: Module test_Parser
[hide private]
[frames | no frames]

Source Code for Module Martel.test.test_Parser

  1  import Martel 
  2  from Martel import RecordReader, Parser 
  3   
  4  from xml.sax import handler, saxutils 
  5  from StringIO import StringIO 
  6   
  7  # NOTE: Do not write formats like the ones in this file, e.g. with
  8  # "(.|\n)*".  Those depend on the implementation using a
  9  # RecordReader-like interface.  Instead, you need to write formats so
 10  # that they could be used even as one huge regexp.
 11   
def test_reader_parser():
    """Check Parser.Parser on one valid and two invalid input strings.

    The format is a "start" group of repeated "abc" followed by an "end"
    group of repeated "xyz".  A parser is rebuilt from the generated tag
    table, a well-formed string is parsed, and then two malformed strings
    are parsed; each of those must raise Parser.ParserPositionException.

    Raises:
        AssertionError: if a malformed string is parsed without a
            ParserPositionException being raised.
    """
    record = Martel.Group("start", Martel.Rep(Martel.Str("abc"))) + \
             Martel.Group("end", Martel.Rep(Martel.Str("xyz")))
    parser = record.make_parser()

    # Build a plain Parser directly from the tag table of the generated one.
    parser = Parser.Parser(parser.tagtable)
    parser.setErrorHandler(handler.ErrorHandler())

    # Well-formed input: must parse without raising.
    parser.parseString("abc" * 10 + "xyz")

    # Malformed inputs: trailing junk after the end group, and a
    # truncated end group.
    for bad_input in ("abc" * 10 + "xyzQ", "abc" * 10 + "x"):
        try:
            parser.parseString(bad_input)
        except Parser.ParserPositionException:
            pass
        else:
            # Call form of raise works on both Python 2 and Python 3.
            raise AssertionError("didn't get a position exception")
35
class CountErrors(handler.ErrorHandler):
    """Error handler that only counts the errors it is told about.

    Attributes are plain integer tallies:
      error_count        -- number of calls to error()
      fatal_error_count  -- number of calls to fatalError()
    """

    def __init__(self):
        self.fatal_error_count = 0
        self.error_count = 0

    def error(self, exception):
        """Tally one error; the exception object itself is ignored."""
        self.error_count += 1

    def fatalError(self, exception):
        """Tally one fatal error; the exception object itself is ignored."""
        self.fatal_error_count += 1
44
class CountRecords(handler.ContentHandler):
    """Content handler that counts start-element events for one tag name.

    Attributes:
      tag    -- the element name being counted
      count  -- number of startElement() calls seen so far whose tag
                equals self.tag
    """

    def __init__(self, tag):
        """Create a counter for elements named `tag`, starting at zero."""
        # Chain to the base class so that the state it sets up for itself
        # is initialised.  The explicit base-class call (rather than
        # super()) also works where ContentHandler is an old-style class.
        handler.ContentHandler.__init__(self)
        self.tag = tag
        self.count = 0

    def startElement(self, tag, attrs):
        """Increment the count when `tag` matches; `attrs` is ignored."""
        if tag == self.tag:
            self.count += 1
52
def test_record_parser():
    """Run a RecordParser over six "X"-prefixed records and check tallies.

    Each record is expected to be "X\\n" followed by a line of zero or more
    "a" characters.  The input holds six records split on lines starting
    with "X"; the test asserts 4 "A" elements, 2 errors and no fatal
    errors.
    """
    fmt = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    tagtable = fmt.make_parser().tagtable

    errors = CountErrors()
    records = CountRecords("A")

    parser = Parser.RecordParser("blah", {}, tagtable, (0, 1, {}),
                                 RecordReader.StartsWith, ("X",))
    parser.setErrorHandler(errors)
    parser.setContentHandler(records)

    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert errors.error_count == 2, errors.error_count
    assert records.count == 4, records.count
70
def test_header_footer1():
    """Header, three records and a footer are emitted as the expected XML.

    The header reader ends at "XX\\n", each record ends at "//\\n" and the
    footer starts with "f".  The SAX events are serialised through
    XMLGenerator and compared with a fixed gold string.
    """
    text = """\
header
XX
record 1
//
record 2
//
record 3
//
footer
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>header
XX
</header><record>record 1
//
</record><record>record 2
//
</record><record>record 3
//
</record><footer>footer
</footer></hf>"""

    debug_level = 1

    # Don't use regexps like these in your code - for testing only!
    header_format = Martel.Group("header", Martel.Re(r"header(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"rec(.|\n)*"))
    footer_format = Martel.Group("footer", Martel.Re(r"footer(.|\n)*"))

    header_parser = header_format.make_parser(debug_level = debug_level)
    record_parser = record_format.make_parser(debug_level = debug_level)
    footer_parser = footer_format.make_parser(debug_level = debug_level)

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.EndsWith, ("XX\n", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.StartsWith, ("f", ), footer_parser.tagtable,
        (0, debug_level, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    produced = sink.getvalue()
    assert produced == expected, (produced, expected)
121 122
def test_header_footer2():
    """Header plus records, with RecordReader.Nothing as the footer reader.

    The header runs until the first "ID" line and each record starts with
    "ID".  The generated XML must equal the gold string, which contains
    no footer element.
    """
    # Have a header but no footer
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_format = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_format.make_parser()
    record_parser = record_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    produced = sink.getvalue()
    assert produced == expected, (produced, expected)
165
def test_header_footer3():
    """Records plus a footer, with RecordReader.Nothing as the header reader.

    Each record ends at "//\\n" and the footer reader is
    RecordReader.Everything.  The generated XML must equal the gold
    string, which contains no header element.
    """
    # Have a footer but no header
    text = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record_format = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    # Require at least 5 characters (just to be safe)
    footer_format = Martel.Group("footer", Martel.Re(r".....(.|\n)*"))

    record_parser = record_format.make_parser()
    footer_parser = footer_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.Everything, (), footer_parser.tagtable,
        (0, 1, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    produced = sink.getvalue()
    assert produced == expected, (produced, expected)
210
def test_header_footer4():
    """Header plus records, with None passed as the footer reader.

    Uses the same input and gold XML shape as a header-and-records-only
    document: the header runs until the first "ID" line and each record
    starts with "ID".  The footer reader argument is None.
    """
    # Have a header but no footer - and no footer reader
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    expected = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_format = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_format.make_parser()
    record_parser = record_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        None, (), (),
        (0, 1, {}))

    sink = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(sink))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(text))

    produced = sink.getvalue()
    assert produced == expected, (produced, expected)
252
def test_header_footer5():
    """Count good and bad records when no footer reader is given.

    The input has a header followed by seven "ID" records.  The record
    format requires digits after "ID "; the test asserts 4 "record"
    elements, 3 errors and no fatal errors.
    """
    # Make sure I can skip records when there are no footer records
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID A
This is some more data
ID 3
This is again some more data
ID Q
This blah
ID W
QWE
ID 987
To be
ID 897
Or not to be
"""
    header_format = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_format.make_parser()
    record_parser = record_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        None, (), (),
        (0, 1, {}))

    records = CountRecords("record")
    errors = CountErrors()
    hf.setContentHandler(records)
    hf.setErrorHandler(errors)
    hf.parseFile(StringIO(text))

    assert errors.error_count == 3, errors.error_count
    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert records.count == 4, records.count
294
def test_header_footer6():
    """Count good and bad records when a footer follows the records.

    The input has a header, seven "ID" records each terminated by "//",
    and a "FOOTER" line.  The record format requires digits after "ID ";
    the test asserts 4 "record" elements, 3 errors and no fatal errors.
    """
    # Make sure I can skip records when there are footer records
    text = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
//
ID A
This is some more data
//
ID 3
This is again some more data
//
ID Q
This blah
//
ID W
QWE
//
ID 987
To be
//
ID 897
Or not to be
//
FOOTER
"""
    header_format = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_format = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_format.make_parser()
    record_parser = record_format.make_parser()
    footer_parser = footer_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    records = CountRecords("record")
    errors = CountErrors()
    hf.setContentHandler(records)
    hf.setErrorHandler(errors)
    hf.parseFile(StringIO(text))

    assert errors.error_count == 3, errors.error_count
    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert records.count == 4, records.count
346
def test_header_footer7():
    """Parse a document that has a header and a footer but no records.

    The header reader takes two lines (CountLines with 2) and the footer
    starts with "FOOTER".  The test asserts zero "record" elements, zero
    errors and zero fatal errors.
    """
    # header and footer but with no record data
    text = """\
This is some misc. header text
that goes on until the end.
FOOTER
"""
    header_format = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_format = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_format.make_parser()
    record_parser = record_format.make_parser()
    footer_parser = footer_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    records = CountRecords("record")
    errors = CountErrors()
    hf.setContentHandler(records)
    hf.setErrorHandler(errors)
    hf.parseFile(StringIO(text))

    assert errors.error_count == 0, errors.error_count
    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert records.count == 0, records.count
377
def test_header_footer8():
    """Run one line-counting parser over five inputs and check the tallies.

    The header is two lines, each record is one "Data <digits>" line and
    the footer is one "FOOTER <word>" line.  Every entry of the case
    table pairs an input string with the expected number of "record"
    elements, errors and fatal errors.  The same parser object is reused
    for all inputs, with fresh counting handlers installed each time.
    """
    # header, record and footer, but with extra data
    s1 = """Two lines in
the header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
FOOTER B
"""
    s2 = """Two lines in
the header.
Data 1
Data 2
Data Q
Data 4
FOOTER Abc
"""
    s3 = """Two lines in
the header.
Data 1
Data 4
FOOTER Abc
"""
    s4 = """Two lines in
the header.
Data Q
FOOTER Abc
"""
    s5 = """Two lines in
the header.
FOOTER Abc
"""
    # (input, expected records, expected errors, expected fatal errors)
    cases = (
        (s1, 3, 1, 1),
        (s2, 3, 1, 0),
        (s3, 2, 0, 0),
        (s4, 0, 1, 0),
        (s5, 0, 0, 0),
    )

    header_format = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_format = Martel.Group("record", Martel.Re(r"Data \d+\n"))
    footer_format = Martel.Group("footer", Martel.Re("FOOTER \w+\n"))

    header_parser = header_format.make_parser()
    record_parser = record_format.make_parser()
    footer_parser = footer_format.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.CountLines, (1, ), record_parser.tagtable,
        RecordReader.CountLines, (1, ), footer_parser.tagtable,
        (0, 1, {}))

    for s, rec_count, err_count, fatal_count in cases:
        records = CountRecords("record")
        hf.setContentHandler(records)
        errors = CountErrors()
        hf.setErrorHandler(errors)
        hf.parseFile(StringIO(s))

        assert errors.error_count == err_count, \
               (s, errors.error_count, err_count)
        assert errors.fatal_error_count == fatal_count, \
               (s, errors.fatal_error_count, fatal_count)
        assert records.count == rec_count, (s, records.count, rec_count)
444
def test():
    """Run every test function in this module, in definition order.

    Each test raises (typically AssertionError) on failure, so this
    function returns normally only when all of them pass.
    """
    test_reader_parser()
    test_record_parser()
    test_header_footer1()
    test_header_footer2()
    test_header_footer3()
    test_header_footer4()
    test_header_footer5()
    # test_header_footer6 is defined in this module but was missing from
    # this list, so it never ran.
    # NOTE(review): confirm the omission was not deliberate.
    test_header_footer6()
    test_header_footer7()
    test_header_footer8()
# Script entry point: run the whole suite when this file is executed
# directly rather than imported.
if "__main__" == __name__:
    test()