1
2
3
4
5
6 """Martel based parser to read ECell formatted files.
7
This is a huge regular expression for ECell, built using
the 'regular expressions on steroids' capabilities of Martel.
10
11 http://www.bioinformatics.org/ecell2/
12 Notes:
13 Just so I remember -- the new end of line syntax is:
14 New regexp syntax - \R
15 \R means "\n|\r\n?"
16 [\R] means "[\n\r]"
17
18 This helps us have endlines be consistent across platforms.
19
20 """
21
22
23 import string
24
25
26 import Martel
27 from Martel import RecordReader
28 from Martel import Str
29 from Martel import AnyEol
30 from Martel import ToEol
31 from Martel import Group
32 from Martel import Alt
33 from Martel import Rep
34 from Martel import Rep1
35 from Martel import Any
36 from Martel import AnyBut
37 from Martel import Expression
38
39
40
41
42
43
44
45
46
47
48
49
# Whitespace characters: space, horizontal tab (0x09), line feed (10) and
# carriage return (13), in that order.
excluded_chars = " \t\n\r"
51
# --- Single-line expressions -------------------------------------------------
# Every expression below is wrapped in a named Group. The "Type", "system",
# "Reactor" and "Substance" keywords are additionally wrapped in
# Expression.NoCase; the "include" keyword is a plain Str.


def _keyword(group_name, text):
    """Return Group(group_name, ...) around a NoCase-wrapped Str(text)."""
    return Group(group_name, Expression.NoCase(Str(text)))


def _through_eol(group_name, leader):
    """Return Group(group_name, ...) around *leader* followed by ToEol()."""
    return Group(group_name, leader + ToEol())


# Header: the "Type" keyword followed by ToEol().
block_type = _keyword("block_type", "Type")
header_line = _through_eol("header_line", block_type)

# A single tab character; used below as the leader of a continuation line.
tab = Group("tab", Str("\t"))

# Keywords that introduce each kind of entry line.
system_tag = _keyword("system_tag", "system")
reactor_tag = _keyword("reactor_tag", "Reactor")
substance_tag = _keyword("substance_tag", "Substance")

# Entry lines: the keyword followed by ToEol().
system_line = _through_eol("system_line", system_tag)
reactor_line = _through_eol("reactor_line", reactor_tag)
substance_line = _through_eol("substance_line", substance_tag)

# A line that starts with a tab, and an "include" line.
continuation_line = _through_eol("continuation_line", tab)
include_line = _through_eol("include_line", Str("include"))
64
# --- Multi-line entries ------------------------------------------------------
# A substance line followed by Rep(continuation_line), i.e. its tab-led
# continuation lines.
substance_multiline = Group(
    "substance_multiline",
    substance_line + Rep(continuation_line))

# Same shape for reactors: a reactor line followed by Rep(continuation_line).
reactor_multiline = Group(
    "reactor_multiline",
    reactor_line + Rep(continuation_line))
72
# --- Blocks and the complete record ------------------------------------------
# Each block body is a Rep1 repetition of one kind of entry.
system_block = Group("system_block", Rep1(system_line))
reactor_block = Group("reactor_block", Rep1(reactor_multiline))
substance_block = Group("substance_block", Rep1(substance_multiline))

# A valid block is a header line followed by exactly one of the block bodies.
_block_body = Alt(system_block, reactor_block, substance_block)
valid_block = Group("valid_block", header_line + _block_body)

# The file contents are a Rep1 repetition of valid blocks; the record
# expression is simply an alias for that.
valid_contents = Group("valid_contents", Rep1(valid_block))
ecell_record = valid_contents
84