1
2
3
4
5
6 import warnings
7 warnings.warn("Bio.ECell was deprecated, as it does not seem to have any users. If you do use this module, please contact the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module")
8
9
10
11 import sys
12 import string
13 import copy
14 import array
15 import os
16 import re
17 import sgmllib
18 import urlparse
19
20
21 from xml.sax import handler
22
23
24 import Martel
25 from Martel import RecordReader
26
27 from Bio.ParserSupport import EventGenerator
28 from Bio.ParserSupport import AbstractConsumer
29 from Bio import File
30 from Bio.Align.Generic import Alignment
31 import Bio.Alphabet
32 import ecell_format
33 import Record
34
35 """
36 Ecell converts the ECell input from spreadsheet format to an intermediate format, described in
37 http://www.e-cell.org/manual/chapter2E.html#3.2. It provides an alternative to the perl script
38 supplied with the Ecell2 distribution at http://bioinformatics.org/project/?group_id=49.
39
40 ECell expects a spreadsheet exported in delimited text format. The file should be read with
41 FilteredReader using the default filter chain to remove extraneous characters.
42 """
43
49
51
52 """
53 message - description of error
54 """
55
58
59
60
62 """Iterator interface to move over a file of ecell entries one at a time.
63 """
def __init__(self, handle, parser=None):
    """Prepare to iterate over the ECell entries available from a handle.

    The handle is wrapped in a File.UndoHandle and that wrapper is what
    the RecordReader.Everything reader is built on.

    Arguments:
    o handle - A handle with ECell entries to iterate through.
    o parser - An optional parser to pass the entries through before
    returning them. If None, then the raw entry will be returned.
    """
    undo_handle = File.UndoHandle(handle)
    self.handle = undo_handle
    self._reader = RecordReader.Everything(undo_handle)
    self._parser = parser
75
77 """Return the next ecell record from the handle.
78
79 Will return None if we ran out of records.
80 """
81 data = self._reader.next()
82
83 if self._parser is not None:
84 if data:
85 dumpfile = open( 'dump', 'w' )
86 dumpfile.write( data )
87 dumpfile.close()
88 return self._parser.parse(File.StringHandle(data))
89
90 return data
91
93 return iter(self.next, None)
94
95
96
98 """Start up Martel to do the scanning of the file.
99
100 This initializes the Martel based parser and connects it to a handler
101 that will generate events for a Feature Consumer.
102 """
104 """Initialize the scanner by setting up our caches.
105
106 Creating the parser takes a long time, so we want to cache it
107 to reduce parsing time.
108
109 Arguments:
110 o debug - The level of debugging that the parser should
111 display. Level 0 is no debugging, Level 2 displays the most
112 debugging info (but is much slower). See Martel documentation
113 for more info on this.
114 """
115
116
117 self.interest_tags = [ 'header_line', 'system_line', 'substance_multiline', \
118 'reactor_multiline', 'include_line' ]
119
120
121 expression = Martel.select_names( ecell_format.ecell_record, self.interest_tags)
122 self._parser = expression.make_parser(debug_level = debug)
123
def feed(self, handle, consumer):
    """Run the parser over a handle, reporting events to a consumer.

    An EventGenerator restricted to self.interest_tags is installed as
    the parser's content handler before the handle is parsed.

    Arguments:
    o handle - A handle with the information to parse.
    o consumer - The consumer that should be informed of events.
    """
    event_handler = EventGenerator(consumer, self.interest_tags)
    self._parser.setContentHandler(event_handler)
    self._parser.parseFile(handle)
136
138 """Create an ECell Record object from scanner generated information.
139 """
145
147 self.data.include_buf = self.data.include_buf + line
148
158
159
168
169
173
175 lines = multiline.splitlines()
176 line_no = 0
177 for line in lines:
178 line_dict = self._make_line_dict( line )
179 try:
180 if( not _is_valid_substance( line_dict ) ):
181 raise ECellError( "quantity and concentration are mutually exclusive" )
182 except ECellError, e:
183 print sys.stderr, e.message
184
185 qty = Record.get_entry( line_dict, 'qty' )
186 conc = Record.get_entry( line_dict, 'conc' )
187 if( ( qty.lower() != 'fix' ) and ( conc.lower() != 'fix' ) ):
188 self.data.num_substances = self.data.num_substances + 1
189 else:
190 line_no = line_no + 1
191 if( line.lower().startswith( 'substance' ) ):
192 _set_defaults( line_dict )
193 self._convert_conc( line_dict )
194
195 self._build_substance_entry( line_dict, line_no )
196
200
202 lines = multiline.splitlines()
203 for line in lines:
204 line_dict = self._make_line_dict( line )
205 if( line.lower().startswith( 'reactor' ) ):
206 if( not self._check_missing_header( line_dict ) ):
207 raise ECellError( "invalid header" )
208 try:
209 if( not is_only_digits( line_dict[ 's_coeff' ] ) ):
210 raise ECellError( 's_coeff must contain only digits' )
211 if( not is_only_digits( line_dict[ 'p_coeff' ] ) ):
212 raise ECellError( 'p_coeff must contain only digits' )
213 except KeyError:
214 pass
215 if( line.lower().startswith( 'reactor' ) ):
216 _set_reactor_defaults( line_dict )
217 line_dict = self._remove_if_inconsistent( line_dict )
218
219 if( line_dict.has_key( 'class' ) ):
220 self.data.num_reactors = self.data.num_reactors + 1
221 num_substrates = 0
222 num_products = 0
223 num_catalysts = 0
224 num_effectors = 0
225 num_options = 0
226 num_args = 0
227 if( line_dict.has_key( 's_id' ) ): num_substrates = num_substrates + 1
228 if( line_dict.has_key( 'p_id' ) ): num_products = num_products + 1
229 if( line_dict.has_key( 'c_id' ) ): num_catalysts = num_catalysts + 1
230 if( line_dict.has_key( 'e_id' ) ): num_effectors = num_effectors + 1
231 if( line_dict.has_key( 'o_type' ) ): num_options = num_options + 1
232 if( line_dict.has_key( 'arg_tag' ) ): num_args = num_args + 1
233 counter_dict = { \
234 's_' : num_substrates, \
235 'p_' : num_products, \
236 'c_' : num_catalysts, \
237 'e_' : num_effectors, \
238 'o_' : num_options, \
239 'arg_tag' : num_args
240 }
241 self._set_max( counter_dict )
242 self._build_reactor_entry( line_dict, counter_dict )
243
244
246 num_reactors = self.data.num_reactors
247 for key in counter_dict.keys():
248 composite_key = key + str( num_reactors )
249 self.data._max_dict[ composite_key ] = counter_dict[ key ]
250
def _build_system_entry(self, line_dict):
    """Copy the fields of one system line into self.data.cell_dict.

    Each field is stored under the composite key
    'system' + str(self.data.num_systems) + <field name> + '0'.
    A composite key that is already present keeps its existing value.

    Arguments:
    o line_dict - Mapping of field name to value for the line.
    """
    cell_dict = self.data.cell_dict
    # The prefix does not change while the fields are copied, so build it once.
    key_prefix = 'system' + str(self.data.num_systems)
    for key, item in line_dict.items():
        composite_key = key_prefix + key + '0'
        # 'in' replaces the deprecated dict.has_key(), which Python 3 removed.
        if composite_key not in cell_dict:
            cell_dict[composite_key] = item
258
def _build_substance_entry(self, line_dict, line_no):
    """Copy the fields of one substance line into self.data.cell_dict.

    Each field is stored under the composite key
    'substance' + str(self.data.num_substances) + <field name> + str(line_no).
    A composite key that is already present keeps its existing value.

    Arguments:
    o line_dict - Mapping of field name to value for the line.
    o line_no - Line index appended to every composite key.
    """
    cell_dict = self.data.cell_dict
    # Both ends of the key are fixed for the whole line, so build them once.
    key_prefix = 'substance' + str(self.data.num_substances)
    key_suffix = str(line_no)
    for key, item in line_dict.items():
        composite_key = key_prefix + key + key_suffix
        # 'in' replaces the deprecated dict.has_key(), which Python 3 removed.
        if composite_key not in cell_dict:
            cell_dict[composite_key] = item
266
268 if( line_dict.has_key( 'conc' ) ):
269 if( not line_dict.has_key( 'qty' ) ):
270 contents = 'QTY(%s,%s)' % ( line_dict[ 'conc' ], line_dict[ 'path' ] )
271 composite_key = 'substance' + str( self.data.num_substances ) + 'qty' + '0'
272 self.data.cell_dict[ composite_key ] = contents
273 self.data.contains_concentration = 1
274
def _build_reactor_entry(self, line_dict, counter_dict):
    """Copy the fields of one reactor line into self.data.cell_dict.

    Each field is stored under the composite key
    'reactor' + str(self.data.num_reactors) + <field name> + <index>,
    where <index> is chosen as follows:
    o field names starting with 'arg_' use counter_dict['arg_tag'];
    o otherwise, if the first two characters of the field name are a
    key of counter_dict, that counter is used;
    o otherwise the index is '0'.
    A composite key that is already present keeps its existing value.

    Arguments:
    o line_dict - Mapping of field name to value for the line.
    o counter_dict - Mapping of field prefix to its counter for this line.
    """
    cell_dict = self.data.cell_dict
    key_prefix = 'reactor' + str(self.data.num_reactors)
    for key, item in line_dict.items():
        prefix = key[:2]
        # Test 'arg_' first: its two-character prefix 'ar' is not a counter key.
        if key.startswith('arg_'):
            index = counter_dict['arg_tag']
        # 'in' replaces the deprecated dict.has_key(), which Python 3 removed.
        elif prefix in counter_dict:
            index = counter_dict[prefix]
        else:
            index = '0'
        composite_key = key_prefix + str(key) + str(index)
        if composite_key not in cell_dict:
            cell_dict[composite_key] = item
288
289
291 ok = 1
292 items = [ 'id', 'path', 'class' ]
293 for item in items:
294 if( line_dict.has_key( item ) == 0 ):
295 others = copy.deepcopy( items )
296 others.remove( item )
297 for other in others:
298 if( line_dict.has_key( other ) ):
299 if( item.lower() != 'class' ):
300 ok = 0
301 break
302 return ok
303
305 valid_keys = list_dict.keys()
306 for label in [ 'id', 'path', 'type' ]:
307 for prefix in [ 's_', 'p_', 'c_', 'e_' ]:
308 node = prefix + label
309 valid_keys = self._consistency_filter( prefix, node, valid_keys )
310 for key in list_dict.keys():
311 if( not key in valid_keys ):
312 del list_dict[ key ]
313 return list_dict
314
316 block = []
317 for suffix in [ 'id', 'path', 'coeff', 'type' ]:
318 node = prefix + suffix
319 block.append( node )
320 for node in block:
321 if( ( not tag in valid_keys ) and ( node in valid_keys ) ):
322 if( ( prefix == 'o_' ) or ( not tag.endswith( 'type' ) ) ):
323 valid_keys.remove( node )
324 return valid_keys
325
327 line_dict = {}
328 items = line.split( '\t' )
329 num = 0
330 for item in items:
331 item = item.strip()
332 if( item != '' ):
333 line_dict[ self._header[ num ] ] = item
334 num = num + 1
335 return line_dict
336
341
343 ok = 1
344 if( line_dict.has_key( 'qty' ) and line_dict.has_key( 'conc' ) ):
345 if( not ( line_dict[ 'qty' ] == 'QTY' ) ):
346 ok = 0
347 return ok
348
350 ok = 1
351 text = line.strip()
352 if( text != '' ):
353 if( not text.isdigit() ):
354 ok = 0
355 return ok
356
358 line_dict = _set_defaults( line_dict )
359 for item in [ 's_', 'p_', 'c_', 'e_' ]:
360 id = item + 'id'
361 coeff = item + 'coeff'
362 path = item + 'path'
363 if( line_dict.has_key( id ) ):
364 if( not line_dict.has_key( coeff ) ):
365 line_dict[ coeff ] = 1
366 if( not line_dict.has_key( path ) ):
367 line_dict[ path ] = line_dict[ 'path' ]
368
369 return( line_dict )
370
372 if( not line_dict.has_key( 'name' ) ):
373 line_dict[ 'name' ] = line_dict[ 'id' ]
374 if( line_dict.has_key( 'arg_tag' ) ):
375 if( not line_dict.has_key( 'arg_coeff' ) ):
376 line_dict[ 'arg_coeff' ] = 0
377
378 return( line_dict )
379
380
381
382
383
384
385
387 """Parse ECell files into Record objects
388 """
390 """Initialize the parser.
391
392 Arguments:
393 o debug_level - An optional argument that specifies the amount of
394 debugging information Martel should spit out. By default we have
395 no debugging info (the fastest way to do things), but if you want
396 you can set this as high as two and see exactly where a parse fails.
397 """
398 self._scanner = _Scanner(debug_level)
399
def parse(self, handle):
    """Parse the specified handle into an ECell record.

    A new _RecordConsumer is created for every call and kept on
    self._consumer; the scanner feeds it from the handle and the
    consumer's data attribute is returned.
    """
    consumer = _RecordConsumer()
    self._consumer = consumer
    self._scanner.feed(handle, consumer)
    return consumer.data
406