1
2
3
4
5
6 """
7 This module provides code to work with html files from InterPro,
8 and code to access resources at InterPro over the WWW.
9 http://www.ebi.ac.uk/interpro/
10
11
12 Classes:
13 Record Holds interpro sequence data.
14 InterProParser Parses interpro sequence data into a Record object.
15
16 Functions:
17 get_interpro_entry
18
19 """
20
21 from Bio import File
22 import sgmllib
23 from Bio.SeqFeature import Reference
24
26
28 keys = self.keys()
29 keys.sort()
30 out = ''
31 for key in keys:
32 val = self[ key ]
33 if key == 'References':
34 out = out + '\n%s\n' % key
35 for reference in val:
36 out = out + '%s\n' % str( reference )
37 out = out + '\n'
38 elif key == 'Examples':
39 out = out + '\n%s\n' % key
40 for example in val:
41 out = out + '%s\n' % example
42 elif key == 'Abstract':
43 out = out + '\n%s\n' % key
44 out = out + '%s...\n' % val[ : 80 ]
45 elif type( self[ key ] ) == list:
46 out = out + '\n%s\n' % key
47 for item in val:
48 out = out + '%s\n' % item
49
50 else:
51 out = out + '%s: %s\n' % ( key, self[ key ] )
52 return out
53
55 """Parses InterPro sequence data into a Record object.
56
57 """
59 sgmllib.SGMLParser.reset( self )
60 self.text = ''
61 self.inter_pro_dict = Record()
62 self.inter_pro_dict[ 'Database' ] = ''
63 self.inter_pro_dict[ 'Accession' ] = ''
64 self.inter_pro_dict[ 'Name' ] = ''
65 self.inter_pro_dict[ 'Dates' ] = ''
66 self.inter_pro_dict[ 'Type' ] = ''
67 self.inter_pro_dict[ 'Parent' ] = ''
68 self.inter_pro_dict[ 'Process' ] = ''
69 self.inter_pro_dict[ 'Function' ] = ''
70 self.inter_pro_dict[ 'Component' ] = ''
71 self.inter_pro_dict[ 'Signatures' ] = []
72 self.inter_pro_dict[ 'Abstract' ] = ''
73 self.inter_pro_dict[ 'Examples' ] = []
74 self.inter_pro_dict[ 'References' ] = []
75 self.inter_pro_dict[ 'Database links' ] = []
76 self._state = 'title'
77 self._reference_state = ''
78 self._key_waiting = ''
79 self._current_reference = ''
80
85
def feed(self, handle):
    """Read InterPro HTML from a handle and pass it to the SGML parser.

    handle is a file-like object containing InterPro HTML.  It is wrapped
    in a File.UndoHandle unless it already is one.

    Lines are read until the end of the input, or until a line that ends
    with '</HTML>' once stripped; that terminating line itself is not
    parsed.  Every line is stripped of surrounding whitespace, prefixed
    with a single space, and the result is handed to
    sgmllib.SGMLParser.feed in one call.
    """
    if isinstance(handle, File.UndoHandle):
        uhandle = handle
    else:
        uhandle = File.UndoHandle(handle)

    # Collect the pieces and join once: growing a string inside the loop
    # copies the accumulated text again on every line.
    pieces = []
    while True:
        line = uhandle.readline()
        if not line:
            break
        line = line.strip()
        if line.endswith('</HTML>'):
            break
        # The leading space keeps the text identical to what the previous
        # "text + ' ' + line" accumulation produced.
        pieces.append(' ' + line)

    sgmllib.SGMLParser.feed(self, ''.join(pieces))
109
110
112 newtext = newtext.strip()
113 self.text = self.text + newtext
114
116 dictionary = dict( attrs )
117 for key in dictionary:
118 val = dictionary[key]
119
122
124 self._state = 'chugging_along'
125
127 dictionary = dict( attrs )
128 if self._state == 'chugging_along':
129 if dictionary.has_key( 'class' ):
130 if dictionary[ 'class' ] == 'tag':
131 self._state = 'waiting_tag'
132 self._flush_text()
133 elif dictionary[ 'class' ] == 'inf':
134 self._state = 'waiting_inf'
135 self._flush_text()
136
138 if self._state == 'waiting_tag':
139 self._key_waiting = self._flush_text()
140 self._state = 'chugging_along'
141 elif self._state == 'waiting_inf':
142 key = self._key_waiting
143 if self.inter_pro_dict.has_key( key ):
144 val = self._flush_text()
145 if key == 'Signatures':
146 pass
147 elif key == 'Database links':
148 pass
149 else:
150 self.inter_pro_dict[ key ] = val
151 self._key_waiting = ''
152 self._state = 'chugging_along'
153
154
156 if self._key_waiting == 'Examples':
157 self._state = 'examples'
158 self._flush_text()
159
161 self._key_waiting = ''
162 self._state = 'chugging_along'
163
165 if self._key_waiting == 'References':
166 self._state = 'references'
167 self._reference_state = 'pubmed_id'
168 self._flush_text()
169 self._references = []
170
172 if self._state == 'references':
173 self._references.append( self._current_reference )
174 self.inter_pro_dict[ 'References' ] = self._references
175 self._state = 'chugging_along'
176
178 if self._state == 'references':
179 self._reference_state = 'pubmed_id'
180 self._flush_text()
181 if( self._current_reference != '' ):
182 self._references.append( self._current_reference )
183 self._current_reference = Reference()
184
189
191 dictionary = dict( attrs )
192 if self._state == 'references':
193 if self._reference_state == 'pubmed_id':
194 if dictionary.has_key( 'name' ):
195 self._current_reference.pubmed_id = dictionary[ 'name' ]
196 self._reference_state = 'authors'
197 elif self._reference_state == 'journal':
198 self._current_reference.journal = self._flush_text()
199 self._reference_state = 'medline_id'
200
213
def do_br(self, attrs):
    """Handle a <br> tag by storing the text gathered since the last flush.

    While the parser is in the 'references' state, a break seen in the
    'authors' reference state stores the flushed text as the current
    reference's authors and moves the reference state on to 'title'; any
    other break inside references leaves everything untouched.

    Outside references, the flushed text is appended to the 'Signatures'
    or 'Database links' list when that is the key currently waiting for
    a value.  attrs is not used.
    """
    if self._state == 'references':
        if self._reference_state == 'authors':
            self._current_reference.authors = self._flush_text()
            self._reference_state = 'title'
        return

    pending_key = self._key_waiting
    if pending_key in ('Signatures', 'Database links'):
        self.inter_pro_dict[pending_key].append(self._flush_text())
223
226
228 if self._state == 'references':
229 if self._reference_state == 'title':
230 text = self._flush_text()
231 self._current_reference.title = text
232 self._reference_state = 'journal'
233
234
236 if self._state == 'references':
237 if tag == 'li':
238 self.stack.pop()
239 elif tag == 'a':
240 if self._reference_state == 'pubmed_id':
241 self.stack.pop()
242 method(attrs)
243
244
def _flush_text(self):
    """Return the buffered text with surrounding whitespace removed.

    The buffer (self.text) is reset to the empty string as a side effect,
    so consecutive calls do not return the same text twice.
    """
    flushed = self.text.strip()
    self.text = ''
    return flushed
249
251 import warnings
252 warnings.warn("pairlist_to_dict was deprecated. Please use dict() instead of pairlist_to_dict")
253 return dict(pairs)
254
256 """get specified interpro entry"""
257 import urllib
258 handle = urllib.urlopen("http://www.ebi.ac.uk/interpro/IEntry?ac=" + id )
259
260
261 return handle
262
if __name__ == '__main__':
    # Ad-hoc smoke test: parse a saved InterPro entry page from the current
    # directory and print the resulting record.
    handle = open('IPR001064.htm')
    try:
        interpro_parser = InterProParser()
        # The raw handle is passed on, exactly as before; feed() wraps
        # plain handles in an UndoHandle itself, so the explicit (and
        # previously unused) UndoHandle is not needed here.
        record = interpro_parser.parse(handle)
    finally:
        # Close the input file even when parsing fails.
        handle.close()
    # Parenthesised form prints the same thing under Python 2.
    print(str(record))
270