Package Bio :: Package expressions :: Package embl :: Module embl65
[hide private]
[frames | no frames]

Source Code for Module Bio.expressions.embl.embl65

  1  """Format from EMBL Nucleotide Sequence Database Release 65, December 2000 
  2   
  3  """ 
  4   
  5  import warnings 
  6  warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module from Biopython", DeprecationWarning) 
  7   
  8   
  9   
 10  import Martel 
 11  from Martel import RecordReader, Time 
 12  from Bio import Std 
 13   
 14  from Bio.expressions.swissprot import sprot38 
 15   
 16  whitespace = Martel.Spaces() 
 17   
 18  ## ID - identification             (begins each entry; 1 per entry) 
 19  # ID   entryname  dataclass; molecule; division; sequencelength BP. 
 20   
 21  divisions = Martel.Re("EST|PHG|FUN|GSS|HTC|HTG|HUM|INV|ORG|MAM|VRT|PLN|" + \ 
 22                        "PRO|ROD|SYN|STS|UNC|VRL|[A-Z]{3}") 
 23   
 24  # XXX is found in S40706 
 25  ID_line = Martel.Str("ID   ") + \ 
 26            Std.dbid(Martel.UntilSep("entry_name", " "), {"type": "primary", 
 27                                                          "dbname": "embl"}) + \ 
 28            whitespace + \ 
 29            Martel.ToSep("dataclass", ";") + \ 
 30            whitespace + \ 
 31            Martel.Group("molecule", 
 32                         Std.alphabet(Martel.Str("DNA", "circular DNA"), 
 33                                      {"alphabet": "iupac-ambiguous-dna"}) | 
 34                         Std.alphabet(Martel.Str("RNA", "circular RNA"), 
 35                                      {"alphabet": "iupac-ambiguous-rna"}) | 
 36                         Std.alphabet(Martel.Str("XXX"), 
 37                                      {"alphabet": "nucleotide"})) + \ 
 38            Martel.Str("; ") + \ 
 39            Martel.Group("division", divisions) + \ 
 40            Martel.Str("; ") + \ 
 41            Martel.Digits("length") + \ 
 42            Martel.Str(" BP.") + \ 
 43            Martel.AnyEol() 
 44   
 45   
 46  ## AC - accession number           (>=1 per entry) 
 47  accession = Std.dbid(Martel.UntilSep("accession", ";"), 
 48                       {"type": "accession", 
 49                        "dbname": "embl"}) + Martel.Str(";") 
 50  AC_line = Martel.Str("AC   ") + \ 
 51            accession + Martel.Rep(Martel.Str(" ") + accession) + \ 
 52            Martel.AnyEol() 
 53             
 54  AC_block = Martel.Rep1(AC_line) 
 55   
 56  ## SV - sequence version           (1 per entry) 
 57  SV_line = Martel.Str("SV   ") + \ 
 58            Martel.Group("sequence_version", 
 59                         Martel.ToSep("accession", ".") + \ 
 60                         Martel.Digits("version")) + \ 
 61            Martel.AnyEol() 
 62   
 63   
 64  ## DT - date                       (2 per entry) 
 65  date = Time.make_expression("%(day)-%(Jan)-%(year)") 
 66   
 67  DT_created_line = Martel.Str("DT   ") + \ 
 68                    Martel.Group("date_created", date) + \ 
 69                    Martel.Str(" (Rel. ") + \ 
 70                    Martel.Digits("release_created") + \ 
 71                    Martel.Str(", Created)") + \ 
 72                    Martel.AnyEol() 
 73   
 74  DT_updated_line = Martel.Str("DT   ") + \ 
 75                    Martel.Group("date_updated", date) + \ 
 76                    Martel.Str(" (Rel. ") + \ 
 77                    Martel.Digits("release_updated") + \ 
 78                    Martel.Str(", Last updated, Version ") + \ 
 79                    Martel.Digits("version_number") + \ 
 80                    Martel.Str(")") + \ 
 81                    Martel.AnyEol() 
 82   
 83  DT_block = DT_created_line + DT_updated_line 
 84   
 85  ## DE - description                (>=1 per entry) 
 86  DE_line = Martel.Str("DE   ") + \ 
 87            Std.description(Martel.UntilEol("description")) + \ 
 88            Martel.AnyEol() 
 89   
 90  DE_block = Std.description_block(Martel.Group("description_block", 
 91                                                Martel.Rep1(DE_line))) 
 92   
 93  ## KW - keyword                    (>=1 per entry) 
 94  KW_line = Martel.Str("KW   ") + \ 
 95            Martel.ToEol("keyword_data") 
 96  KW_block = Martel.Rep1(KW_line) 
 97   
 98  ## OS - organism species           (>=1 per entry) 
 99  OS_block = sprot38.OS_block 
100   
101  ## OC - organism classification    (>=1 per entry) 
102  OC_block = sprot38.OC_block 
103   
104  ## OG - organelle                  (0 or 1 per entry) 
105  OG_block = sprot38.OG_block 
106   
107  organism = Martel.Group("organism", 
108                          OS_block + \ 
109                          OC_block + \ 
110                          Martel.Opt(OG_block)) 
111   
## RN - reference number           (>=1 per entry)
## RC - reference comment          (>=0 per entry)
## RP - reference positions        (>=1 per entry)
## RX - reference cross-reference  (>=0 per entry)
## RA - reference author(s)        (>=1 per entry)
## RT - reference title            (>=1 per entry)
## RL - reference location         (>=1 per entry)

# All reference line types reuse the SWISS-PROT release 38 definitions;
# only the RX block grouping is defined locally.
RN_line = sprot38.RN
RC_block = sprot38.RC_block
RP_line = sprot38.RP

RX_line = sprot38.RX
RX_block = Martel.Group("RX_block", Martel.Rep1(RX_line))

RA_block = sprot38.RA_block
RT_block = sprot38.RT_block
RL_block = sprot38.RL_block

# One citation: RN, then optional RC / RP / RX parts, then the RA, RT and
# RL blocks, in that order.
reference = Martel.Group(
    "reference",
    RN_line
    + Martel.Opt(RC_block)
    + Martel.Opt(RP_line)
    + Martel.Opt(RX_block)
    + RA_block
    + RT_block
    + RL_block
)
138   
## DR - database cross-reference   (>=0 per entry)
# Reuses the SWISS-PROT release 38 definition.
DR_block = sprot38.DR_block

## FH - feature table header       (0 or 2 per entry)
# The header is always the same two lines: the column titles, then a bare
# "FH" line.
FH_block = (
    Martel.Str("FH   Key             Location/Qualifiers")
    + Martel.AnyEol()
    + Martel.Str("FH")
    + Martel.AnyEol()
)
147   
## FT - feature table data         (>=0 per entry)
# Earlier, unparsed definition kept for reference:
##FT_line = Martel.Str("FT   ") + \
##          Martel.ToEol("ft_data")
##FT_block = Martel.Rep1(FT_line)

# The db_xref qualifier, i.e. the text that follows the leading "/":
#   db_xref="<dbname>:<dbid>"
# The quoted value is split at the first ":" into a database name and an
# identifier, each tagged separately inside one dbxref element.
fq_dbxref = (
    Std.feature_qualifier_name(Martel.Str("db_xref"))
    + Martel.Str('=')
    + Std.feature_qualifier_description(
        Martel.Str('"')
        + Std.dbxref(
            Std.dbxref_dbname(Martel.UntilSep(None, ":"))
            + Martel.Str(":")
            + Std.dbxref_dbid(Martel.UntilSep(None, '"'))
        )
        + Martel.Str('"')
    )
    + Martel.AnyEol()
)
162                          
163   
# Any other "name=value" qualifier (the text that follows the leading "/").
# The look-ahead assertion requires a word followed by "=" before anything
# is consumed.  The value runs to the end of the line and may continue on
# following "FT" + blanks lines.  A continuation line is accepted as long as
# it does not start a new "/name=" qualifier, so continuation text that
# merely begins with "/" is still treated as part of the value.
fq_generic = (
    Martel.Assert(Martel.Word() + Martel.Str("="))
    + Std.feature_qualifier_name(Martel.Word())
    + Martel.Str("=")
    + Std.feature_qualifier_description(Martel.UntilEol())
    + Martel.AnyEol()
    + Martel.Rep(
        Martel.Str("FT                   ")
        + (
            Martel.AssertNot(Martel.Str("/"))
            | Martel.AssertNot(Martel.Re(r"/\w+="))
        )
        + Std.feature_qualifier_description(Martel.UntilEol())
        + Martel.AnyEol()
    )
)

# A qualifier starts with "FT", the blank padding and "/".  The db_xref form
# is tried first; everything else falls back to the generic form.
feature_qualifier = Std.feature_qualifier(
    Martel.Str("FT                   /")
    + (fq_dbxref | fq_generic)
)
180   
# One feature: a key line ("FT   <key>  <location>"), any number of
# location continuation lines (padded lines that do not start with "/"),
# then any number of qualifiers.
feature = Std.feature(
    Martel.Str("FT   ")
    + Std.feature_name(Martel.UntilSep(sep = " "))
    + whitespace
    + Std.feature_location(Martel.UntilEol())
    + Martel.AnyEol()
    + Martel.Rep(
        Martel.Str("FT                   ")
        + Martel.AssertNot(Martel.Str("/"))
        + Std.feature_location(Martel.UntilEol())
        + Martel.AnyEol()
    )
    + Martel.Rep(feature_qualifier)
)

# The feature table is zero or more features; locations are flagged as
# using the "genbank" location style.
FT_block = Std.feature_block(
    Martel.Rep(feature),
    {"location-style": "genbank"}
)
197   
198                            
199       
200   
## CC - comments or notes          (>=0 per entry)
# Each comment line is captured unparsed as "comment".
CC_line = Martel.Str("CC   ") + Martel.ToEol("comment")

# One or more consecutive CC lines.
CC_block = Martel.Rep1(CC_line)

## XX - spacer line                (many per entry)
# A bare "XX" followed by the end of line.
XX = Martel.Str("XX") + Martel.AnyEol()
208   
## SQ - sequence header            (1 per entry)
# "SQ   Sequence <n> BP; <a> A; <c> C; <g> G; <t> T; <o> other;"
SQ_line = (
    Martel.Str("SQ   Sequence ")
    + Martel.Digits("num_BP")
    + Martel.Str(" BP; ")
    + Martel.Digits("num_A")
    + Martel.Str(" A; ")
    + Martel.Digits("num_C")
    + Martel.Str(" C; ")
    + Martel.Digits("num_G")
    + Martel.Str(" G; ")
    + Martel.Digits("num_T")
    + Martel.Str(" T; ")
    + Martel.Digits("num_other")
    + Martel.Str(" other;")
    + Martel.AnyEol()
)

## bb - (blanks) sequence data     (>=1 per entry)
# A data line is five blanks, a fixed 65-character sequence field (matched
# with "." so any character, including padding, is accepted), whitespace,
# and the end position of the line.
SQ_data = (
    Martel.Str("     ")
    + Std.sequence(Martel.Re(".{65}"))
    + whitespace
    + Martel.Digits("end_position")
    + Martel.AnyEol()
)

# The SQ header followed by one or more data lines.
SQ_block = Std.sequence_block(SQ_line + Martel.Rep1(SQ_data))

## // - termination line           (ends each entry; 1 per entry)
end = Martel.Str("//") + Martel.AnyEol()
236   
# Grammar for one complete EMBL entry, from the ID line to the "//"
# terminator.  Sections appear in a fixed order and each may be followed by
# an optional "XX" spacer line.
#
# The feature table (FH header + FT data) is optional: the line-type notes
# in this module give FH as "0 or 2 per entry" and FT as ">=0 per entry", so
# an entry with no feature table must still parse.  Entries that do have a
# feature table are matched exactly as before.
record = Martel.Group(
    "record",
    ID_line
    + Martel.Opt(XX)
    + AC_block
    + Martel.Opt(XX)
    + SV_line
    + Martel.Opt(XX)
    + DT_block
    + Martel.Opt(XX)
    + DE_block
    + Martel.Opt(XX)
    + KW_block
    + Martel.Opt(XX)
    + Martel.Rep1(organism + Martel.Opt(XX))
    + Martel.Rep(reference + Martel.Opt(XX))
    + Martel.Opt(DR_block + Martel.Opt(XX))
    + Martel.Rep(CC_block + Martel.Opt(XX))
    + Martel.Opt(FH_block + FT_block + Martel.Opt(XX))
    + SQ_block
    + end,
    {"format": "embl/65"}
)
262   
# Whole-file grammar as a single expression: one or more records inside a
# "dataset" group.
format_expression = Martel.Group(
    "dataset",
    Martel.Rep1(record),
    {"format": "embl/65"}
)

# Record-oriented variant of the same grammar: the input is split into
# records with RecordReader.EndsWith on the "//" terminator line, and each
# record is parsed with the `record` expression.
format = Martel.ParseRecords(
    "dataset",
    {"format": "embl/65"},
    record,
    RecordReader.EndsWith,
    ("//\n",)
)
268