1
2
3
4
5 from Bio import Alphabet
6 from Bio.Alphabet import IUPAC
7 from Bio.Data import IUPACData
8
# Registries keyed by table name; filled in by register_ncbi_table().
unambiguous_dna_by_name = {}
unambiguous_rna_by_name = {}
generic_by_name = {}
ambiguous_generic_by_name = {}

# The same registries keyed by numeric table id.
unambiguous_dna_by_id = {}
unambiguous_rna_by_id = {}
generic_by_id = {}
ambiguous_generic_by_id = {}

# Set to the DNA / RNA tables with id == 1 once that table is registered.
standard_dna_table = None
standard_rna_table = None
21
22
23
24
25
28
105
116
117
129
130
133
136
137
138
# NOTE(review): the original ``def`` line is not visible in this view.  The
# parameter names are taken from the keyword calls below; the positional
# order is assumed -- confirm against the real header.
def register_ncbi_table(name, alt_name, id, table, start_codons, stop_codons):
    """Register one NCBI codon table in the module-level registries.

    Three table objects are built from the DNA data: a DNA table, an RNA
    table (every "T" replaced by "U") and a generic table that accepts both
    spellings.  Each one is stored under ``id`` and under every name.

    name -- one or more table names separated by "; ".
    alt_name -- an additional name, or None.
    id -- numeric table id; the table with id 1 also becomes the
          module-level standard DNA/RNA table.
    table -- dict mapping DNA codons to amino acid letters.
    start_codons, stop_codons -- lists of DNA codons.
    """
    global standard_dna_table, standard_rna_table

    names = name.split("; ")
    # Every table object gets its own list; alt_name is included even when
    # it is None, exactly as before.
    dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons,
                            stop_codons)

    rna_table = {}
    generic_table = {}
    for dna_codon, amino in table.items():
        rna_codon = dna_codon.replace("T", "U")
        generic_table[dna_codon] = amino
        generic_table[rna_codon] = amino
        rna_table[rna_codon] = amino

    def _split_spellings(dna_codons):
        # Returns (rna_list, generic_list).  The generic list interleaves
        # DNA and RNA spellings; a codon without "T" therefore appears twice.
        rna_list = []
        generic_list = []
        for dna_codon in dna_codons:
            rna_codon = dna_codon.replace("T", "U")
            generic_list.extend([dna_codon, rna_codon])
            rna_list.append(rna_codon)
        return rna_list, generic_list

    rna_start_codons, generic_start_codons = _split_spellings(start_codons)
    rna_stop_codons, generic_stop_codons = _split_spellings(stop_codons)

    generic = NCBICodonTable(id, names + [alt_name], generic_table,
                             generic_start_codons, generic_stop_codons)
    rna = NCBICodonTableRNA(id, names + [alt_name], rna_table,
                            rna_start_codons, rna_stop_codons)

    if id == 1:
        standard_dna_table = dna
        standard_rna_table = rna

    unambiguous_dna_by_id[id] = dna
    unambiguous_rna_by_id[id] = rna
    generic_by_id[id] = generic

    # Only real names become lookup keys; a missing alt_name is skipped here.
    if alt_name is not None:
        names.append(alt_name)

    for table_name in names:
        unambiguous_dna_by_name[table_name] = dna
        unambiguous_rna_by_name[table_name] = rna
        generic_by_name[table_name] = generic
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
# Table 1: the standard code.
register_ncbi_table(
    name='Standard',
    alt_name='SGC0',
    id=1,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
        'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
        'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
        'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
        'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
        'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
        'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['TTG', 'CTG', 'ATG'],
)
# Table 2: TGA codes for W and ATA for M; AGA and AGG are stop codons.
register_ncbi_table(
    name='Vertebrate Mitochondrial',
    alt_name='SGC1',
    id=2,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V',
        'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A',
        'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E',
        'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'AGA', 'AGG'],
    start_codons=['ATT', 'ATC', 'ATA', 'ATG', 'GTG'],
)
# Table 3: TGA codes for W, ATA for M, and the four CTx codons for T.
register_ncbi_table(
    name='Yeast Mitochondrial',
    alt_name='SGC2',
    id=3,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T',
        'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG'],
)
# Table 4: registered under five "; "-separated names; TGA codes for W.
register_ncbi_table(
    name='Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma',
    alt_name='SGC3',
    id=4,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['TTA', 'TTG', 'CTG', 'ATT', 'ATC',
                  'ATA', 'ATG', 'GTG'],
)
# Table 5: TGA codes for W, ATA for M, and AGA/AGG for S.
register_ncbi_table(
    name='Invertebrate Mitochondrial',
    alt_name='SGC4',
    id=5,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
        'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['TTG', 'ATT', 'ATC', 'ATA', 'ATG',
                  'GTG'],
)
# Table 6: TAA and TAG code for Q, leaving TGA as the only stop codon.
register_ncbi_table(
    name='Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear',
    alt_name='SGC5',
    id=6,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W',
        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
        'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
        'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
        'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
        'AAC': 'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
        'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
        'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
        'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TGA'],
    start_codons=['ATG'],
)
# Table 9: TGA codes for W, AAA for N, and AGA/AGG for S.
register_ncbi_table(
    name='Echinoderm Mitochondrial',
    alt_name='SGC8',
    id=9,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S',
        'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG'],
)
# Table 10: TGA codes for C.
register_ncbi_table(
    name='Euplotid Nuclear',
    alt_name='SGC9',
    id=10,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG'],
)
# Table 11: same codon assignments as table 1, with more start codons.
# No alternative name.
register_ncbi_table(
    name='Bacterial',
    alt_name=None,
    id=11,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
        'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
        'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
        'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
        'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
        'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
        'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['TTG', 'CTG', 'ATT', 'ATC', 'ATA',
                  'ATG', 'GTG'],
)
# Table 12: CTG codes for S.  No alternative name.
register_ncbi_table(
    name='Alternative Yeast Nuclear',
    alt_name=None,
    id=12,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L',
        'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P',
        'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q',
        'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I',
        'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T',
        'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K',
        'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R',
        'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A',
        'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D',
        'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G',
        'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG', 'TGA'],
    start_codons=['CTG', 'ATG'],
)
# Table 13: TGA codes for W, ATA for M, and AGA/AGG for G.
# No alternative name.
register_ncbi_table(
    name='Ascidian Mitochondrial',
    alt_name=None,
    id=13,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G',
        'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TAG'],
    start_codons=['ATG'],
)
# Table 14: TAA codes for Y, TGA for W, AAA for N, and AGA/AGG for S;
# TAG is the only stop codon.  No alternative name.
register_ncbi_table(
    name='Flatworm Mitochondrial',
    alt_name=None,
    id=14,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W',
        'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P',
        'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H',
        'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R',
        'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M',
        'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N',
        'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S',
        'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V',
        'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A',
        'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G',
        'GGC': 'G', 'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAG'],
    start_codons=['ATG'],
)
# Table 15: TAG codes for Q; TAA and TGA remain stop codons.
# No alternative name.
register_ncbi_table(
    name='Blepharisma Macronuclear',
    alt_name=None,
    id=15,
    table={
        'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S',
        'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y',
        'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L',
        'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P',
        'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q',
        'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R',
        'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T',
        'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N',
        'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R',
        'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V',
        'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D',
        'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G',
        'GGA': 'G', 'GGG': 'G',
    },
    stop_codons=['TAA', 'TGA'],
    start_codons=['ATG'],
)
513
514
515
# NOTE(review): the ``class`` line and the ``__getattr__`` header are not
# visible in this view.  The class name comes from the constructor calls
# further down, the base class from the explicit CodonTable.__init__ call.
class AmbiguousCodonTable(CodonTable):
    """Codon table that also accepts ambiguous nucleotide letters.

    Wraps an unambiguous codon table.  The forward table is replaced by an
    AmbiguousForwardTable, and the start and stop codon lists are extended
    with list_ambiguous_codons().  Attributes not found on this object are
    looked up on the wrapped table.
    """

    def __init__(self, codon_table,
                 ambiguous_nucleotide_alphabet,
                 ambiguous_nucleotide_values,
                 ambiguous_protein_alphabet,
                 ambiguous_protein_values):
        """Build the ambiguous view of ``codon_table``.

        codon_table -- table to wrap; its forward_table, back_table,
                       start_codons and stop_codons are read.
        ambiguous_nucleotide_alphabet, ambiguous_protein_alphabet --
                       passed through to CodonTable.__init__.
        ambiguous_nucleotide_values -- maps each nucleotide letter to the
                       letters it can stand for.
        ambiguous_protein_values -- the same mapping for protein letters.
        """
        CodonTable.__init__(
            self,
            ambiguous_nucleotide_alphabet,
            ambiguous_protein_alphabet,
            AmbiguousForwardTable(codon_table.forward_table,
                                  ambiguous_nucleotide_values,
                                  ambiguous_protein_values),
            codon_table.back_table,
            list_ambiguous_codons(codon_table.start_codons,
                                  ambiguous_nucleotide_values),
            list_ambiguous_codons(codon_table.stop_codons,
                                  ambiguous_nucleotide_values),
        )
        self._codon_table = codon_table

    def __getattr__(self, name):
        """Forward unknown attribute lookups to the wrapped table."""
        # __getattr__ only runs when normal lookup fails.  If _codon_table
        # itself is missing (object not initialised yet, e.g. while being
        # copied or unpickled), reading self._codon_table below would call
        # this method again without end.
        if name == "_codon_table":
            raise AttributeError(name)
        return getattr(self._codon_table, name)
543
# NOTE(review): the original ``def`` line is not visible in this view.  The
# name and argument order are taken from the call in
# AmbiguousForwardTable.__getitem__.
def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
    """Return the amino acids an ambiguous codon could code for.

    codon -- three nucleotide letters.
    forward_table -- maps unambiguous codons to amino acid letters; a
                     missing codon is treated as a stop codon.
    ambiguous_nucleotide_values -- maps each nucleotide letter to the
                     letters it can stand for.

    Returns a list of distinct amino acid letters, in no particular order.

    Raises KeyError(codon) if every expansion is a stop codon, and
    TranslationError if some expansions code and others are stop codons.
    A letter missing from ambiguous_nucleotide_values also raises KeyError.
    """
    c1, c2, c3 = codon
    x1 = ambiguous_nucleotide_values[c1]
    x2 = ambiguous_nucleotide_values[c2]
    x3 = ambiguous_nucleotide_values[c3]

    possible = {}  # used as a set of amino acid letters
    stops = []
    for y1 in x1:
        for y2 in x2:
            for y3 in x3:
                unambiguous = y1 + y2 + y3
                try:
                    possible[forward_table[unambiguous]] = 1
                except KeyError:
                    # Not in the forward table, so a stop codon.
                    stops.append(unambiguous)

    if stops:
        if possible:
            raise TranslationError("ambiguous codon '%s' codes " % codon
                                   + "for both proteins and stop codons")
        # Only stop codons.
        raise KeyError(codon)
    # A real list, so callers can index it on Python 3 as well.
    return list(possible.keys())
566
# NOTE(review): the original ``def`` line is not visible in this view.  The
# name and argument order are taken from the calls elsewhere in this file.
def list_ambiguous_codons(codons, ambiguous_nucleotide_values):
    """Extend a codon list to include all possible ambiguous codons.

    e.g. ['TAG', 'TAA'] -> ['TAG', 'TAA', 'TAR']
         ['UAG', 'UGA'] -> ['UAG', 'UGA', 'URA']

    Note that ['TAG', 'TGA'] -> ['TAG', 'TGA'], this does not add 'TRR'.
    Thus only two more codons are added in the following:

    e.g. ['TGA', 'TAA', 'TAG'] -> ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']

    codons -- list of unambiguous codon strings; it is not modified.
    ambiguous_nucleotide_values -- maps each nucleotide letter to the
        letters it can stand for.

    Returns a new (longer) list of codon strings: a copy of ``codons``
    followed by the added ambiguous codons.  The order of the added codons
    follows the iteration order of ``ambiguous_nucleotide_values``.
    """
    def letters_covering(position):
        # Letters whose every meaning occurs at this position in some codon.
        seen = set([codon[position] for codon in codons])
        return [letter for (letter, meanings)
                in ambiguous_nucleotide_values.items()
                if seen.issuperset(set(meanings))]

    c1_list = letters_covering(0)
    c2_list = letters_covering(1)
    c3_list = letters_covering(2)

    known = set(codons)
    # Candidates may over-reach: the three positions were checked
    # independently, so each candidate is verified as a whole below.
    candidates = []
    already_listed = set()
    for c1 in c1_list:
        for c2 in c2_list:
            for c3 in c3_list:
                codon = c1 + c2 + c3
                if codon not in known and codon not in already_listed:
                    already_listed.add(codon)
                    candidates.append(codon)

    answer = codons[:]
    for ambig_codon in candidates:
        for codon in [c1 + c2 + c3
                      for c1 in ambiguous_nucleotide_values[ambig_codon[0]]
                      for c2 in ambiguous_nucleotide_values[ambig_codon[1]]
                      for c3 in ambiguous_nucleotide_values[ambig_codon[2]]]:
            if codon not in known:
                # One expansion falls outside the input list, so this
                # ambiguous codon would claim too much.
                break
        else:
            answer.append(ambig_codon)
    return answer
618
# Import-time checks of list_ambiguous_codons against the IUPAC letter tables.
assert list_ambiguous_codons(
    ['TGA', 'TAA'], IUPACData.ambiguous_dna_values
) == ['TGA', 'TAA', 'TRA']
assert list_ambiguous_codons(
    ['TAG', 'TGA'], IUPACData.ambiguous_dna_values
) == ['TAG', 'TGA']
assert list_ambiguous_codons(
    ['TAG', 'TAA'], IUPACData.ambiguous_dna_values
) == ['TAG', 'TAA', 'TAR']
assert list_ambiguous_codons(
    ['UAG', 'UAA'], IUPACData.ambiguous_rna_values
) == ['UAG', 'UAA', 'UAR']
assert list_ambiguous_codons(
    ['TGA', 'TAA', 'TAG'], IUPACData.ambiguous_dna_values
) == ['TGA', 'TAA', 'TAG', 'TAR', 'TRA']
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
# NOTE(review): the ``class`` line and the ``__getitem__`` header are not
# visible in this view.  The class name comes from the call in
# AmbiguousCodonTable.__init__, the method name from the call in get().
class AmbiguousForwardTable:
    """Dictionary-like codon -> amino acid lookup that accepts ambiguity.

    Unambiguous codons are looked up directly in the wrapped forward table.
    An ambiguous codon is expanded into every codon it can stand for.  If
    the expansions give several amino acids, the result is the ambiguous
    protein letter that covers all of them and has the fewest meanings.
    Every outcome is cached per codon, failures included.
    """

    def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
        """Wrap ``forward_table``.

        forward_table -- maps unambiguous codons to amino acid letters.
        ambiguous_nucleotide -- maps each nucleotide letter to the letters
                                it can stand for.
        ambiguous_protein -- the same mapping for protein letters.
        """
        self.forward_table = forward_table

        self.ambiguous_nucleotide = ambiguous_nucleotide
        self.ambiguous_protein = ambiguous_protein

        # Reverse mapping: amino acid letter -> list of the protein letters
        # whose meanings include it.
        inverted = {}
        for name, val in ambiguous_protein.items():
            for c in val:
                names = inverted.setdefault(c, [])
                if name not in names:
                    names.append(name)
        self._inverted = inverted

        # codon -> amino acid letter, or the exception class to re-raise.
        self._cache = {}

    def get(self, codon, failobj=None):
        """Return self[codon], or ``failobj`` when that raises KeyError."""
        try:
            return self.__getitem__(codon)
        except KeyError:
            return failobj

    def __getitem__(self, codon):
        """Translate ``codon``.

        Raises KeyError if the codon can only be a stop codon, and
        TranslationError if it mixes stop and coding codons or if no
        ambiguous protein letter covers all the possible amino acids.
        """
        try:
            x = self._cache[codon]
        except KeyError:
            pass
        else:
            # A cached failure is stored as the exception class itself.
            if x is TranslationError:
                raise TranslationError(codon)
            if x is KeyError:
                raise KeyError(codon)
            return x

        try:
            x = self.forward_table[codon]
        except KeyError:
            pass
        else:
            self._cache[codon] = x
            return x

        # Not an unambiguous codon: expand it.
        try:
            possible = list_possible_proteins(codon,
                                              self.forward_table,
                                              self.ambiguous_nucleotide)
        except KeyError:
            self._cache[codon] = KeyError
            raise KeyError(codon)  # stop codon
        except TranslationError:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)  # does not code
        # Accept any iterable of letters (e.g. a dict view on Python 3).
        possible = list(possible)
        assert len(possible) > 0, "unambiguous codons must code"

        # Every expansion gives the same amino acid.
        if len(possible) == 1:
            self._cache[codon] = possible[0]
            return possible[0]

        # Count, for each ambiguous protein letter, how many of the
        # possible amino acids it covers.
        ambiguous_possible = {}
        for amino in possible:
            for term in self._inverted[amino]:
                ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1

        n = len(possible)
        candidates = [term for term, count in ambiguous_possible.items()
                      if count == n]

        # No single letter covers all of them.
        if not candidates:
            self._cache[codon] = TranslationError
            raise TranslationError(codon)

        # Prefer the letter with the fewest meanings; ties go to the letter
        # that sorts first.  The key form gives the same order as the old
        # cmp()-based comparator and also works on Python 3.
        table = self.ambiguous_protein
        x = min(candidates, key=lambda term: (len(table[term]), term))
        self._cache[codon] = x
        return x
733
734
# Build the ambiguous counterpart of every registered table.  The by-name
# and by-id registries each get their own AmbiguousCodonTable instances.
ambiguous_dna_by_name = {}
ambiguous_dna_by_id = {}
ambiguous_rna_by_name = {}
ambiguous_rna_by_id = {}

for _unambiguous, _ambiguous in ((unambiguous_dna_by_name, ambiguous_dna_by_name),
                                 (unambiguous_dna_by_id, ambiguous_dna_by_id)):
    for key, val in _unambiguous.items():
        _ambiguous[key] = AmbiguousCodonTable(
            val,
            IUPAC.ambiguous_dna,
            IUPACData.ambiguous_dna_values,
            IUPAC.extended_protein,
            IUPACData.extended_protein_values)

for _unambiguous, _ambiguous in ((unambiguous_rna_by_name, ambiguous_rna_by_name),
                                 (unambiguous_rna_by_id, ambiguous_rna_by_id)):
    for key, val in _unambiguous.items():
        _ambiguous[key] = AmbiguousCodonTable(
            val,
            IUPAC.ambiguous_rna,
            IUPACData.ambiguous_rna_values,
            IUPAC.extended_protein,
            IUPACData.extended_protein_values)

# The generic tables accept both spellings: start from the RNA letter values
# and map "T" to "U".
_merged_values = dict(IUPACData.ambiguous_rna_values.iteritems())
_merged_values["T"] = "U"

# These two registries already exist at module level and are filled in place.
for _unambiguous, _ambiguous in ((generic_by_name, ambiguous_generic_by_name),
                                 (generic_by_id, ambiguous_generic_by_id)):
    for key, val in _unambiguous.items():
        _ambiguous[key] = AmbiguousCodonTable(
            val,
            Alphabet.NucleotideAlphabet(),
            _merged_values,
            IUPAC.extended_protein,
            IUPACData.extended_protein_values)

# Keep the loop helpers out of the module namespace.
del _merged_values
del key, val
del _unambiguous, _ambiguous
784
785
# Import-time sanity checks on the tables built above.
for n in ambiguous_generic_by_id:
    assert ambiguous_rna_by_id[n].forward_table["GUU"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["GUN"] == "V"
    assert ambiguous_rna_by_id[n].forward_table["UUN"] == "X"

    if "UAA" in unambiguous_rna_by_id[n].stop_codons \
            and "UGA" in unambiguous_rna_by_id[n].stop_codons:
        # "TRA" expands to TAA and TGA, both stop codons here, so the forward
        # table must refuse to translate it.  This used to print the value
        # with a Python 2 print statement before failing; the lookup alone is
        # enough and also parses on Python 3.
        try:
            ambiguous_dna_by_id[n].forward_table["TRA"]
        except KeyError:
            pass
        else:
            assert False, "Should be a stop only"
        assert "URA" in ambiguous_generic_by_id[n].stop_codons
        assert "URA" in ambiguous_rna_by_id[n].stop_codons
        assert "TRA" in ambiguous_generic_by_id[n].stop_codons
        assert "TRA" in ambiguous_dna_by_id[n].stop_codons
del n

# Lookup by id and lookup by name must agree.
assert ambiguous_generic_by_id[1].stop_codons == ambiguous_generic_by_name["Standard"].stop_codons
assert ambiguous_generic_by_id[4].stop_codons == ambiguous_generic_by_name["SGC3"].stop_codons
assert ambiguous_generic_by_id[15].stop_codons == ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons
806