Package Bio :: Package Data :: Module CodonTable
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.CodonTable

  1  import string 
  2  from Bio import Alphabet 
  3  from Bio.Alphabet import IUPAC 
  4  from Bio.Data import IUPACData 
  5   
  6  unambiguous_dna_by_name = {} 
  7  unambiguous_dna_by_id = {} 
  8  unambiguous_rna_by_name = {} 
  9  unambiguous_rna_by_id = {} 
 10  generic_by_name = {} # unambiguous DNA or RNA 
 11  generic_by_id = {} # unambiguous DNA or RNA 
 12  ambiguous_generic_by_name = {} # ambiguous DNA or RNA 
 13  ambiguous_generic_by_id = {} # ambiguous DNA or RNA  
 14   
 15  # standard IUPAC unambiguous codons 
 16  standard_dna_table = None 
 17  standard_rna_table = None 
 18   
 19  # In the future, the back_table could return a statistically 
 20  # appropriate distribution of codons, so do not cache the results of 
 21  # back_table lookups! 
 22   
23 -class TranslationError(Exception):
24 pass
25
26 -class CodonTable:
27 nucleotide_alphabet = Alphabet.generic_nucleotide 28 protein_alphabet = Alphabet.generic_protein 29 30 forward_table = {} # only includes codons which actually code 31 back_table = {} # for back translations 32 start_codons = [] 33 stop_codons = [] 34 # Not always called from derived classes!
35 - def __init__(self, nucleotide_alphabet = nucleotide_alphabet, 36 protein_alphabet = protein_alphabet, 37 forward_table = forward_table, back_table = back_table, 38 start_codons = start_codons, stop_codons = stop_codons):
45
46 - def __str__(self) :
47 """Returns a simple text representation of the codon table 48 49 e.g. 50 >>> import Bio.Data.CodonTable 51 >>> print Bio.Data.CodonTable.standard_dna_table 52 >>> print Bio.Data.CodonTable.generic_by_id[1]""" 53 54 if self.id : 55 answer = "Table %i" % self.id 56 else : 57 answer = "Table ID unknown" 58 if self.names : 59 answer += " " + ", ".join(filter(None, self.names)) 60 61 #Use the main four letters (and the conventional ordering) 62 #even for ambiguous tables 63 letters = self.nucleotide_alphabet.letters 64 if isinstance(self.nucleotide_alphabet, Alphabet.DNAAlphabet) \ 65 or (letters is not None and "T" in letters) : 66 letters = "TCAG" 67 else : 68 #Should be either RNA or generic nucleotides, 69 #e.g. Bio.Data.CodonTable.generic_by_id[1] 70 letters = "UCAG" 71 72 #Build the table... 73 answer=answer + "\n\n |" + "|".join( \ 74 [" %s " % c2 for c2 in letters] \ 75 ) + "|" 76 answer=answer + "\n--+" \ 77 + "+".join(["---------" for c2 in letters]) + "+--" 78 for c1 in letters : 79 for c3 in letters : 80 line = c1 + " |" 81 for c2 in letters : 82 codon = c1+c2+c3 83 line = line + " %s" % codon 84 if codon in self.stop_codons : 85 line = line + " Stop|" 86 else : 87 try : 88 amino = self.forward_table[codon] 89 except KeyError : 90 amino = "?" 91 except TranslationError : 92 amino = "?" 93 if codon in self.start_codons : 94 line = line + " %s(s)|" % amino 95 else : 96 line = line + " %s |" % amino 97 line = line + " " + c3 98 answer = answer + "\n"+ line 99 answer=answer + "\n--+" \ 100 + "+".join(["---------" for c2 in letters]) + "+--" 101 return answer
102
103 -def make_back_table(table, default_stop_codon):
104 # ONLY RETURNS A SINGLE CODON 105 # Do the sort so changes in the hash implementation won't affect 106 # the result when one amino acid is coded by more than one codon. 107 back_table = {} 108 keys = table.keys() ; keys.sort() 109 for key in keys: 110 back_table[table[key]] = key 111 back_table[None] = default_stop_codon 112 return back_table
113 114
115 -class NCBICodonTable(CodonTable):
116 nucleotide_alphabet = Alphabet.generic_nucleotide 117 protein_alphabet = IUPAC.protein 118
119 - def __init__(self, id, names, table, start_codons, stop_codons):
120 self.id = id 121 self.names = names 122 self.forward_table = table 123 self.back_table = make_back_table(table, stop_codons[0]) 124 self.start_codons = start_codons 125 self.stop_codons = stop_codons
126 127
128 -class NCBICodonTableDNA(NCBICodonTable):
129 nucleotide_alphabet = IUPAC.unambiguous_dna
130
131 -class NCBICodonTableRNA(NCBICodonTable):
132 nucleotide_alphabet = IUPAC.unambiguous_rna
133 134 135
136 -def register_ncbi_table(name, alt_name, id, 137 table, start_codons, stop_codons):
138 names = string.split(name, "; ") 139 140 dna = NCBICodonTableDNA(id, names + [alt_name], table, start_codons, 141 stop_codons) 142 # replace all T's with U's for the RNA tables 143 rna_table = {} 144 generic_table = {} 145 for codon, val in table.items(): 146 generic_table[codon] = val 147 codon = codon.replace("T", "U") 148 generic_table[codon] = val 149 rna_table[codon] = val 150 rna_start_codons = [] 151 generic_start_codons = [] 152 for codon in start_codons: 153 generic_start_codons.append(codon) 154 codon = codon.replace("T", "U") 155 generic_start_codons.append(codon) 156 rna_start_codons.append(codon) 157 rna_stop_codons = [] 158 generic_stop_codons = [] 159 for codon in stop_codons: 160 generic_stop_codons.append(codon) 161 codon = codon.replace("T", "U") 162 generic_stop_codons.append(codon) 163 rna_stop_codons.append(codon) 164 165 generic = NCBICodonTable(id, names + [alt_name], generic_table, 166 generic_start_codons, generic_stop_codons) 167 rna = NCBICodonTableRNA(id, names + [alt_name], rna_table, 168 rna_start_codons, rna_stop_codons) 169 170 if id == 1: 171 global standard_dna_table, standard_rna_table 172 standard_dna_table = dna 173 standard_rna_table = rna 174 175 unambiguous_dna_by_id[id] = dna 176 unambiguous_rna_by_id[id] = rna 177 generic_by_id[id] = generic 178 179 if alt_name is not None: 180 names.append(alt_name) 181 182 for name in names: 183 unambiguous_dna_by_name[name] = dna 184 unambiguous_rna_by_name[name] = rna 185 generic_by_name[name] = generic
186 187 ### These tables created from the data file 188 ### ftp://ncbi.nlm.nih.gov/entrez/misc/data/gc.prt 189 ### using the following: 190 ##import re 191 ##for line in open("gc.prt").readlines(): 192 ## if line[:2] == " {": 193 ## names = [] 194 ## id = None 195 ## aa = None 196 ## start = None 197 ## bases = [] 198 ## elif line[:6] == " name": 199 ## names.append(re.search('"([^"]*)"', line).group(1)) 200 ## elif line[:8] == " name": 201 ## names.append(re.search('"(.*)$', line).group(1)) 202 ## elif line == ' Mitochondrial; Mycoplasma; Spiroplasma" ,\n': 203 ## names[-1] = names[-1] + " Mitochondrial; Mycoplasma; Spiroplasma" 204 ## elif line[:4] == " id": 205 ## id = int(re.search('(\d+)', line).group(1)) 206 ## elif line[:10] == " ncbieaa ": 207 ## aa = line[12:12+64] 208 ## elif line[:10] == " sncbieaa": 209 ## start = line[12:12+64] 210 ## elif line[:9] == " -- Base": 211 ## bases.append(line[12:12+64]) 212 ## elif line[:2] == " }": 213 ## assert names != [] and id is not None and aa is not None 214 ## assert start is not None and bases != [] 215 ## if len(names) == 1: 216 ## names.append(None) 217 ## print "register_ncbi_table(name = %s," % repr(names[0]) 218 ## print " alt_name = %s, id = %d", % \ 219 ## (repr(names[1]), id) 220 ## print " table = {" 221 ## s = " " 222 ## for i in range(64): 223 ## if aa[i] != "*": 224 ## t = " '%s%s%s': '%s'," % (bases[0][i], bases[1][i], 225 ## bases[2][i], aa[i]) 226 ## if len(s) + len(t) > 75: 227 ## print s 228 ## s = " " + t 229 ## else: 230 ## s = s + t 231 ## print s, "}," 232 233 ## s = " stop_codons = [" 234 ## for i in range(64): 235 ## if aa[i] == "*": 236 ## t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 237 ## if len(s) + len(t) > 75: 238 ## print s 239 ## s = " " + t 240 ## else: 241 ## s = s + t 242 ## print s, "]," 243 244 ## s = " start_codons = [" 245 ## for i in range(64): 246 ## if start[i] == "M": 247 ## t = " '%s%s%s'," % (bases[0][i], bases[1][i], bases[2][i]) 248 ## if len(s) + len(t) 
> 75: 249 ## print s 250 ## s = " " + t 251 ## else: 252 ## s = s + t 253 ## print s, "]" 254 ## print " )" 255 ## elif line[:2] == "--" or line == "\n" or line == "}\n" or \ 256 ## line == 'Genetic-code-table ::= {\n': 257 ## pass 258 ## else: 259 ## raise "Unparsed", repr(line) 260 261 register_ncbi_table(name = 'Standard', 262 alt_name = 'SGC0', id = 1, 263 table = { 264 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 265 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 266 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 267 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 268 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 269 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 270 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 271 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 272 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 273 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 274 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 275 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 276 'GGG': 'G', }, 277 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 278 start_codons = [ 'TTG', 'CTG', 'ATG', ] 279 ) 280 register_ncbi_table(name = 'Vertebrate Mitochondrial', 281 alt_name = 'SGC1', id = 2, 282 table = { 283 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 284 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 285 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 286 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 287 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 288 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 289 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 290 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 291 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'GTT': 'V', 292 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 
293 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 294 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 295 stop_codons = [ 'TAA', 'TAG', 'AGA', 'AGG', ], 296 start_codons = [ 'ATT', 'ATC', 'ATA', 'ATG', 'GTG', ] 297 ) 298 register_ncbi_table(name = 'Yeast Mitochondrial', 299 alt_name = 'SGC2', id = 3, 300 table = { 301 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 302 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 303 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'T', 304 'CTC': 'T', 'CTA': 'T', 'CTG': 'T', 'CCT': 'P', 'CCC': 'P', 305 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 306 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 307 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 308 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 309 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 310 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 311 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 312 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 313 'GGA': 'G', 'GGG': 'G', }, 314 stop_codons = [ 'TAA', 'TAG', ], 315 start_codons = [ 'ATG', ] 316 ) 317 register_ncbi_table(name = 'Mold Mitochondrial; Protozoan Mitochondrial; Coelenterate Mitochondrial; Mycoplasma; Spiroplasma', 318 alt_name = 'SGC3', id = 4, 319 table = { 320 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 321 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 322 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 323 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 324 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 325 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 326 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 327 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 328 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 329 'AGG': 'R', 'GTT': 'V', 
'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 330 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 331 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 332 'GGA': 'G', 'GGG': 'G', }, 333 stop_codons = [ 'TAA', 'TAG', ], 334 start_codons = [ 'TTA', 'TTG', 'CTG', 'ATT', 'ATC', 335 'ATA', 'ATG', 'GTG', ] 336 ) 337 register_ncbi_table(name = 'Invertebrate Mitochondrial', 338 alt_name = 'SGC4', id = 5, 339 table = { 340 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 341 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 342 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 343 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 344 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 345 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 346 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 347 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 348 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 349 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 350 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 351 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 352 'GGA': 'G', 'GGG': 'G', }, 353 stop_codons = [ 'TAA', 'TAG', ], 354 start_codons = [ 'TTG', 'ATT', 'ATC', 'ATA', 'ATG', 355 'GTG', ] 356 ) 357 register_ncbi_table(name = 'Ciliate Nuclear; Dasycladacean Nuclear; Hexamita Nuclear', 358 alt_name = 'SGC5', id = 6, 359 table = { 360 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 361 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 362 'TAA': 'Q', 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 363 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 364 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 365 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 366 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 367 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 368 'AAC': 
'N', 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 369 'AGA': 'R', 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 370 'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 371 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 372 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 373 stop_codons = [ 'TGA', ], 374 start_codons = [ 'ATG', ] 375 ) 376 register_ncbi_table(name = 'Echinoderm Mitochondrial', 377 alt_name = 'SGC8', id = 9, 378 table = { 379 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 380 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 381 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 382 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 383 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 384 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 385 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 386 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 387 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'S', 388 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 389 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 390 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 391 'GGA': 'G', 'GGG': 'G', }, 392 stop_codons = [ 'TAA', 'TAG', ], 393 start_codons = [ 'ATG', ] 394 ) 395 register_ncbi_table(name = 'Euplotid Nuclear', 396 alt_name = 'SGC9', id = 10, 397 table = { 398 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 399 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 400 'TGT': 'C', 'TGC': 'C', 'TGA': 'C', 'TGG': 'W', 'CTT': 'L', 401 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 402 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 403 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 404 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 405 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 406 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 
'R', 407 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 408 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 409 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 410 'GGA': 'G', 'GGG': 'G', }, 411 stop_codons = [ 'TAA', 'TAG', ], 412 start_codons = [ 'ATG', ] 413 ) 414 register_ncbi_table(name = 'Bacterial', 415 alt_name = None, id = 11, 416 table = { 417 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 418 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 419 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 420 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 421 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 422 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 423 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 424 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 425 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 426 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 427 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 428 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 429 'GGG': 'G', }, 430 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 431 start_codons = [ 'TTG', 'CTG', 'ATT', 'ATC', 'ATA', 432 'ATG', 'GTG', ] 433 ) 434 register_ncbi_table(name = 'Alternative Yeast Nuclear', 435 alt_name = None, id = 12, 436 table = { 437 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 438 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 439 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 'CTC': 'L', 440 'CTA': 'L', 'CTG': 'S', 'CCT': 'P', 'CCC': 'P', 'CCA': 'P', 441 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 'CAG': 'Q', 442 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 'ATT': 'I', 443 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 'ACC': 'T', 444 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 'AAA': 'K', 445 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 'AGG': 'R', 446 'GTT': 'V', 
'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 'GCT': 'A', 447 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 'GAC': 'D', 448 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 'GGA': 'G', 449 'GGG': 'G', }, 450 stop_codons = [ 'TAA', 'TAG', 'TGA', ], 451 start_codons = [ 'CTG', 'ATG', ] 452 ) 453 register_ncbi_table(name = 'Ascidian Mitochondrial', 454 alt_name = None, id = 13, 455 table = { 456 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 457 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 458 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 'CTT': 'L', 459 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 460 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 461 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 462 'ATT': 'I', 'ATC': 'I', 'ATA': 'M', 'ATG': 'M', 'ACT': 'T', 463 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 464 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'G', 465 'AGG': 'G', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 466 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 467 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 468 'GGA': 'G', 'GGG': 'G', }, 469 stop_codons = [ 'TAA', 'TAG', ], 470 start_codons = [ 'ATG', ] 471 ) 472 register_ncbi_table(name = 'Flatworm Mitochondrial', 473 alt_name = None, id = 14, 474 table = { 475 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 476 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 477 'TAA': 'Y', 'TGT': 'C', 'TGC': 'C', 'TGA': 'W', 'TGG': 'W', 478 'CTT': 'L', 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 479 'CCC': 'P', 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 480 'CAA': 'Q', 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 481 'CGG': 'R', 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 482 'ACT': 'T', 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 483 'AAC': 'N', 'AAA': 'N', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 484 'AGA': 'S', 'AGG': 'S', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 485 
'GTG': 'V', 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 486 'GAT': 'D', 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 487 'GGC': 'G', 'GGA': 'G', 'GGG': 'G', }, 488 stop_codons = [ 'TAG', ], 489 start_codons = [ 'ATG', ] 490 ) 491 register_ncbi_table(name = 'Blepharisma Macronuclear', 492 alt_name = None, id = 15, 493 table = { 494 'TTT': 'F', 'TTC': 'F', 'TTA': 'L', 'TTG': 'L', 'TCT': 'S', 495 'TCC': 'S', 'TCA': 'S', 'TCG': 'S', 'TAT': 'Y', 'TAC': 'Y', 496 'TAG': 'Q', 'TGT': 'C', 'TGC': 'C', 'TGG': 'W', 'CTT': 'L', 497 'CTC': 'L', 'CTA': 'L', 'CTG': 'L', 'CCT': 'P', 'CCC': 'P', 498 'CCA': 'P', 'CCG': 'P', 'CAT': 'H', 'CAC': 'H', 'CAA': 'Q', 499 'CAG': 'Q', 'CGT': 'R', 'CGC': 'R', 'CGA': 'R', 'CGG': 'R', 500 'ATT': 'I', 'ATC': 'I', 'ATA': 'I', 'ATG': 'M', 'ACT': 'T', 501 'ACC': 'T', 'ACA': 'T', 'ACG': 'T', 'AAT': 'N', 'AAC': 'N', 502 'AAA': 'K', 'AAG': 'K', 'AGT': 'S', 'AGC': 'S', 'AGA': 'R', 503 'AGG': 'R', 'GTT': 'V', 'GTC': 'V', 'GTA': 'V', 'GTG': 'V', 504 'GCT': 'A', 'GCC': 'A', 'GCA': 'A', 'GCG': 'A', 'GAT': 'D', 505 'GAC': 'D', 'GAA': 'E', 'GAG': 'E', 'GGT': 'G', 'GGC': 'G', 506 'GGA': 'G', 'GGG': 'G', }, 507 stop_codons = [ 'TAA', 'TGA', ], 508 start_codons = [ 'ATG', ] 509 ) 510 511 ######### Deal with ambiguous forward translations 512
513 -class AmbiguousCodonTable(CodonTable):
514 - def __init__(self, codon_table, 515 ambiguous_nucleotide_alphabet, 516 ambiguous_nucleotide_values, 517 ambiguous_protein_alphabet, 518 ambiguous_protein_values):
519 CodonTable.__init__(self, 520 ambiguous_nucleotide_alphabet, 521 ambiguous_protein_alphabet, 522 AmbiguousForwardTable(codon_table.forward_table, 523 ambiguous_nucleotide_values, 524 ambiguous_protein_values), 525 codon_table.back_table, 526 527 # These two are WRONG! I need to get the 528 # list of ambiguous codons which code for 529 # the stop codons XXX 530 list_ambiguous_codons(codon_table.start_codons), 531 list_ambiguous_codons(codon_table.stop_codons) 532 ) 533 self._codon_table = codon_table
534 535 # Be sneaky and forward attribute lookups to the original table. 536 # This lets us get the names, if the original table is an NCBI 537 # table.
538 - def __getattr__(self, name):
539 return getattr(self._codon_table, name)
540
541 -def list_possible_proteins(codon, forward_table, ambiguous_nucleotide_values):
542 c1, c2, c3 = codon 543 x1 = ambiguous_nucleotide_values[c1] 544 x2 = ambiguous_nucleotide_values[c2] 545 x3 = ambiguous_nucleotide_values[c3] 546 possible = {} 547 stops = [] 548 for y1 in x1: 549 for y2 in x2: 550 for y3 in x3: 551 try: 552 possible[forward_table[y1+y2+y3]] = 1 553 except KeyError: 554 # If tripping over a stop codon 555 stops.append(y1+y2+y3) 556 if stops: 557 if possible.keys(): 558 raise TranslationError, ("ambiguous codon codes for both proteins and stop codons", codon) 559 # This is a true stop codon - tell the caller about it 560 raise KeyError, codon 561 return possible.keys()
562
563 -def list_ambiguous_codons(codons):
564 # XXX not implemented! 565 return codons
566 567 # Forward translation is "onto", that is, any given codon always maps 568 # to the same protein, or it doesn't map at all. Thus, I can build 569 # off of an existing table to produce the ambiguous mappings. 570 # 571 # This handles the general case. Perhaps it's overkill? 572 # >>> t = CodonTable.ambiguous_dna_by_id[1] 573 # >>> t.forward_table["AAT"] 574 # 'N' 575 # >>> t.forward_table["GAT"] 576 # 'D' 577 # >>> t.forward_table["RAT"] 578 # 'B' 579 # >>> t.forward_table["YTA"] 580 # 'L' 581
582 -class AmbiguousForwardTable:
583 - def __init__(self, forward_table, ambiguous_nucleotide, ambiguous_protein):
584 self.forward_table = forward_table 585 586 self.ambiguous_nucleotide = ambiguous_nucleotide 587 self.ambiguous_protein = ambiguous_protein 588 589 inverted = {} 590 for name, val in ambiguous_protein.items(): 591 for c in val: 592 x = inverted.get(c, {}) 593 x[name] = 1 594 inverted[c] = x 595 for name, val in inverted.items(): 596 inverted[name] = val.keys() 597 self._inverted = inverted 598 599 self._cache = {}
600
601 - def get(self, codon, failobj = None):
602 try: 603 return self.__getitem__(codon) 604 except KeyError: 605 return failobj
606
607 - def __getitem__(self, codon):
608 try: 609 x = self._cache[codon] 610 except KeyError: 611 pass 612 else: 613 if x is TranslationError: 614 raise TranslationError, codon # no unique translation 615 if x is KeyError: 616 raise KeyError, codon # it's a stop codon 617 return x 618 try: 619 x = self.forward_table[codon] 620 self._cache[codon] = x 621 return x 622 except KeyError: 623 pass 624 625 # XXX Need to make part of this into a method which returns 626 # a list of all possible encodings for a codon! 627 try: 628 possible = list_possible_proteins(codon, 629 self.forward_table, 630 self.ambiguous_nucleotide) 631 except KeyError: 632 self._cache[codon] = KeyError 633 raise KeyError, codon # stop codon 634 except TranslationError: 635 self._cache[codon] = TranslationError 636 raise TranslationError, codon # does not code 637 assert len(possible) > 0, "unambiguous codons must code" 638 639 # Hah! Only one possible protein, so use it 640 if len(possible) == 1: 641 self._cache[codon] = possible[0] 642 return possible[0] 643 644 # See if there's an ambiguous protein encoding for the multiples. 645 # Find residues which exist in every coding set. 646 ambiguous_possible = {} 647 for amino in possible: 648 for term in self._inverted[amino]: 649 ambiguous_possible[term] = ambiguous_possible.get(term, 0) + 1 650 651 n = len(possible) 652 possible = [] 653 for amino, val in ambiguous_possible.items(): 654 if val == n: 655 possible.append(amino) 656 657 # No amino acid encoding for the results 658 if len(possible) == 0: 659 self._cache[codon] = TranslationError 660 raise TranslationError, codon # no valid translation 661 662 # All of these are valid, so choose one 663 # To be unique, sort by smallet ambiguity then alphabetically 664 # Can get this if "X" encodes for everything. 665 def _sort(x, y, table = self.ambiguous_protein): 666 a = cmp(len(table[x]), len(table[y])) 667 if a == 0: 668 return cmp(x, y) 669 return a
670 possible.sort(_sort) 671 672 x = possible[0] 673 self._cache[codon] = x 674 return x
675 676 #Prepare the ambiguous tables for DNA, RNA and Generic (DNA or RNA) 677 ambiguous_dna_by_name = {} 678 for key, val in unambiguous_dna_by_name.items(): 679 ambiguous_dna_by_name[key] = AmbiguousCodonTable(val, 680 IUPAC.ambiguous_dna, 681 IUPACData.ambiguous_dna_values, 682 IUPAC.extended_protein, 683 IUPACData.extended_protein_values) 684 ambiguous_dna_by_id = {} 685 for key, val in unambiguous_dna_by_id.items(): 686 ambiguous_dna_by_id[key] = AmbiguousCodonTable(val, 687 IUPAC.ambiguous_dna, 688 IUPACData.ambiguous_dna_values, 689 IUPAC.extended_protein, 690 IUPACData.extended_protein_values) 691 692 ambiguous_rna_by_name = {} 693 for key, val in unambiguous_rna_by_name.items(): 694 ambiguous_rna_by_name[key] = AmbiguousCodonTable(val, 695 IUPAC.ambiguous_rna, 696 IUPACData.ambiguous_rna_values, 697 IUPAC.extended_protein, 698 IUPACData.extended_protein_values) 699 ambiguous_rna_by_id = {} 700 for key, val in unambiguous_rna_by_id.items(): 701 ambiguous_rna_by_id[key] = AmbiguousCodonTable(val, 702 IUPAC.ambiguous_rna, 703 IUPACData.ambiguous_rna_values, 704 IUPAC.extended_protein, 705 IUPACData.extended_protein_values) 706 707 #The following isn't very elegant, but seems to work nicely. 
708 _merged_values = dict(IUPACData.ambiguous_rna_values.iteritems()) 709 _merged_values["T"] = "U" 710 711 for key, val in generic_by_name.items(): 712 ambiguous_generic_by_name[key] = AmbiguousCodonTable(val, 713 Alphabet.NucleotideAlphabet(), 714 _merged_values, 715 IUPAC.extended_protein, 716 IUPACData.extended_protein_values) 717 718 for key, val in generic_by_id.items(): 719 ambiguous_generic_by_id[key] = AmbiguousCodonTable(val, 720 Alphabet.NucleotideAlphabet(), 721 _merged_values, 722 IUPAC.extended_protein, 723 IUPACData.extended_protein_values) 724 725 #Basic sanity test, 726 for id in ambiguous_generic_by_id.keys() : 727 assert ambiguous_rna_by_id[id].forward_table["GUU"] == "V" 728 assert ambiguous_rna_by_id[id].forward_table["GUN"] == "V" 729 assert ambiguous_rna_by_id[id].forward_table["UUN"] == "X" #F or L 730 731 assert ambiguous_dna_by_id[id].forward_table["GTT"] == "V" 732 assert ambiguous_dna_by_id[id].forward_table["TTN"] == "X" #F or L 733 assert ambiguous_dna_by_id[id].forward_table["GTN"] == "V" 734 735 assert ambiguous_generic_by_id[id].forward_table.get("TTN") == "X" 736 assert ambiguous_generic_by_id[id].forward_table["ACN"] == "T" 737 assert ambiguous_generic_by_id[id].forward_table["GUU"] == "V" 738 assert ambiguous_generic_by_id[id].forward_table["GUN"] == "V" 739 assert ambiguous_generic_by_id[id].forward_table["UUN"] == "X" #F or L 740 assert ambiguous_generic_by_id[id].forward_table["GTT"] == "V" 741 assert ambiguous_generic_by_id[id].forward_table["TTN"] == "X" #F or L 742 assert ambiguous_generic_by_id[id].forward_table["GTN"] == "V" 743 #And finally something evil, an RNA-DNA mixture: 744 assert ambiguous_generic_by_id[id].forward_table["UTN"] == "X" #F or L 745 assert ambiguous_generic_by_id[id].forward_table["UTU"] == "F" 746 747 assert ambiguous_generic_by_id[1].stop_codons == ambiguous_generic_by_name["Standard"].stop_codons 748 assert ambiguous_generic_by_id[4].stop_codons == ambiguous_generic_by_name["SGC3"].stop_codons 749 
assert ambiguous_generic_by_id[15].stop_codons == ambiguous_generic_by_name['Blepharisma Macronuclear'].stop_codons 750 del _merged_values 751 del key, val 752