Package Bio :: Package Data :: Module IUPACData
[hide private]
[frames | no frames]

Source Code for Module Bio.Data.IUPACData

  1  # Information about the IUPAC alphabets 
  2   
  3  protein_letters = "ACDEFGHIKLMNPQRSTVWY" 
  4  extended_protein_letters = "ACDEFGHIKLMNPQRSTVWYBXZ" 
  5  ambiguous_dna_letters = "GATCRYWSMKHBVDN" 
  6  unambiguous_dna_letters = "GATC" 
  7  ambiguous_rna_letters = "GAUCRYWSMKHBVDN" 
  8  unambiguous_rna_letters = "GAUC" 
  9   
 10  #   B == 5-bromouridine 
 11  #   D == 5,6-dihydrouridine 
 12  #   S == thiouridine 
 13  #   W == wyosine 
 14  extended_dna_letters = "GATCBDSW" 
 15   
 16  # are there extended forms? 
 17  #extended_rna_letters = "GAUCBDSW" 
 18   
 19  ambiguous_dna_values = { 
 20      "A": "A", 
 21      "C": "C", 
 22      "G": "G", 
 23      "T": "T", 
 24      "M": "AC", 
 25      "R": "AG", 
 26      "W": "AT", 
 27      "S": "CG", 
 28      "Y": "CT", 
 29      "K": "GT", 
 30      "V": "ACG", 
 31      "H": "ACT", 
 32      "D": "AGT", 
 33      "B": "CGT", 
 34      "X": "GATC", 
 35      "N": "GATC", 
 36      } 
 37  ambiguous_rna_values = { 
 38      "A": "A", 
 39      "C": "C", 
 40      "G": "G", 
 41      "U": "U", 
 42      "M": "AC", 
 43      "R": "AG", 
 44      "W": "AU", 
 45      "S": "CG", 
 46      "Y": "CU", 
 47      "K": "GU", 
 48      "V": "ACG", 
 49      "H": "ACU", 
 50      "D": "AGU", 
 51      "B": "CGU", 
 52      "X": "GAUC", 
 53      "N": "GAUC", 
 54      } 
 55   
 56  ambiguous_dna_complement = { 
 57      "A": "T", 
 58      "C": "G", 
 59      "G": "C", 
 60      "T": "A", 
 61      "M": "K", 
 62      "R": "Y", 
 63      "W": "W", 
 64      "S": "S", 
 65      "Y": "R", 
 66      "K": "M", 
 67      "V": "B", 
 68      "H": "D", 
 69      "D": "H", 
 70      "B": "V", 
 71      "X": "X", 
 72      "N": "N", 
 73      } 
 74   
 75  ambiguous_rna_complement = { 
 76      "A": "U", 
 77      "C": "G", 
 78      "G": "C", 
 79      "U": "A", 
 80      "M": "K", 
 81      "R": "Y", 
 82      "W": "W", 
 83      "S": "S", 
 84      "Y": "R", 
 85      "K": "M", 
 86      "V": "B", 
 87      "H": "D", 
 88      "D": "H", 
 89      "B": "V", 
 90      "X": "X", 
 91      "N": "N", 
 92      } 
 93   
 94   
95 -def _make_ranges(dict):
96 d = {} 97 for key, value in dict.items(): 98 d[key] = (value, value) 99 return d

# From bioperl's SeqStats.pm
unambiguous_dna_weights = {
    "A": 347.,
    "C": 323.,
    "G": 363.,
    "T": 322.,
}
unambiguous_dna_weight_ranges = _make_ranges(unambiguous_dna_weights)

# A, C and G reuse the DNA weight plus 16; U has its own literal value.
unambiguous_rna_weights = {
    "A": unambiguous_dna_weights["A"] + 16.,  # 16 for the oxygen
    "C": unambiguous_dna_weights["C"] + 16.,
    "G": unambiguous_dna_weights["G"] + 16.,
    "U": 340.,
}
unambiguous_rna_weight_ranges = _make_ranges(unambiguous_rna_weights)

118 -def _make_ambiguous_ranges(dict, weight_table):
119 range_d = {} 120 avg_d = {} 121 for letter, values in dict.items(): 122 weights = map(weight_table.get, values) 123 range_d[letter] = (min(weights), max(weights)) 124 total_w = 0.0 125 for w in weights: 126 total_w = total_w + w 127 avg_d[letter] = total_w / len(weights) 128 return range_d, avg_d

# (min, max) weight range and mean weight for every DNA / RNA code,
# derived from the per-base weight tables.
ambiguous_dna_weight_ranges, avg_ambiguous_dna_weights = \
    _make_ambiguous_ranges(ambiguous_dna_values,
                           unambiguous_dna_weights)

ambiguous_rna_weight_ranges, avg_ambiguous_rna_weights = \
    _make_ambiguous_ranges(ambiguous_rna_values,
                           unambiguous_rna_weights)

# Weight of each of the 20 protein letters.
# NOTE(review): presumably molecular weights of the free amino acids; the
# source of these figures is not stated here -- confirm.
protein_weights = {
    "A": 89.09,
    "C": 121.16,
    "D": 133.10,
    "E": 147.13,
    "F": 165.19,
    "G": 75.07,
    "H": 155.16,
    "I": 131.18,
    "K": 146.19,
    "L": 131.18,
    "M": 149.21,
    "N": 132.12,
    "P": 115.13,
    "Q": 146.15,
    "R": 174.20,
    "S": 105.09,
    "T": 119.12,
    "V": 117.15,
    "W": 204.23,
    "Y": 181.19
}

# Maps every extended protein letter to the plain letters it can stand for:
# B is N or D, Z is Q or E, X is any of the 20.
extended_protein_values = {
    "A": "A",
    "B": "ND",
    "C": "C",
    "D": "D",
    "E": "E",
    "F": "F",
    "G": "G",
    "H": "H",
    "I": "I",
    "K": "K",
    "L": "L",
    "M": "M",
    "N": "N",
    "P": "P",
    "Q": "Q",
    "R": "R",
    "S": "S",
    "T": "T",
    "V": "V",
    "W": "W",
    "X": "ACDEFGHIKLMNPQRSTVWY",
    "Y": "Y",
    "Z": "QE",
}

protein_weight_ranges = _make_ranges(protein_weights)

extended_protein_weight_ranges, avg_extended_protein_weights = \
    _make_ambiguous_ranges(extended_protein_values,
                           protein_weights)
