Package Bio :: Package Alphabet :: Module Reduced
[hide private]
[frames] | no frames]

Source Code for Module Bio.Alphabet.Reduced

  1  # Copyright 2004 by Iddo Friedberg. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """Reduced alphabets which lump together several amino-acids into one letter. 
  8   
  9  Reduced (redundant or simplified) alphabets are used to represent protein sequences using an 
 10  alternative alphabet which lumps together several amino-acids into one letter, based 
 11  on physico-chemical traits. For example, all the aliphatics (I,L,V) are usually 
 12  quite interchangeable, so many sequence studies group them into one letter. 
 13   
 14  Examples of reduced alphabets are available in: 
 15   
 16  http://viscose.ifg.uni-muenster.de/html/alphabets.html 
 17   
 18  Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of 
 19  the tables here, or a user-defined table. 
 20  """ 
 21   
 22  from Bio import Alphabet 
 23   
 24  # The Murphy tables are from here: 
 25  #       Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid alphabets for protein 
 26  #       fold recognition and implications for folding. Protein Eng. 13(3):149-152 
 27   
 28  murphy_15_tab = {"L": "L", 
 29               "V": "L", 
 30               "I": "L", 
 31               "M": "L", 
 32               "C": "C", 
 33               "A": "A", 
 34               "G": "G", 
 35               "S": "S", 
 36               "T": "T", 
 37               "P": "P", 
 38               "F": "F", 
 39               "Y": "F", 
 40               "W": "W", 
 41               "E": "E", 
 42               "D": "D", 
 43               "N": "N", 
 44               "Q": "Q", 
 45               "K": "K", 
 46               "R": "K", 
 47               "H": "H"} 
 48   
class Murphy15(Alphabet.ProteinAlphabet):
    """Protein alphabet holding the 15 letters that murphy_15_tab maps onto."""

    letters = "LCAGSTPFWEDNQKH"
    # Kept equal to the number of letters above (15).
    size = len(letters)
# Module-level instance of the 15-letter Murphy alphabet.
murphy_15 = Murphy15()

# Murphy 10-letter reduction table.
# Maps each of the 20 one-letter amino-acid codes to the letter representing
# its group; written as (representative, members) pairs expanded per residue.
murphy_10_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}
class Murphy10(Alphabet.ProteinAlphabet):
    """Protein alphabet holding the 10 letters that murphy_10_tab maps onto."""

    letters = "LCAGSPFEKH"
    # Kept equal to the number of letters above (10).
    size = len(letters)
# Module-level instance of the 10-letter Murphy alphabet.
murphy_10 = Murphy10()

# Murphy 8-letter reduction table.
# Maps each of the 20 one-letter amino-acid codes to the letter representing
# its group; written as (representative, members) pairs expanded per residue.
murphy_8_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIMC"),
        ("A", "AG"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in members
}
class Murphy8(Alphabet.ProteinAlphabet):
    """Protein alphabet holding the 8 letters that murphy_8_tab maps onto."""

    letters = "LASPFEKH"
    # Kept equal to the number of letters above (8).
    size = len(letters)
# Module-level instance of the 8-letter Murphy alphabet.
murphy_8 = Murphy8()

# Murphy 4-letter reduction table.
# Maps each of the 20 one-letter amino-acid codes to the letter representing
# its group; written as (representative, members) pairs expanded per residue.
murphy_4_tab = {
    residue: representative
    for representative, members in (
        ("L", "LVIMC"),
        ("A", "AGSTP"),
        ("F", "FYW"),
        ("E", "EDNQKRH"),
    )
    for residue in members
}
class Murphy4(Alphabet.ProteinAlphabet):
    """Protein alphabet holding the 4 letters that murphy_4_tab maps onto."""

    letters = "LAFE"
    # Kept equal to the number of letters above (4).
    size = len(letters)
# Module-level instance of the 4-letter Murphy alphabet.
murphy_4 = Murphy4()

# HP model reduction table.
# Maps each of the 20 one-letter amino-acid codes to "P" or "H"; written as
# (representative, members) pairs expanded per residue.
hp_model_tab = {
    residue: representative
    for representative, members in (
        ("P", "AGTSNQDEHRKP"),  # Hydrophilic
        ("H", "CMFILVWY"),      # Hydrophobic
    )
    for residue in members
}
class HPModel(Alphabet.ProteinAlphabet):
    """Protein alphabet holding the 2 letters that hp_model_tab maps onto."""

    letters = "HP"
    # Kept equal to the number of letters above (2).
    size = len(letters)
# Module-level instance of the two-letter HP alphabet.
hp_model = HPModel()

# PC5 reduction table.
# Maps each of the 20 one-letter amino-acid codes to one of five class
# letters; written as (representative, members) pairs expanded per residue.
pc_5_table = {
    residue: representative
    for representative, members in (
        ("A", "IVL"),    # Aliphatic
        ("R", "FYWH"),   # Aromatic
        ("C", "KRDE"),   # Charged
        ("T", "GACS"),   # Tiny
        ("D", "TMQNP"),  # Diverse
    )
    for residue in members
}
class PC5(Alphabet.ProteinAlphabet):
    """Protein alphabet holding the 5 letters that pc_5_table maps onto."""

    letters = "ARCTD"
    # Kept equal to the number of letters above (5).
    size = len(letters)
# Module-level instance of the PC5 alphabet, matching the pattern used for the
# other alphabet classes in this module (murphy_15, murphy_10, murphy_8,
# murphy_4, hp_model). This line previously repeated `hp_model = HPModel()`,
# which left PC5 without an instance; hp_model itself is still created right
# after the HPModel class definition.
pc_5 = PC5()