Package Bio :: Package Alphabet :: Module Reduced
[hide private]
[frames] | no frames]

Source Code for Module Bio.Alphabet.Reduced

  1  from Bio import Alphabet 
  2  """ 
  3  Reduced (redundant or simplified) alphabets are used to represent protein sequences using an 
  4  alternative alphabet which lumps together several amino-acids into one letter, based 
  5  on physico-chemical traits. For example, all the aliphatics (I,L,V) are usually 
  6  quite interchangeable, so many sequence studies group them into one letter. 
  7   
  8  Examples of reduced alphabets are available in: 
  9   
 10  http://viscose.ifg.uni-muenster.de/html/alphabets.html 
 11   
 12  Bio.utils.reduce_sequence is used to take a Protein alphabet, and reduce it using one of 
 13  the tables here, or a user-defined table. 
 14  """ 
 15  # The Murphy tables are from here: 
 16  #       Murphy L.R., Wallqvist A, Levy RM. (2000) Simplified amino acid alphabets for protein 
 17  #       fold recognition and implications for folding. Protein Eng. 13(3):149-152 
 18   
 19  murphy_15_tab = {"L": "L", 
 20               "V": "L", 
 21               "I": "L", 
 22               "M": "L", 
 23               "C": "C", 
 24               "A": "A", 
 25               "G": "G", 
 26               "S": "S", 
 27               "T": "T", 
 28               "P": "P", 
 29               "F": "F", 
 30               "Y": "F", 
 31               "W": "W", 
 32               "E": "E", 
 33               "D": "D", 
 34               "N": "N", 
 35               "Q": "Q", 
 36               "K": "K", 
 37               "R": "K", 
 38               "H": "H"} 
 39   
class Murphy15(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 15 letters murphy_15_tab maps onto."""

    letters = "LCAGSTPFWEDNQKH"
    # Derived from the letter string; evaluates to 15, the value set before.
    # NOTE(review): confirm that `size` is meant to count letters -- the
    # Alphabet base classes are not visible from this module.
    size = len(letters)
# Ready-made instance of the 15-letter alphabet.
murphy_15 = Murphy15()

# Murphy 10-letter reduction. Each pair below is (reduced letter, amino acids
# folded into that letter), expanded into a flat lookup table in the original
# key order.
murphy_10_tab = {
    residue: reduced
    for reduced, residues in (
        ("L", "LVIM"),
        ("C", "C"),
        ("A", "A"),
        ("G", "G"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in residues
}
class Murphy10(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 10 letters murphy_10_tab maps onto."""

    letters = "LCAGSPFEKH"
    # Derived from the letter string; evaluates to 10, the value set before.
    # NOTE(review): confirm that `size` is meant to count letters -- the
    # Alphabet base classes are not visible from this module.
    size = len(letters)
# Ready-made instance of the 10-letter alphabet.
murphy_10 = Murphy10()

# Murphy 8-letter reduction. Each pair below is (reduced letter, amino acids
# folded into that letter), expanded into a flat lookup table in the original
# key order.
murphy_8_tab = {
    residue: reduced
    for reduced, residues in (
        ("L", "LVIMC"),
        ("A", "AG"),
        ("S", "ST"),
        ("P", "P"),
        ("F", "FYW"),
        ("E", "EDNQ"),
        ("K", "KR"),
        ("H", "H"),
    )
    for residue in residues
}
class Murphy8(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 8 letters murphy_8_tab maps onto."""

    letters = "LASPFEKH"
    # Derived from the letter string; evaluates to 8, the value set before.
    # NOTE(review): confirm that `size` is meant to count letters -- the
    # Alphabet base classes are not visible from this module.
    size = len(letters)
# Ready-made instance of the 8-letter alphabet.
murphy_8 = Murphy8()

# Murphy 4-letter reduction. Each pair below is (reduced letter, amino acids
# folded into that letter), expanded into a flat lookup table in the original
# key order.
murphy_4_tab = {
    residue: reduced
    for reduced, residues in (
        ("L", "LVIMC"),
        ("A", "AGSTP"),
        ("F", "FYW"),
        ("E", "EDNQKRH"),
    )
    for residue in residues
}
class Murphy4(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 4 letters murphy_4_tab maps onto."""

    letters = "LAFE"
    # Derived from the letter string; evaluates to 4, the value set before.
    # NOTE(review): confirm that `size` is meant to count letters -- the
    # Alphabet base classes are not visible from this module.
    size = len(letters)
# Ready-made instance of the 4-letter alphabet.
murphy_4 = Murphy4()

# Two-letter HP table: every amino acid becomes either "P" or "H".
# Built group by group so the keys keep their original order.
hp_model_tab = dict.fromkeys("AGTSNQDEHRKP", "P")  # Hydrophilic
hp_model_tab.update(dict.fromkeys("CMFILVWY", "H"))  # Hydrophobic
class HPModel(Alphabet.ProteinAlphabet):
    """Two-letter protein alphabet using the "H" and "P" letters of hp_model_tab."""

    letters = "HP"
    # Derived from the letter string; evaluates to 2, the value set before.
    # NOTE(review): confirm that `size` is meant to count letters -- the
    # Alphabet base classes are not visible from this module.
    size = len(letters)
# Ready-made instance of the HP alphabet.
hp_model = HPModel()

# Five-letter table. Each entry below is (reduced letter, amino acids folded
# into that letter); the trailing comment names the group. Expanded into a
# flat lookup table in the original key order.
pc_5_table = {
    residue: reduced
    for reduced, residues in (
        ("A", "IVL"),    # Aliphatic
        ("R", "FYWH"),   # Aromatic
        ("C", "KRDE"),   # Charged
        ("T", "GACS"),   # Tiny
        ("D", "TMQNP"),  # Diverse
    )
    for residue in residues
}
class PC5(Alphabet.ProteinAlphabet):
    """Reduced protein alphabet made of the 5 letters pc_5_table maps onto."""

    letters = "ARCTD"
    # Derived from the letter string; evaluates to 5, the value set before.
    # NOTE(review): confirm that `size` is meant to count letters -- the
    # Alphabet base classes are not visible from this module.
    size = len(letters)
# Ready-made instance of the PC5 alphabet, mirroring the instance created
# after every other alphabet class in this module. This line used to repeat
# `hp_model = HPModel()` (a copy/paste slip), so no PC5 instance was ever
# created; hp_model is still bound by its own assignment earlier in the module.
pc_5 = PC5()