1
2
3
4
5
6
7
8 """Alphabets used in Seq objects etc to declare sequence type and letters.
9
10 This is used by sequences which contain a finite number of similar words.
11 """
12
14 size = None
15 letters = None
16
18 return self.__class__.__name__ + "()"
19
21 """Does this alphabet 'contain' the other (OBSOLETE?).
22
23 Returns a boolean. This relies on the Alphabet subclassing
24 hierarchy only, and does not check the letters property.
25 This isn't ideal, and doesn't seem to work as intended
26 with the AlphabetEncoder classes."""
27 return isinstance(other, self.__class__)
28
29 generic_alphabet = Alphabet()
30
34
35 single_letter_alphabet = SingleLetterAlphabet()
36
37
38
41
42 generic_protein = ProteinAlphabet()
43
44
47
48 generic_nucleotide = NucleotideAlphabet()
49
52
53 generic_dna = DNAAlphabet()
54
55
56
57
60
61 generic_rna = RNAAlphabet()
62
63
64
65
66
69
71 size = 3
72 letters = [
73 "Ala", "Asx", "Cys", "Asp", "Glu", "Phe", "Gly", "His", "Ile",
74 "Lys", "Leu", "Met", "Asn", "Pro", "Gln", "Arg", "Ser", "Thr",
75 "Sec", "Val", "Trp", "Xaa", "Tyr", "Glx",
76 ]
77
78
79
80
81
83 - def __init__(self, alphabet, new_letters):
91 if key[:2] == "__" and key[-2:] == "__":
92 raise AttributeError(key)
93 return getattr(self.alphabet, key)
94
96 return "%s(%r, %r)" % (self.__class__.__name__, self.alphabet,
97 self.new_letters)
98
100 """Does this alphabet 'contain' the other (OBSOLETE?).
101
102 This isn't implemented for the base AlphabetEncoder,
103 which will always return 0 (False)."""
104 return 0
105
106 -class Gapped(AlphabetEncoder):
107 - def __init__(self, alphabet, gap_char = "-"):
110
112 """Does this alphabet 'contain' the other (OBSOLETE?).
113
114 Returns a boolean. This relies on the Alphabet subclassing
115 hierarchy, and attempts to check the gap character. This fails
116 if the other alphabet does not have a gap character!
117 """
118 return other.gap_char == self.gap_char and \
119 self.alphabet.contains(other.alphabet)
120
122 - def __init__(self, alphabet, stop_symbol = "*"):
125
127 x = cmp(self.alphabet, other.alphabet)
128 if x == 0:
129 return cmp(self.stop_symbol, other.stop_symbol)
130 return x
131
133 """Does this alphabet 'contain' the other (OBSOLETE?).
134
135 Returns a boolean. This relies on the Alphabet subclassing
136 hierarchy, and attempts to check the stop symbol. This fails
137 if the other alphabet does not have a stop symbol!
138 """
139 return other.stop_symbol == self.stop_symbol and \
140 self.alphabet.contains(other.alphabet)
141
def _get_base_alphabet(alphabet):
    """Returns the non-gapped non-stop-codon Alphabet object (PRIVATE).

    Any AlphabetEncoder wrappers (e.g. Gapped) are peeled off by following
    their ``alphabet`` attribute until an object that is not an
    AlphabetEncoder is reached.  That object is returned.

    Raises AssertionError if the innermost object is not an Alphabet
    instance.
    """
    base = alphabet
    while isinstance(base, AlphabetEncoder):
        base = base.alphabet
    # Deliberately left as an assert so that the exception type seen by
    # existing callers (AssertionError) does not change.
    assert isinstance(base, Alphabet), \
        "Invalid alphabet found, %s" % repr(base)
    return base
150
def _consensus_base_alphabet(alphabets):
    """Returns a common but often generic base alphabet object (PRIVATE).

    This throws away any AlphabetEncoder information, e.g. Gapped alphabets.

    Note that DNA+RNA -> Nucleotide, and Nucleotide+Protein -> generic single
    letter.  These DO NOT raise an exception!

    If ``alphabets`` is empty, or two of its members share nothing more
    specific than the root Alphabet class, ``generic_alphabet`` is returned.
    """
    common = None
    for alpha in alphabets:
        base = _get_base_alphabet(alpha)
        if common is None:
            # First alphabet seen: it is the consensus so far.
            common = base
        elif common == base or isinstance(base, common.__class__):
            # Identical, or ``base`` is the more specific of the two:
            # the current (more generic) consensus already covers it.
            continue
        elif isinstance(common, base.__class__):
            # The current consensus is the more specific one, so widen it.
            common = base
        elif (isinstance(base, NucleotideAlphabet)
              and isinstance(common, NucleotideAlphabet)):
            # Unrelated nucleotide alphabets, e.g. DNA + RNA.
            common = generic_nucleotide
        elif (isinstance(base, SingleLetterAlphabet)
              and isinstance(common, SingleLetterAlphabet)):
            # Unrelated single letter alphabets, e.g. nucleotide + protein.
            common = single_letter_alphabet
        else:
            # Nothing in common below the root class; no later alphabet
            # can narrow this again, so stop here.
            return generic_alphabet
    if common is None:
        # No alphabets were supplied at all.
        return generic_alphabet
    return common
184
# NOTE(review): the ``def`` line for this function is missing from the
# listing under review; the name below is reconstructed - confirm it
# against the module's callers before merging.
def _consensus_alphabet(alphabets):
    """Returns a common but often generic alphabet object (PRIVATE).

    Note that DNA+RNA -> Nucleotide, and Nucleotide+Protein -> generic single
    letter.  These DO NOT raise an exception!

    This is aware of Gapped and HasStopCodon and new letters added by
    other AlphabetEncoders.  This WILL raise a ValueError if more than
    one gap character or stop symbol is present.

    The result is the consensus base alphabet, wrapped (in this order) in
    an AlphabetEncoder for any extra letters, a Gapped for the gap
    character and a HasStopCodon for the stop symbol, each only when
    there is something to add.
    """
    # The alphabets are walked several times below, so take a snapshot;
    # otherwise a one-shot iterator would be exhausted by the first pass
    # and the gap/stop/new letter information silently lost.
    alphabets = list(alphabets)
    base = _consensus_base_alphabet(alphabets)

    # First pass: settle the single gap character and stop symbol (if any).
    gap = None
    stop = None
    for alpha in alphabets:
        if hasattr(alpha, "gap_char"):
            if gap is None:
                gap = alpha.gap_char
            elif gap != alpha.gap_char:
                raise ValueError("More than one gap character present")
        if hasattr(alpha, "stop_symbol"):
            if stop is None:
                stop = alpha.stop_symbol
            elif stop != alpha.stop_symbol:
                raise ValueError("More than one stop symbol present")

    # Second pass: gather the remaining extra letters.  Doing this after
    # the gap and stop are known keeps the result independent of the order
    # of the arguments (the gap/stop are added by their own wrappers below
    # and must not be repeated as ordinary new letters).
    new_letters = ""
    for alpha in alphabets:
        if hasattr(alpha, "new_letters"):
            for letter in alpha.new_letters:
                if letter not in new_letters \
                        and letter != gap and letter != stop:
                    new_letters += letter

    consensus = base
    if new_letters:
        consensus = AlphabetEncoder(consensus, new_letters)
    if gap:
        consensus = Gapped(consensus, gap_char=gap)
    if stop:
        consensus = HasStopCodon(consensus, stop_symbol=stop)
    return consensus
232
# NOTE(review): the ``def`` line for this function is missing from the
# listing under review; the name below is reconstructed - confirm it
# against the module's callers before merging.
def _check_type_compatible(alphabets):
    """Returns True except for DNA+RNA or Nucleotide+Protein (PRIVATE).

    This relies on the Alphabet subclassing hierarchy.  It does not
    check things like gap characters or stop symbols.

    Each alphabet is reduced to its base alphabet first; False is returned
    as soon as an incompatible pair has been seen.
    """
    dna = rna = nucl = protein = False
    for alpha in alphabets:
        base = _get_base_alphabet(alpha)
        # DNA and RNA are tested before the generic nucleotide class so
        # that the more specific type decides which flags get set.
        if isinstance(base, DNAAlphabet):
            dna = nucl = True
            if rna or protein:
                return False
        elif isinstance(base, RNAAlphabet):
            rna = nucl = True
            if dna or protein:
                return False
        elif isinstance(base, NucleotideAlphabet):
            nucl = True
            if protein:
                return False
        elif isinstance(base, ProteinAlphabet):
            protein = True
            if nucl:
                return False
    return True
256