1
2
3
4
5
6 from Bio import Seq
7 from Bio.Alphabet import IUPAC
8 from math import sqrt
9 import sys
10
12 """A generic motif class.
13
14 A Motif has information about the alphabet used in the motif,
15 the length of the motif, the motif name, the number of occurrences,
16 and the individual instances of the motif. Each of the instances
17 is an object of the Instance class.
18
19 Methods:
20 add_instance(instance): adds a new instance of the motif.
21 get_instance_by_name(name): returns an instance with the specified name.
22 """
24 self.instances = []
25 self.alphabet = None
26 self.length = 0
27 self.num_occurrences = 0
28 self.name = ""
29 self.consensus = ""
30 self.pssm = []
31
39
41 for i in set.instances:
42 if i.seqname == name:
43 return i
44 return None
45
51
58
61
67
69 if type(number) == int:
70 self.num_occurrences = number
71 else:
72 number = int(number)
73 self.num_occurrences = number
74
76 if self.alphabet == None:
77 raise ValueError, "Alphabet for motif has not been set"
78 moieties = ''
79 if self.alphabet == IUPAC.unambiguous_dna:
80 moieties = 'ACGT'
81 if self.alphabet == IUPAC.protein:
82 moieties = 'ACDEFGHIKLMNPQRSTVWY'
83 pssm = []
84 if self.instances and self.instances[0].sequence:
85 for position in self.instances[0].sequence:
86 pos = []
87 for m in moieties:
88 pos.append(0.0)
89 pssm.append(pos)
90 for instance in self.instances:
91 for position in range(self.length):
92 my_moiety = instance.sequence[position]
93 try:
94 moiety_index = moieties.index(my_moiety)
95 except ValueError:
96 moiety_index = 0
97 pssm[position][moiety_index] += 1.0
98 pssm = [[x/len(self.instances) for x in y] for y in pssm ]
99 pssm = [tuple(x) for x in pssm]
100 self.pssm = pssm
101 else:
102 self.pssm = None
103
105 if not self.pssm:
106 self.make_pssm()
107 consensus = ''
108 null_character = 'N'
109 moieties = 'ACGT'
110 if self.alphabet == IUPAC.protein:
111 null_character = 'X'
112 moieties = 'ACDEFGHIKLMNPQRSTVWY'
113 for position in self.pssm:
114 this_position = null_character
115 vals = zip(position,moieties)
116 good_values = filter(lambda x: x[0] >= minimum_frequency, vals)
117 if good_values:
118 letters = [str(x[1]) for x in good_values]
119 my_letter = '/'.join(letters)
120 else:
121 my_letter = null_character
122 consensus += my_letter
123 self.consensus = consensus
124
126 """A subclass of Motif used in parsing MEME (and MAST) output.
127
128 This sublcass defines functions and data specific to MEME motifs.
129 This includes the evalue for a motif and the PSSM of the motif.
130
131 Methods:
132 add_instance_from_values (name = 'default', pvalue = 1, sequence = 'ATA', start = 0, strand = +): create a new instance of the motif with the specified values.
133 add_to_pssm (position): add a new position to the pssm. The position should be a list of nucleotide/amino acid frequencies
134 add_to_logodds (position): add a new position to the log odds matrix. The position should be a tuple of log odds values for the nucleotide/amino acid at that position.
135 compare_motifs (other_motif): returns the maximum correlation between this motif and other_motif
136 """
138 Motif.__init__(self)
139 self.evalue = 0.0
140 self.pssm = []
141 self.logodds = []
142
155
157 if type(evalue) == float:
158 self.evalue = evalue
159 else:
160 evalue = float(evalue)
161 self.evalue = evalue
162
164 self.pssm.append(thisposition)
165
167 self.logodds.append(thisposition)
168
170 if isinstance(motif, MEMEMotif):
171 if not self.pssm:
172 raise ValueError, 'This motif does not have a PSSM'
173 if not motif.pssm:
174 raise ValueError, 'The other motif does not have a PSSM'
175 mylen = len(self.pssm)
176 yourlen = len(motif.pssm)
177 myr = None
178 if mylen == yourlen:
179 myr = _corr(self.pssm, motif.pssm)
180 elif mylen < yourlen:
181 diff = yourlen - mylen
182 for i in range(0,diff):
183 yourpssm = motif.pssm[i:mylen+i]
184 r = _corr(self.pssm, yourpssm)
185 if r > myr:
186 myr = r
187 else:
188 diff = mylen - yourlen
189 for i in range(0, diff):
190 mypssm = self.pssm[i:yourlen+i]
191 r = _corr(mypssm, motif.pssm)
192 if r > myr:
193 myr = r
194 return myr
195 else:
196 sys.stderr.write(str(m2))
197 return None
198
199
200
202 """A class describing the instances of a motif, and the data thereof.
203 """
212
215
218
221
225
227 pval = float(pval)
228 self.pvalue = pval
229
233
236
239
240
242 sx = 0
243 sy = 0
244 sxx = 0
245 syy = 0
246 sxy = 0
247
248 if type(x[0] == list):
249 x = reduce(lambda a,b: a+b, x)
250 if type(y[0] == list):
251 y = reduce(lambda a,b: a+b, y)
252 sq = lambda t: t*t
253 length = len(x)
254 for a,b in zip(x,y):
255 sx += a
256 sy += b
257 sxx += sq(a)
258 syy += sq(b)
259 sxy += a*b
260 s1 = sxy - sx * sy * 1.0/length
261 s2 = (sxx - sx * sy * 1.0/length) * (syy - sx * sy * 1.0/length)
262 r = s1 / sqrt (s2)
263 return r
264