1
2
3
4
5 """
6 Implementation of sequence motifs.
7
8 Changes:
9 9.2007 (BW) : added the to_faste() and .weblogo() methods allowing to use the Berkeley weblogo server at http://weblogo.berkeley.edu/
10 """
11
12 from Bio.SubsMat import FreqTable
13
15 """
16 A class representing sequence motifs.
17 """
19 self.instances = []
20 self.score = 0.0
21 self.mask = []
22 self._pwm_is_current = 0
23 self._pwm = []
24 self.alphabet=None
25 self.length=None
26
28 if self.length==None:
29 self.length = len
30 elif self.length != len:
31 raise ValueError("You can't change the length of the motif")
32
38
47
49 """
50 sets the mask for the motif
51
52 The mask should be a string containing asterisks in the position of significant columns and spaces in other columns
53 """
54 self._check_length(len(mask))
55 self.mask=[]
56 for char in mask:
57 if char=="*":
58 self.mask.append(1)
59 elif char==" ":
60 self.mask.append(0)
61 else:
62 raise ValueError("Mask should contain only '*' or ' ' and not a '%s'"%char)
63
65 """
66 returns the PWM computed for the set of instances
67 """
68
69 if self._pwm_is_current:
70 return self._pwm
71
72 self._pwm = []
73 for i in xrange(len(self.mask)):
74 dict = {}
75
76 for letter in self.alphabet.letters:
77 dict[letter]=0
78
79 for seq in self.instances:
80 dict[seq[i]]=dict[seq[i]]+1
81 self._pwm.append(FreqTable.FreqTable(dict,FreqTable.COUNT,self.alphabet))
82 self._pwm_is_current=True
83 return self._pwm
84
94
95 - def score_hit(self,sequence,position,normalized=1,masked=0):
109
110 - def search_pwm(self,sequence,threshold=0.0,normalized=1,masked=1):
119
120
121 - def sim(self, motif, masked = 0):
122 """
123 return the similarity score for the given motif against self.
124
125 We use the Pearson's correlation of the respective probabilities.
126 If the motifs have different length or mask raise the ValueError.
127 """
128
129 from math import sqrt
130
131 if self.alphabet != motif.alphabet:
132 raise ValueError("Wrong alphabet")
133
134 if self.length != motif.length:
135 raise ValueError("Wrong length")
136
137 if masked and self.mask!=motif.mask:
138 raise ValueError("Wrong mask")
139
140 sxx = 0
141 sxy = 0
142 sx = 0
143 sy = 0
144 syy = 0
145
146 for pos in xrange(self.length):
147 if not masked or self.mask:
148 for l in self.alphabet.letters:
149 xi = self.pwm()[pos][l]
150 yi = motif.pwm()[pos][l]
151 sx = sx + xi
152 sy = sy + yi
153 sxx = sxx + xi * xi
154 syy = syy + yi * yi
155 sxy = sxy + xi * yi
156
157 if masked:
158 norm = len(filter(lambda x: x,self.mask))
159 else:
160 norm = self.length
161
162 norm *= len(self.alphabet.letters)
163 s1 = (sxy - sx*sy*1.0/norm)
164 s2 = (sxx - sx*sx*1.0/norm)*(syy- sy*sy*1.0/norm)
165
166 return s1/sqrt(s2)
167
168 - def read(self,stream):
169 """
170 reads the motif from the stream
171
172 the self.alphabet variable must be set before
173 """
174 from Bio.Seq import Seq
175 while 1:
176 ln = stream.readline()
177 if "*" in ln:
178 self.set_mask(ln.strip("\n\c"))
179 break
180 self.add_instance(Seq(ln.strip(),self.alphabet))
181
183 """
184 string representation of motif
185 """
186 str = ""
187 for inst in self.instances:
188 str = str + inst.tostring() + "\n"
189
190 for i in xrange(self.length):
191 if self.mask[i]:
192 str = str + "*"
193 else:
194 str = str + " "
195 str = str + "\n"
196
197 return str
198
200 """
201 writes the motif to the stream
202 """
203
204 stream.write(self.__str__())
205
206
207
209 """
210 FASTA representation of motif
211 """
212 str = ""
213 for i,inst in enumerate(self.instances):
214 str = str + "> instance %d\n"%i + inst.tostring() + "\n"
215
216 return str
217
218 - def weblogo(self,fname,format="PNG",**kwds):
219 """
220 uses the Berkeley weblogo service to download and save a weblogo of itself
221
222 requires an internet connection.
223 The parameters from **kwds are passed directly to the weblogo server.
224 """
225 import urllib
226 import urllib2
227
228 al= self.to_fasta()
229
230 url = 'http://weblogo.berkeley.edu/logo.cgi'
231 values = {'sequence' : al,
232 'format' : format,
233 'logowidth' : '18',
234 'logoheight' : '5',
235 'logounits' : 'cm',
236 'kind' : 'AUTO',
237 'firstnum' : "1",
238 'command' : 'Create Logo',
239 'smallsamplecorrection' : "on",
240 'symbolsperline' : 32,
241 'res' : '96',
242 'res_units' : 'ppi',
243 'antialias' : 'on',
244 'title' : '',
245 'barbits' : '',
246 'xaxis': 'on',
247 'xaxis_label' : '',
248 'yaxis': 'on',
249 'yaxis_label' : '',
250 'showends' : 'on',
251 'shrink' : '0.5',
252 'fineprint' : 'on',
253 'ticbits' : '1',
254 'colorscheme' : 'DEFAULT',
255 'color1' : 'green',
256 'color2' : 'blue',
257 'color3' : 'red',
258 'color4' : 'black',
259 'color5' : 'purple',
260 'color6' : 'orange',
261 'color1' : 'black',
262 }
263 for k,v in kwds.items():
264 values[k]=str(v)
265
266 data = urllib.urlencode(values)
267 req = urllib2.Request(url, data)
268 response = urllib2.urlopen(req)
269 f=open(fname,"w")
270 im=response.read()
271
272 f.write(im)
273 f.close()
274