Package Bio :: Package Compass
[hide private]
[frames] | no frames]

Source Code for Package Bio.Compass

  1  # Copyright 2004 by James Casbon.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  Code to deal with COMPASS output, a program for profile/profile comparison. 
  8   
  9  Compass is described in: 
 10   
 11  Sadreyev R, Grishin N. COMPASS: a tool for comparison of multiple protein 
 12  alignments with assessment of statistical significance. J Mol Biol. 2003 Feb 
 13  7;326(1):317-36. 
 14   
 15  Tested with COMPASS 1.24. 
 16   
 17  Classes: 
 18  Record        One result of a compass file 
 19  _Scanner      Scan compass results 
 20  _Consumer     Consume scanner events 
 21  RecordParser  Parse one compass record 
 22  Iterator      Iterate through a number of compass records 
 23  """ 
 24  from Bio import File 
 25  from Bio.ParserSupport import * 
 26  import re,string 
 27   
28 -class Record:
29 """ 30 Hold information from one compass hit. 31 Ali1 one is the query, Ali2 the hit. 32 """ 33
34 - def __init__(self):
35 self.query='' 36 self.hit='' 37 self.gap_threshold=0 38 self.query_length=0 39 self.query_filtered_length=0 40 self.query_nseqs=0 41 self.query_neffseqs=0 42 self.hit_length=0 43 self.hit_filtered_length=0 44 self.hit_nseqs=0 45 self.hit_neffseqs=0 46 self.sw_score=0 47 self.evalue=-1 48 self.query_start=-1 49 self.hit_start=-1 50 self.query_aln='' 51 self.hit_aln='' 52 self.positives=''
53 54
55 - def query_coverage(self):
56 """Return the length of the query covered in alignment""" 57 s = string.replace(self.query_aln, "=", "") 58 return len(s)
59
60 - def hit_coverage(self):
61 """Return the length of the hit covered in the alignment""" 62 s = string.replace(self.hit_aln, "=", "") 63 return len(s)
64
65 -class _Scanner:
66 """Reads compass output and generate events""" 67
68 - def feed(self, handle, consumer):
69 """Feed in COMPASS ouput""" 70 71 if isinstance(handle, File.UndoHandle): 72 pass 73 else: 74 handle = File.UndoHandle(handle) 75 76 77 assert isinstance(handle, File.UndoHandle), \ 78 "handle must be an UndoHandle" 79 if handle.peekline(): 80 self._scan_record(handle, consumer)
81 82
83 - def _scan_record(self,handle,consumer):
84 self._scan_names(handle, consumer) 85 self._scan_threshold(handle, consumer) 86 self._scan_lengths(handle,consumer) 87 self._scan_profilewidth(handle, consumer) 88 self._scan_scores(handle,consumer) 89 self._scan_alignment(handle,consumer)
90
91 - def _scan_names(self,handle,consumer):
92 """ 93 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 94 """ 95 read_and_call(handle, consumer.names, contains="Ali1:")
96
97 - def _scan_threshold(self,handle, consumer):
98 """ 99 Threshold of effective gap content in columns: 0.5 100 """ 101 read_and_call(handle, consumer.threshold, start="Threshold")
102
103 - def _scan_lengths(self,handle, consumer):
104 """ 105 length1=388 filtered_length1=386 length2=145 filtered_length2=137 106 """ 107 read_and_call(handle, consumer.lengths, start="length1=")
108
109 - def _scan_profilewidth(self,handle,consumer):
110 """ 111 Nseqs1=399 Neff1=12.972 Nseqs2=1 Neff2=6.099 112 """ 113 read_and_call(handle, consumer.profilewidth, contains="Nseqs1")
114
115 - def _scan_scores(self,handle, consumer):
116 """ 117 Smith-Waterman score = 37 Evalue = 5.75e+02 118 """ 119 read_and_call(handle, consumer.scores, start="Smith-Waterman")
120
121 - def _scan_alignment(self,handle, consumer):
122 """ 123 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 124 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 125 QUERY 2 LSDRLELVSASEIRKLFDIAAGMKDVISLGIGEPDFDTPQHIKEYAKEALDKGLTHYGPN 126 127 128 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 129 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 130 QUERY IGLLELREAIAEKLKKQNGIEADPKTEIMVLLGANQAFLMGLSAFLKDGEEVLIPTPAFV 131 132 """ 133 while 1: 134 line = handle.readline() 135 if not line: 136 break 137 if is_blank_line(line): 138 continue 139 else: 140 consumer.query_alignment(line) 141 read_and_call(handle, consumer.positive_alignment) 142 read_and_call(handle, consumer.hit_alignment)
143
144 -class _Consumer:
145 # all regular expressions used -- compile only once 146 _re_names = re.compile("Ali1:\s+(\S+)\s+Ali2:\s+(\S+)\s+") 147 _re_threshold = \ 148 re.compile("Threshold of effective gap content in columns: (\S+)") 149 _re_lengths = \ 150 re.compile("length1=(\S+)\s+filtered_length1=(\S+)\s+length2=(\S+)" 151 + "\s+filtered_length2=(\S+)") 152 _re_profilewidth = \ 153 re.compile("Nseqs1=(\S+)\s+Neff1=(\S+)\s+Nseqs2=(\S+)\s+Neff2=(\S+)") 154 _re_scores = re.compile("Smith-Waterman score = (\S+)\s+Evalue = (\S+)") 155 _re_start = re.compile("(\d+)") 156 _re_align = re.compile("^.{15}(\S+)") 157 _re_positive_alignment = re.compile("^.{15}(.+)") 158
159 - def __init__(self):
160 self.data = None
161
162 - def names(self, line):
163 """ 164 Ali1: 60456.blo.gz.aln Ali2: allscop//14984.blo.gz.aln 165 ------query----- -------hit------------- 166 """ 167 self.data = Record() 168 m = self.__class__._re_names.search(line) 169 self.data.query = m.group(1) 170 self.data.hit = m.group(2)
171
172 - def threshold(self,line):
173 m = self.__class__._re_threshold.search(line) 174 self.data.gap_threshold = float(m.group(1))
175
176 - def lengths(self,line):
177 m = self.__class__._re_lengths.search(line) 178 self.data.query_length = int(m.group(1)) 179 self.data.query_filtered_length = float(m.group(2)) 180 self.data.hit_length = int(m.group(3)) 181 self.data.hit_filtered_length = float(m.group(4))
182
183 - def profilewidth(self,line):
184 m = self.__class__._re_profilewidth.search(line) 185 self.data.query_nseqs = int(m.group(1)) 186 self.data.query_neffseqs = float(m.group(2)) 187 self.data.hit_nseqs = int(m.group(3)) 188 self.data.hit_neffseqs = float(m.group(4))
189
190 - def scores(self, line):
191 m = self.__class__._re_scores.search(line) 192 if m: 193 self.data.sw_score = int(m.group(1)) 194 self.data.evalue = float(m.group(2)) 195 else: 196 self.data.sw_score = 0 197 self.data.evalue = -1.0
198
199 - def query_alignment(self, line):
200 m = self.__class__._re_start.search(line) 201 if m: 202 self.data.query_start = int(m.group(1)) 203 m = self.__class__._re_align.match(line) 204 assert m!=None, "invalid match" 205 self.data.query_aln = self.data.query_aln + m.group(1)
206
207 - def positive_alignment(self,line):
208 m = self.__class__._re_positive_alignment.match(line) 209 assert m!=None, "invalid match" 210 self.data.positives = self.data.positives + m.group(1)
211
212 - def hit_alignment(self,line):
213 m = self.__class__._re_start.search(line) 214 if m: 215 self.data.hit_start = int(m.group(1)) 216 m = self.__class__._re_align.match(line) 217 assert m!=None, "invalid match" 218 self.data.hit_aln = self.data.hit_aln + m.group(1)
219
220 -class RecordParser(AbstractParser):
221 """Parses compass results into a Record object. 222 """
223 - def __init__(self):
224 self._scanner = _Scanner() 225 self._consumer = _Consumer()
226 227
228 - def parse(self, handle):
229 if isinstance(handle, File.UndoHandle): 230 uhandle = handle 231 else: 232 uhandle = File.UndoHandle(handle) 233 self._scanner.feed(uhandle, self._consumer) 234 return self._consumer.data
235
236 -class Iterator:
237 """Iterate through a file of compass results"""
238 - def __init__(self, handle):
239 self._uhandle = File.UndoHandle(handle) 240 self._parser = RecordParser()
241
242 - def next(self):
243 lines = [] 244 while 1: 245 line = self._uhandle.readline() 246 if not line: 247 break 248 if line[0:4] == "Ali1" and lines: 249 self._uhandle.saveline(line) 250 break 251 252 lines.append(line) 253 254 255 if not lines: 256 return None 257 258 data = string.join(lines, '') 259 return self._parser.parse(File.StringHandle(data))
260