Package Bio :: Package Wise :: Module dnal
[hide private]
[frames] | no frames]

Source Code for Module Bio.Wise.dnal

  1  #!/usr/bin/env python 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5  # 
  6  # Bio.Wise contains modules for running and processing the output of 
  7  # some of the models in the Wise2 package by Ewan Birney available from: 
  8  # ftp://ftp.ebi.ac.uk/pub/software/unix/wise2/ 
  9  # http://www.ebi.ac.uk/Wise2/ 
 10  #  
 11  # Bio.Wise.psw is for protein Smith-Waterman alignments 
 12  # Bio.Wise.dnal is for Smith-Waterman DNA alignments 
 13   
 14  __version__ = "$Revision: 1.12 $" 
 15   
 16  import commands 
 17  import itertools 
 18  import os 
 19  import re 
 20   
 21  from Bio import Wise 
 22   
 23  _SCORE_MATCH = 4 
 24  _SCORE_MISMATCH = -1 
 25  _SCORE_GAP_START = -5 
 26  _SCORE_GAP_EXTENSION = -1 
 27   
 28  _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"] 
 29   
30 -def _build_dnal_cmdline(match, mismatch, gap, extension):
31 res = _CMDLINE_DNAL[:] 32 res.extend(["-match", str(match)]) 33 res.extend(["-mis", str(mismatch)]) 34 res.extend(["-gap", str(-gap)]) # negative: convert score to penalty 35 res.extend(["-ext", str(-extension)]) # negative: convert score to penalty 36 37 return res
38 39 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
40 -def _fgrep_count(pattern, file):
41 return int(commands.getoutput(_CMDLINE_FGREP_COUNT % (pattern, file)))
42 43 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
44 -def _alb_line2coords(line):
45 return tuple([int(coord)+1 # one-based -> zero-based 46 for coord 47 in _re_alb_line2coords.match(line).groups()])
48
49 -def _get_coords(filename):
50 alb = file(filename) 51 52 start_line = None 53 end_line = None 54 55 for line in alb: 56 if line.startswith("["): 57 if not start_line: 58 start_line = line # rstrip not needed 59 else: 60 end_line = line 61 62 if end_line is None: # sequence is too short 63 return [(0, 0), (0, 0)] 64 65 return zip(*map(_alb_line2coords, [start_line, end_line])) # returns [(start0, end0), (start1, end1)]
66
67 -def _any(seq, pred=bool):
68 "Returns True if pred(x) is True at least one element in the iterable" 69 return True in itertools.imap(pred, seq)
70
71 -class Statistics(object):
72 """ 73 Calculate statistics from an ALB report 74 """
75 - def __init__(self, filename, match, mismatch, gap, extension):
76 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename) 77 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename) 78 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename) 79 80 if gap == extension: 81 self.extensions = 0 82 else: 83 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename) 84 85 self.score = (match*self.matches + 86 mismatch*self.mismatches + 87 gap*self.gaps + 88 extension*self.extensions) 89 90 if _any([self.matches, self.mismatches, self.gaps, self.extensions]): 91 self.coords = _get_coords(filename) 92 else: 93 self.coords = [(0, 0), (0,0)]
94
95 - def identity_fraction(self):
96 return self.matches/(self.matches+self.mismatches)
97 98 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions" 99
100 - def __str__(self):
101 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
102
103 -def align(pair, match=_SCORE_MATCH, mismatch=_SCORE_MISMATCH, gap=_SCORE_GAP_START, extension=_SCORE_GAP_EXTENSION, **keywds):
104 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension) 105 temp_file = Wise.align(cmdline, pair, **keywds) 106 try: 107 return Statistics(temp_file.name, match, mismatch, gap, extension) 108 except AttributeError: 109 try: 110 keywds['dry_run'] 111 return None 112 except KeyError: 113 raise
114
115 -def main():
116 import sys 117 stats = align(sys.argv[1:3]) 118 print "\n".join(["%s: %s" % (attr, getattr(stats, attr)) 119 for attr in 120 ("matches", "mismatches", "gaps", "extensions")]) 121 print "identity_fraction: %s" % stats.identity_fraction() 122 print "coords: %s" % stats.coords
123
124 -def _test(*args, **keywds):
125 import doctest, sys 126 doctest.testmod(sys.modules[__name__], *args, **keywds)
127 128 if __name__ == "__main__": 129 if __debug__: 130 _test() 131 main() 132