1
2
3
4
5
6
7
8
9
10
11
12
13
14 __version__ = "$Revision: 1.12 $"
15
16 import commands
17 import itertools
18 import os
19 import re
20
21 from Bio import Wise
22
23 _SCORE_MATCH = 4
24 _SCORE_MISMATCH = -1
25 _SCORE_GAP_START = -5
26 _SCORE_GAP_EXTENSION = -1
27
28 _CMDLINE_DNAL = ["dnal", "-alb", "-nopretty"]
29
38
39 _CMDLINE_FGREP_COUNT = "fgrep -c '%s' %s"
42
43 _re_alb_line2coords = re.compile(r"^\[([^:]+):[^\[]+\[([^:]+):")
48
50 alb = file(filename)
51
52 start_line = None
53 end_line = None
54
55 for line in alb:
56 if line.startswith("["):
57 if not start_line:
58 start_line = line
59 else:
60 end_line = line
61
62 if end_line is None:
63 return [(0, 0), (0, 0)]
64
65 return zip(*map(_alb_line2coords, [start_line, end_line]))
66
67 -def _any(seq, pred=bool):
68 "Returns True if pred(x) is True at least one element in the iterable"
69 return True in itertools.imap(pred, seq)
70
72 """
73 Calculate statistics from an ALB report
74 """
75 - def __init__(self, filename, match, mismatch, gap, extension):
76 self.matches = _fgrep_count('"SEQUENCE" %s' % match, filename)
77 self.mismatches = _fgrep_count('"SEQUENCE" %s' % mismatch, filename)
78 self.gaps = _fgrep_count('"INSERT" %s' % gap, filename)
79
80 if gap == extension:
81 self.extensions = 0
82 else:
83 self.extensions = _fgrep_count('"INSERT" %s' % extension, filename)
84
85 self.score = (match*self.matches +
86 mismatch*self.mismatches +
87 gap*self.gaps +
88 extension*self.extensions)
89
90 if _any([self.matches, self.mismatches, self.gaps, self.extensions]):
91 self.coords = _get_coords(filename)
92 else:
93 self.coords = [(0, 0), (0,0)]
94
96 return self.matches/(self.matches+self.mismatches)
97
98 header = "identity_fraction\tmatches\tmismatches\tgaps\textensions"
99
101 return "\t".join([str(x) for x in (self.identity_fraction(), self.matches, self.mismatches, self.gaps, self.extensions)])
102
104 cmdline = _build_dnal_cmdline(match, mismatch, gap, extension)
105 temp_file = Wise.align(cmdline, pair, **keywds)
106 try:
107 return Statistics(temp_file.name, match, mismatch, gap, extension)
108 except AttributeError:
109 try:
110 keywds['dry_run']
111 return None
112 except KeyError:
113 raise
114
116 import sys
117 stats = align(sys.argv[1:3])
118 print "\n".join(["%s: %s" % (attr, getattr(stats, attr))
119 for attr in
120 ("matches", "mismatches", "gaps", "extensions")])
121 print "identity_fraction: %s" % stats.identity_fraction()
122 print "coords: %s" % stats.coords
123
124 -def _test(*args, **keywds):
125 import doctest, sys
126 doctest.testmod(sys.modules[__name__], *args, **keywds)
127
128 if __name__ == "__main__":
129 if __debug__:
130 _test()
131 main()
132