1
2
3
4
5 """
6 Parser for PHD files output by PHRED and used by PHRAP and CONSED.
7
8 Works fine with PHRED 0.020425.c
9
10 Version 1.1, 03/09/2004
11 written by Cymon J. Cox (cymon@duke.edu) and Frank Kauff (fkauff@duke.edu)
12 Comments, bugs, problems, suggestions to one of us are welcome!
13
14 Uses the Biopython Parser interface for parsing: ParserSupport.py
15
16 """
17
18 import os
19 from Bio import File
20 from Bio import Seq
21 from Bio.ParserSupport import *
22 from Bio.Alphabet import IUPAC
23
# Keywords whose lower-cased forms seed the ``comments`` dictionary of a
# record (each entry starts out as None).
CKEYWORDS = [
    'CHROMAT_FILE',
    'ABI_THUMBPRINT',
    'PHRED_VERSION',
    'CALL_METHOD',
    'QUALITY_LEVELS',
    'TIME',
    'TRACE_ARRAY_MIN_INDEX',
    'TRACE_ARRAY_MAX_INDEX',
    'TRIM',
    'TRACE_PEAK_AREA_RATIO',
    'CHEM',
    'DYE',
]
27
29 """Hold information from a PHD file."""
31 self.file_name = ''
32 self.comments={}
33 for kw in CKEYWORDS:
34 self.comments[kw.lower()]=None
35 self.sites = []
36 self.seq = ''
37 self.seq_trimmed = ''
38
39
41 """Iterates over a file of multiple PHD records.
42
43 Methods:
44 next Return the next record from the stream, or None.
45 """
46
def __init__(self, handle, parser=None):
    """Set up iteration over the PHD records readable from handle.

    handle is a file-like object; it is wrapped in a File.UndoHandle.
    parser is an optional Parser object used to turn each record into
    another form.  When parser is None, the raw contents of each record
    are returned instead.
    """
    self._parser = parser
    self._uhandle = File.UndoHandle(handle)
56
58 """next(self) -> object
59
60 Return the next PHD record from the file. If no more records
61 return None.
62 """
63
64 lines = []
65 while 1:
66 line = self._uhandle.readline()
67 if not line:
68 break
69
70 if lines and line[:14] == 'BEGIN_SEQUENCE':
71 self._uhandle.saveline(line)
72 break
73 lines.append(line)
74
75 if not lines:
76 return None
77
78 data = ''.join(lines)
79 if self._parser is not None:
80 return self._parser.parse(File.StringHandle(data))
81 return data
82
84 """Iterate over the PHD file record by record."""
85 return iter(self.next, None)
86
88 """Parses PHD file data into a Record object."""
92
100
101
103 """Scans a PHD-formatted file.
104
105 Methods:
106 feed - Feed one PHD record.
107 """
def feed(self, handle, consumer):
    """Scan PHD data from handle, sending events to consumer.

    handle must be a File.UndoHandle containing PHD data; anything else
    trips the assertion below.  consumer is a Consumer object that
    receives events as the PHD data is scanned.  Nothing is scanned when
    handle has no further line to offer.
    """
    assert isinstance(handle, File.UndoHandle), "handle must be an UndoHandle"
    # Guard clause: peekline() returning a false value means no data is left.
    if not handle.peekline():
        return
    self._scan_record(handle, consumer)
119
125
128
144
146 while 1:
147 line = uhandle.readline()
148 if is_blank_line(line) or line == 'BEGIN_DNA\n':
149 continue
150 elif line == 'END_DNA\n':
151 break
152 consumer.read_dna(line)
153
154
156 """Consumer that converts a PHD record to a Record object."""
159
163
170
173
176
179
182
185
186 - def time(self, line):
188
191
194
def trim(self, line):
    """Store the values of a TRIM comment line in the record comments.

    line is the complete comment line.  Its first five characters
    (presumably the ``TRIM:`` keyword prefix -- confirm against the
    scanner) are skipped, and the remainder must hold exactly three
    whitespace-separated fields.  They are saved under
    ``self.data.comments['trim']`` as the tuple
    ``(int(first), int(last), float(prob))``.

    Raises ValueError when the line does not hold exactly three fields
    or when a field cannot be converted to a number.
    """
    # Only the prefix is sliced off: split() already discards surrounding
    # whitespace (including "\n" and "\r\n"), so a final line that lacks
    # its newline no longer loses the last character of the third field.
    first, last, prob = line[5:].split()
    self.data.comments['trim'] = (int(first), int(last), float(prob))
198
201
202 - def chem(self, line):
204
205 - def dye(self, line):
207
211
if __name__ == "__main__":
    # Quick self test: parse the example file at ../../Tests/Phd/phd1 and
    # print the file name and sequence length of every record found in it.
    print("Quick self test")

    handle = open("../../Tests/Phd/phd1")
    try:
        # try/finally guarantees the file is closed even if parsing fails.
        for record in Iterator(handle, RecordParser()):
            # %-formatting yields the same "name length" output whether
            # print is a statement or a function.
            print("%s %s" % (record.file_name, len(record.seq)))
    finally:
        handle.close()
    print("Done")
222