1
2
3
4
5
6 """
7 Classes for accessing the information in Affymetrix cel files.
8
9 Functions:
10 read Read a cel file and store its contents in a Record
11
12 Classes:
13 Record Contains the information from a cel file
14
15
16 The following classes are obsolete:
17
18 class CelParser: parses cel files
19 class CelRecord: stores the information from a cel file
20
21 """
22
23 import numpy
24
26 """
27 Stores the information in a cel file
28 """
35
36
def read(handle):
    """Read the information in a cel file, and store it in a Record.

    ``handle`` is iterated line by line.  Blank lines are ignored.  A line
    starting with ``[HEADER]`` or ``[INTENSITY]`` switches to that section;
    any other line starting with ``[`` switches to "no section", whose
    lines are ignored.

    In the HEADER section only the ``Cols`` and ``Rows`` keywords are used;
    they are stored as ints in ``record.ncols`` and ``record.nrows``.

    When ``[INTENSITY]`` is reached, ``record.intensities``,
    ``record.stdevs`` and ``record.npix`` are allocated as zero-filled
    arrays of shape ``(record.nrows, record.ncols)``.  Every following
    line of that section that contains no ``=`` is split on whitespace and
    stored: field 3 as intensity, field 4 as standard deviation, field 5
    as pixel count, at the position given by fields 1 and 2.

    Returns the populated Record.
    """
    # NOTE(review): Record is defined elsewhere in this file; if
    # [INTENSITY] appears before Rows/Cols were read, the allocation below
    # uses whatever Record() initialised nrows/ncols to -- confirm.
    record = Record()
    section = ""
    for line in handle:
        if not line.strip():
            continue
        if line.startswith("[HEADER]"):
            section = "HEADER"
        elif line.startswith("[INTENSITY]"):
            section = "INTENSITY"
            shape = (record.nrows, record.ncols)
            record.intensities = numpy.zeros(shape)
            record.stdevs = numpy.zeros(shape)
            record.npix = numpy.zeros(shape, int)
        elif line.startswith("["):
            section = ""
        elif section == "HEADER":
            # partition() never raises, so a header line without "=" is
            # simply not recognised instead of aborting the whole parse.
            keyword, _, value = line.partition("=")
            if keyword == "Cols":
                record.ncols = int(value)
            elif keyword == "Rows":
                record.nrows = int(value)
        elif section == "INTENSITY":
            if "=" in line:
                # key=value bookkeeping lines carry no cell data
                continue
            words = line.split()
            # The first field indexes the second (column) axis of the
            # arrays and the second field indexes the first (row) axis.
            col, row = map(int, words[:2])
            record.intensities[row, col] = float(words[2])
            record.stdevs[row, col] = float(words[3])
            record.npix[row, col] = int(words[4])
    return record
72
73
74
75
76 from Bio.ParserSupport import AbstractConsumer
77 from numpy import *
78
80 """Scanner for Affymetrix CEL files.
81
82 Methods:
83 feed Feed data into the scanner.
84
85 The scanner generates (and calls the consumer) the following
86 types of events:
87
88 Rows - the number of rows on the microarray
89 Cols - the number of columns on the microarray
90 StartIntensity - generated when the section [INTENSITY] is found
91 ReadIntensity - one line in the section [INTENSITY]
92
93 """
def feed(self, handle, consumer):
    """scanner.feed(handle, consumer)

    Feed in a handle to a Cel file for scanning.  handle is a file-like
    object that contains the Cel file.  consumer is a Consumer
    object that will receive events as the report is scanned.

    Events sent to the consumer:

    - ``Cols(value)`` / ``Rows(value)`` for the matching keyword in the
      ``[HEADER]`` section; ``value`` is the raw text after the first
      ``=`` (it is not stripped or converted here).
    - ``StartIntensity()`` when the ``[INTENSITY]`` section header is met.
    - ``ReadIntensity(line)`` for every line of the ``[INTENSITY]``
      section that contains no ``=``.

    Blank lines, and lines in any other section, produce no events.
    """
    section = ""
    for line in handle:
        if not line.strip():
            continue
        if line.startswith("["):
            # Any section header resets the state; only two sections
            # are of interest to the consumer.
            if line.startswith("[HEADER]"):
                section = "HEADER"
            elif line.startswith("[INTENSITY]"):
                section = "INTENSITY"
                consumer.StartIntensity()
            else:
                section = ""
        elif section == "HEADER":
            # partition() never raises, so a header line without "=" is
            # ignored instead of aborting the scan with a ValueError.
            keyword, _, value = line.partition("=")
            if keyword == "Cols":
                consumer.Cols(value)
            elif keyword == "Rows":
                consumer.Rows(value)
        elif section == "INTENSITY" and "=" not in line:
            # key=value lines in this section carry no cell data
            consumer.ReadIntensity(line)
120
121
123
125 self._mean = None
126 self._stdev = None
127 self._npix = None
128
def Cols(self, value):
    """Store the number of columns, converting ``value`` with ``int()``.

    Raises ValueError if ``value`` cannot be converted to an int.
    """
    self._cols = int(value)
131
def Rows(self, value):
    """Store the number of rows, converting ``value`` with ``int()``.

    Raises ValueError if ``value`` cannot be converted to an int.
    """
    self._rows = int(value)
134
136 self._mean = zeros((self._rows, self._cols))
137 self._stdev = zeros((self._rows, self._cols))
138 self._npix = zeros((self._rows, self._cols), int)
139
147
149 """
150 Stores the information in a cel file
151
152 Needs error handling.
153 Needs to know the chip design.
154 """
155
156
158 """
159 Pass the data attributes as a dictionary.
160 """
161 from copy import deepcopy as dcopy
162
163 self._intensities = dcopy(data_dict['intensities'])
164 self._stdevs = dcopy(data_dict['stdevs'])
165 self._npix = dcopy(data_dict['npix'])
166
167 self._nrows, self._ncols = self._intensities.shape
168
169
171 """
172 Return a two dimensional array of probe cell intensities.
173 Dimension 1 -> rows
174 Dimension 2 -> columns
175 """
176 return self._intensities
177
178
180 """
181 Return a two dimensional array of probe cell standard deviations.
182 Dimension 1 -> rows
183 Dimension 2 -> columns
184 """
185 return self._stdevs
186
187
189 """
190 Return a two dimensional array of the number of pixels in a probe cell.
191 Dimension 1 -> rows
192 Dimension 2 -> columns
193 """
194 return self._npix
195
196
198 """
199 The number of rows of probe cells in an array.
200 """
201 return self._nrows
202
204 """
205 The number of columns of probe cells in an array.
206 """
207 return self._ncols
208
210 """
211 The size of the probe cell array as a tuple (nrows,ncols).
212 """
213 return self._nrows, self._ncols
214
215
216
218 """
219 Takes a handle to an Affymetrix cel file, parses the file and
220 returns an instance of a CelRecord
221
222 This class needs error handling.
223 """
224
226 """
227 Usually load the class with the cel file (not file name) as
228 an argument.
229 """
230
231 self._intensities = None
232 self._stdevs = None
233 self._npix = None
234
235 if handle is not None: self.parse(handle)
236
237
def parse(self, handle):
    """
    Takes a handle to a cel file, parses it
    and stores it in the three arrays.

    A CelScanner is run over ``handle`` with a fresh CelConsumer; the
    consumer's ``_mean``, ``_stdev`` and ``_npix`` arrays are then kept
    as ``self._intensities``, ``self._stdevs`` and ``self._npix``, and
    the array dimensions as ``self._nrows`` and ``self._ncols``.

    There is more information in the cel file that could be retrieved
    and stored in CelRecord. The chip type should be a priority.
    """
    scanner = CelScanner()
    consumer = CelConsumer()
    scanner.feed(handle, consumer)

    self._intensities = consumer._mean
    self._stdevs = consumer._stdev
    self._npix = consumer._npix
    # NOTE(review): the consumer's arrays start out as None; if they were
    # never allocated during the scan this line raises AttributeError.
    self._nrows, self._ncols = self._intensities.shape
256
257
259 """
260 Returns the parsed data as a CelRecord.
261 """
262
263 record_dict = {}
264 record_dict['intensities'] = self._intensities
265 record_dict['stdevs'] = self._stdevs
266 record_dict['npix'] = self._npix
267
268 return CelRecord(record_dict)
269