Package Bio :: Package Affy :: Module CelFile
[hide private]
[frames | no frames]

Source Code for Module Bio.Affy.CelFile

  1  # Copyright 2004 by Harry Zuzan.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  Classes for accessing the information in Affymetrix cel files. 
  8   
  9  Functions: 
 10  read      Read a cel file and store its contents in a Record 
 11   
 12  Classes: 
 13  Record    Contains the information from a cel file 
 14   
 15   
 16  The following classes are obsolete: 
 17   
 18  class CelParser: parses cel files 
 19  class CelRecord: stores the information from a cel file 
 20   
 21  """ 
 22   
 23  import numpy 
 24   
class Record:
    """
    Holds the contents of a cel file.

    All five attributes start out as None:

    intensities, stdevs, npix    per-cell data
    nrows, ncols                 grid dimensions
    """

    def __init__(self):
        # Per-cell data.
        self.intensities = self.stdevs = self.npix = None
        # Grid dimensions.
        self.nrows = self.ncols = None
35 36
def read(handle):
    """
    Read the information in a cel file, and store it in a Record.

    handle is an iterable of text lines (for example a file opened in
    text mode).  Blank lines are ignored.  Only two sections are used:

    [HEADER]      the Cols and Rows entries give the grid dimensions;
                  other entries, and lines without "=", are skipped.
    [INTENSITY]   "key=value" lines are skipped; every other line is
                  split on whitespace and its first five fields are
                  read as column index, row index, intensity, standard
                  deviation and pixel count.

    Lines in any other section are ignored.

    Returns a Record whose arrays have shape (nrows, ncols) and are
    indexed [row, column].  The arrays are left as None when the input
    has no [INTENSITY] section.

    Raises ValueError when [INTENSITY] starts before both Rows and Cols
    have been read from [HEADER].
    """
    # Needs to know the chip design.
    record = Record()
    section = ""
    for line in handle:
        if not line.strip():
            continue
        if line.startswith("[HEADER]"):
            section = "HEADER"
        elif line.startswith("[INTENSITY]"):
            # The arrays cannot be sized without both dimensions; fail
            # with a clear message rather than inside numpy.zeros.
            if record.nrows is None or record.ncols is None:
                raise ValueError(
                    "[INTENSITY] section found before Rows and Cols "
                    "were read from [HEADER]")
            section = "INTENSITY"
            shape = (record.nrows, record.ncols)
            record.intensities = numpy.zeros(shape)
            record.stdevs = numpy.zeros(shape)
            record.npix = numpy.zeros(shape, int)
        elif line.startswith("["):
            # Any other section header: ignore lines until the next one.
            section = ""
        elif section == "HEADER":
            keyword, separator, value = line.partition("=")
            if not separator:
                # Not a key=value entry; nothing to extract.
                continue
            keyword = keyword.strip()
            if keyword == "Cols":
                record.ncols = int(value)
            elif keyword == "Rows":
                record.nrows = int(value)
        elif section == "INTENSITY":
            if "=" in line:
                # Metadata line inside the section, not cell data.
                continue
            words = line.split()
            # First field indexes columns, second indexes rows.
            col, row = map(int, words[:2])
            record.intensities[row, col] = float(words[2])
            record.stdevs[row, col] = float(words[3])
            record.npix[row, col] = int(words[4])
    return record
72 73 74 # Everything below is considered obsolete 75 76 from Bio.ParserSupport import AbstractConsumer 77 from numpy import * 78
class CelScanner:
    """Scanner for Affymetrix CEL files.

    Methods:
    feed     Feed data into the scanner.

    While scanning, the following consumer methods are called:

    Rows            - with the text after "Rows=" in [HEADER]
    Cols            - with the text after "Cols=" in [HEADER]
    StartIntensity  - when the [INTENSITY] section header is found
    ReadIntensity   - with each data line in the [INTENSITY] section

    """

    def feed(self, handle, consumer):
        """scanner.feed(handle, consumer)

        Scan a Cel file.  handle is a file-like object that yields the
        lines of the file.  consumer is the object whose Rows, Cols,
        StartIntensity and ReadIntensity methods receive the events.
        Values are passed on as raw text; nothing is converted here.
        """
        current = ""
        for raw in handle:
            if not raw.strip():
                continue
            if raw[0] == "[":
                # Section header: choose the new section and move on.
                if raw[:8] == "[HEADER]":
                    current = "HEADER"
                elif raw[:11] == "[INTENSITY]":
                    current = "INTENSITY"
                    consumer.StartIntensity()
                else:
                    current = ""
                continue
            if current == "HEADER":
                name, value = raw.split("=", 1)
                if name == "Cols":
                    consumer.Cols(value)
                elif name == "Rows":
                    consumer.Rows(value)
            elif current == "INTENSITY" and "=" not in raw:
                # key=value lines inside [INTENSITY] are not cell data.
                consumer.ReadIntensity(raw)
120 121
class CelConsumer(AbstractConsumer):
    """
    Consumer that collects scanner events into three arrays:
    _mean, _stdev and _npix, each of shape (rows, cols).
    """

    def __init__(self):
        # The arrays are allocated in StartIntensity.
        self._mean = self._stdev = self._npix = None

    def Cols(self, value):
        """Store the number of columns, converted to int."""
        self._cols = int(value)

    def Rows(self, value):
        """Store the number of rows, converted to int."""
        self._rows = int(value)

    def StartIntensity(self):
        """Allocate zero-filled arrays using the stored rows and columns."""
        shape = (self._rows, self._cols)
        self._mean = zeros(shape)
        self._stdev = zeros(shape)
        self._npix = zeros(shape, int)

    def ReadIntensity(self, line):
        """
        Store one data line.  The line must split into exactly five
        numeric fields: column index, row index, mean, stdev, npix.
        """
        col, row, mean, stdev, npix = map(float, line.split())
        # All fields are parsed as floats; the indices are then truncated.
        row = int(row)
        col = int(col)
        cell = (row, col)
        self._mean[cell] = mean
        self._stdev[cell] = stdev
        self._npix[cell] = int(npix)
147
class CelRecord:
    """
    Holds the probe cell arrays of a cel file behind accessor methods.

    Needs error handling.
    Needs to know the chip design.
    """

    def __init__(self, data_dict):
        """
        Build the record from a dictionary with the keys 'intensities',
        'stdevs' and 'npix'.

        Each value is deep-copied, so the record does not share data
        with the caller.  The row and column counts are taken from the
        shape of the intensities array.
        """
        import copy

        for key in ('intensities', 'stdevs', 'npix'):
            setattr(self, '_' + key, copy.deepcopy(data_dict[key]))

        shape = self._intensities.shape
        self._nrows, self._ncols = shape

    def intensities(self):
        """
        Probe cell intensities as a two dimensional array.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._intensities

    def stdevs(self):
        """
        Probe cell standard deviations as a two dimensional array.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._stdevs

    def npix(self):
        """
        Number of pixels per probe cell as a two dimensional array.
        Dimension 1 -> rows
        Dimension 2 -> columns
        """
        return self._npix

    def nrows(self):
        """
        Number of rows of probe cells in the array.
        """
        return self._nrows

    def ncols(self):
        """
        Number of columns of probe cells in the array.
        """
        return self._ncols

    def size(self):
        """
        Size of the probe cell array as the tuple (nrows, ncols).
        """
        return (self._nrows, self._ncols)
214 215 216
class CelParser:
    """
    Parses an Affymetrix cel file from a handle; calling the parser
    returns the parsed data as a CelRecord.

    This class needs error handling.
    """

    def __init__(self, handle=None):
        """
        Usually constructed with the open cel file (not its file name)
        as the argument, in which case the file is parsed immediately.
        """
        self._intensities = self._stdevs = self._npix = None

        if handle is None:
            return
        self.parse(handle)

    def parse(self, handle):
        """
        Parse the cel file behind handle and keep the intensity,
        standard deviation and pixel count arrays on the parser.

        There is more information in the cel file that could be
        retrieved and stored in CelRecord.  The chip type should be a
        priority.
        """
        scanner, consumer = CelScanner(), CelConsumer()
        scanner.feed(handle, consumer)

        self._intensities = consumer._mean
        self._stdevs = consumer._stdev
        self._npix = consumer._npix

        shape = self._intensities.shape
        self._nrows = shape[0]
        self._ncols = shape[1]

    def __call__(self):
        """
        Return the parsed data wrapped in a CelRecord.
        """
        return CelRecord({
            'intensities': self._intensities,
            'stdevs': self._stdevs,
            'npix': self._npix,
        })
269