Package Bio :: Package PopGen :: Package GenePop
[hide private]
[frames | no frames]

Source Code for Package Bio.PopGen.GenePop

  1  # Copyright 2007 by Tiago Antao.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with GenePop. 
  8   
  9  See http://wbiomed.curtin.edu.au/genepop/ , the format is documented 
 10  here: http://wbiomed.curtin.edu.au/genepop/help_input.html . 
 11   
 12  Classes: 
 13  Record           Holds GenePop data. 
 14  RecordParser     Parses a GenePop record (file) into a Record object. 
 15   
 16  _Scanner         Scans a GenePop record. 
 17  _RecordConsumer  Consumes GenePop data to a Record object. 
 18   
 19  Partially inspired by the MedLine code. 
 20   
 21  """ 
 22  from copy import deepcopy 
 23  from types import * 
 24   
 25  from Bio import File 
 26  from Bio.ParserSupport import * 
 27   
 28   
class Record:
    """Holds information from a GenePop record.

    Members:
    marker_len         The marker length (2 or 3 digit code per allele).

    comment_line       Comment line.

    loci_list          List of loci names.

    pop_list           List of population names.

    populations        List of population data.

    In most genepop files, the population name is not trustable.
    It is strongly recommended that populations are referred by index.

    populations has one element per population. Each element is itself
    a list of individuals, each individual is a pair composed by individual
    name and a list of alleles (2 per marker): Example
    [
        [
            ('Ind1', [(1,2),    (3,3), (200,201)]),
            ('Ind2', [(2,None), (3,3), (None,None)]),
        ],
        [
            ('Other1', [(1,1),  (4,3), (200,200)]),
        ]
    ]

    """

    def __init__(self):
        self.marker_len = 0
        self.comment_line = ""
        self.loci_list = []
        self.pop_list = []
        self.populations = []

    def __str__(self):
        """Returns (reconstructs) a GenePop textual representation.

        Missing alleles (None) are written as 0 and every allele is
        left-padded with zeros up to marker_len characters.
        """
        rep = [self.comment_line + '\n']
        rep.append('\n'.join(self.loci_list) + '\n')
        for pop in self.populations:
            rep.append('Pop\n')
            for name, markers in pop:
                rep.append(name)
                rep.append(',')
                for marker in markers:
                    rep.append(' ')
                    for al in marker:
                        if al is None:
                            al = '0'
                        rep.append(str(al).rjust(self.marker_len, '0'))
                rep.append('\n')
        return "".join(rep)

    def split_in_pops(self, pop_names):
        """Splits a GP record in a dictionary with 1 pop per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key pop_name) where each item is a record
        with a single pop and m loci.

        Parameters:
        pop_names - Population names, indexed like self.populations.
        """
        gp_pops = {}
        for i, pop in enumerate(self.populations):
            # Record lives in this module; there is no GenePop name here.
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = deepcopy(self.loci_list)
            gp_pop.populations = [deepcopy(pop)]
            gp_pops[pop_names[i]] = gp_pop
        return gp_pops

    def split_in_loci(self, gp=None):
        """Splits a GP record in a dictionary with 1 locus per entry.

        Given a record with n pops and m loci returns a dictionary
        of records (key locus name) where each item is a record
        with a single locus and n pops.

        Parameters:
        gp - Unused; accepted only so that existing callers keep working.
        """
        gp_loci = {}
        for i, locus in enumerate(self.loci_list):
            gp_pop = Record()
            gp_pop.marker_len = self.marker_len
            gp_pop.comment_line = self.comment_line
            gp_pop.loci_list = [locus]
            gp_pop.populations = []
            for pop in self.populations:
                my_pop = []
                for indiv in pop:
                    # Keep the individual name, but only the i-th marker.
                    my_pop.append((indiv[0], [indiv[1][i]]))
                gp_pop.populations.append(my_pop)
            gp_loci[locus] = gp_pop
        return gp_loci

    def remove_population(self, pos):
        """Removes a population (by position).
        """
        del self.populations[pos]

    def remove_locus_by_position(self, pos):
        """Removes a locus by position.

        The locus name is dropped from loci_list and the corresponding
        marker is dropped from every individual of every population.
        """
        del self.loci_list[pos]
        for pop in self.populations:
            for name, loci in pop:
                del loci[pos]

    def remove_locus_by_name(self, name):
        """Removes a locus by name.

        Only the first locus with that name is removed. Nothing happens
        (and nothing is reported) if there is no such locus.
        """
        for i, locus in enumerate(self.loci_list):
            if locus == name:
                self.remove_locus_by_position(i)
                return
        # If we get here the locus does not exist... Maybe raise an exception?
        #   Although it should be OK... Just a boolean return, maybe?
class RecordParser(AbstractParser):
    """Parser that turns GenePop data into a Record object.

    A scanner reads the handle and reports events to a consumer, which
    builds the Record.
    """

    def __init__(self):
        self._consumer = _RecordConsumer()
        self._scanner = _Scanner()

    def parse(self, handle):
        """Scans handle and returns the data collected by the consumer."""
        consumer = self._consumer
        self._scanner.feed(handle, consumer)
        return consumer.data
170
def parse(handle):
    """Parses a handle containing a GenePop file.

    Builds a fresh RecordParser and returns whatever its parse method
    returns for the handle.
    """
    return RecordParser().parse(handle)
176
class _Scanner:
    """Scans a GenePop record.

    There is only one record per file.

    """

    def feed(self, handle, consumer):
        """feed(self, handle, consumer)

        Feed in a GenePop unit record for scanning.  handle is a file-like
        object that contains a Genepop record.  consumer is a
        Consumer object that will receive events as the report is scanned.

        Events sent to the consumer, in order: start_record, comment,
        loci_name (once per locus), start_pop (once per POP line),
        marker_len (once, on the first individual), individual (once per
        individual line) and finally end_record.

        Raises ValueError if an empty line (or the end of the file) is
        reached before the first POP line.  Scanning of individuals stops
        at the first empty line or at the end of the file.
        """
        if isinstance(handle, File.UndoHandle):
            uhandle = handle
        else:
            uhandle = File.UndoHandle(handle)

        consumer.start_record()

        comment_line = uhandle.readline().rstrip()
        consumer.comment(comment_line)

        # We can now have one locus per line or all loci in a single line
        # separated by either space or comma+space...
        # We will remove all commas on loci... that should not be a problem
        sample_loci_line = uhandle.readline().rstrip().replace(',', '')
        # Drop empty tokens so that repeated or leading spaces do not
        # create bogus empty locus names.
        all_loci = [locus for locus in sample_loci_line.split(' ') if locus]
        if len(all_loci) > 1:  # This is all loci in one line
            for locus in all_loci:
                consumer.loci_name(locus)
        else:
            consumer.loci_name(sample_loci_line.strip())
        next_line = uhandle.readline().rstrip()
        while next_line.upper() != 'POP':
            if next_line == '':
                raise ValueError('No population data found, file probably not GenePop related')
            consumer.loci_name(next_line)
            next_line = uhandle.readline().rstrip()
        consumer.start_pop()
        first_individual = True
        line = uhandle.readline().rstrip()
        while line != '':
            if line.upper() == 'POP':
                consumer.start_pop()
            else:
                # Split on the last comma: the genotype part never contains
                # one, but the individual name might.
                indiv_name, marker_line = line.rsplit(',', 1)
                markers = [marker
                           for marker in marker_line.replace('\t', ' ').split(' ')
                           if marker]
                if first_individual:
                    first_individual = False
                    # The allele width is decided once, from the first
                    # marker of the first individual.
                    if len(markers[0]) == 4:  # 2 digits per allele
                        marker_len = 2
                    else:
                        marker_len = 3
                    consumer.marker_len(marker_len)
                allele_list = []
                for marker in markers:
                    allele_list.append((
                        int(marker[0:marker_len]),
                        int(marker[marker_len:])
                    ))
                consumer.individual(indiv_name, allele_list)
            line = uhandle.readline().rstrip()
        consumer.end_record()
247
class _RecordConsumer(AbstractConsumer):
    """Consumer that converts a GenePop record to a Record object.

    Members:
    data    Record with GenePop data.

    """

    def __init__(self):
        self.data = None

    def start_record(self):
        """Starts a new, empty Record."""
        self.data = Record()

    def end_record(self):
        """Finishes the Record once everything was scanned.

        The name of the last individual of each population is appended to
        pop_list (so a population without individuals raises IndexError),
        and for the first len(loci_list) markers of every individual the
        allele value 0 is replaced by None and the marker is stored as a
        tuple.
        """
        pops = self.data.populations
        loci = self.data.loci_list
        for pop in pops:
            self.data.pop_list.append(pop[-1][0])
            for indiv_name, markers in pop:
                for mk_i in range(len(loci)):
                    mk_real = []
                    for al in markers[mk_i]:
                        if al == 0:
                            mk_real.append(None)
                        else:
                            mk_real.append(al)
                    markers[mk_i] = tuple(mk_real)

    def comment(self, comment_line):
        """Stores the comment line of the record."""
        self.data.comment_line = comment_line

    def loci_name(self, locus):
        """Appends a locus name to the record's loci list."""
        self.data.loci_list.append(locus)

    def marker_len(self, marker_len):
        """Stores the number of digits used per allele."""
        self.data.marker_len = marker_len

    def start_pop(self):
        """Opens a new population; later individuals are added to it."""
        self.current_pop = []
        self.data.populations.append(self.current_pop)

    def individual(self, indiv_name, allele_list):
        """Adds an individual (name, allele list) to the current population."""
        self.current_pop.append((indiv_name, allele_list))
292