Package Bio :: Package SubsMat :: Module FreqTable
[hide private]
[frames] | no frames]

Source Code for Module Bio.SubsMat.FreqTable

 1  import string 
 2  from Bio import Alphabet 
 3  COUNT = 1 # dict_type flag: the input dictionary holds letter counts, which FreqTable converts to frequencies
 4  FREQ = 2 # dict_type flag: the input dictionary already holds letter frequencies and is copied as-is
 5  ################################################################## 
 6  # A class to handle frequency tables 
 7  # Copyright Iddo Friedberg idoerg@cc.huji.ac.il 
 8  # Biopython (http://biopython.org) license applies 
 9  # Methods to read a letter frequency or a letter count file: 
10  # Example files for a DNA alphabet: 
11  # 
12  # A count file (whitespace separated): 
13  # 
14  # A  50 
15  # C  37 
16  # G  23 
17  # T  58 
18  # 
19  # The same info as a frequency file: 
20  # 
21  # A 0.2976 
22  # C 0.2202 
23  # G 0.1369 
24  # T 0.3452 
25  #  
26  # Functions: 
27  #   read_count(f): read a count file from stream f. Then convert to 
28  #   frequencies 
29  #   read_freq(f): read a frequency data file from stream f. Of course, we then 
30  #   don't have the counts, but it is usually the letter frequencies which are 
31  #   interesting. 
32  # 
33  # Methods: 
34  #   (all internal) 
35  # Attributes: 
36  #   alphabet: The IUPAC alphabet set (or any other) whose letters you are 
37  #   using. Common sets are: IUPAC.protein (20-letter protein), 
38  #   IUPAC.unambiguous_dna (4-letter DNA). See Bio/alphabet for more. 
39  #   data: frequency dictionary. 
40  #   count: count dictionary. Empty if no counts are provided. 
41  # 
42  # Example of use: 
43  #   >>> from Bio.SubsMat import FreqTable 
44  #   >>> ftab = FreqTable.FreqTable(my_frequency_dictionary,FreqTable.FREQ) 
45  #   >>> ftab = FreqTable.FreqTable(my_count_dictionary,FreqTable.COUNT) 
46  #   >>> ftab = FreqTable.read_count(open('myDNACountFile')) 
47  # 
48  #   
49  ################################################################## 
class FreqTable(dict):
    """Letter frequency table: a dict mapping each letter to its frequency.

    Attributes:
        alphabet: the alphabet passed to the constructor. When none is given,
            a fresh ``Alphabet.Alphabet()`` is created and its ``letters``
            attribute is set to the table's keys, sorted and concatenated.
        count: the count dictionary the table was built from, or an empty
            dict when the table was built directly from frequencies.
    """

    def _freq_from_count(self):
        """Fill the table with ``count / total`` for every key of self.count.

        Raises:
            ZeroDivisionError: if self.count is non-empty but sums to zero.
        """
        # float() keeps this a true division for integer counts, including on
        # interpreters where int / int truncates.
        total = float(sum(self.count.values()))
        for letter, n in self.count.items():
            self[letter] = n / total

    def _alphabet_from_input(self):
        """Return the table's keys, sorted and concatenated into one string."""
        return "".join(sorted(self))

    def __init__(self, in_dict, dict_type, alphabet=None):
        """Build the table from a count or a frequency dictionary.

        Args:
            in_dict: dictionary keyed by letter; values are counts when
                dict_type is COUNT, frequencies when dict_type is FREQ.
            dict_type: COUNT or FREQ, telling how to interpret in_dict.
            alphabet: optional alphabet object; when omitted (or falsy) one
                is derived from the letters present in the table.

        Raises:
            ValueError: if dict_type is neither COUNT nor FREQ.
        """
        dict.__init__(self)
        self.alphabet = alphabet
        if dict_type == COUNT:
            # The caller's dictionary is kept by reference, not copied.
            self.count = in_dict
            self._freq_from_count()
        elif dict_type == FREQ:
            self.count = {}
            self.update(in_dict)
        else:
            raise ValueError("bad dict_type")
        if not alphabet:
            self.alphabet = Alphabet.Alphabet()
            self.alphabet.letters = self._alphabet_from_input()
80
def read_count(f):
    """Read a letter count file from stream f and return a FreqTable.

    Each non-blank line holds a letter and an integer count separated by
    whitespace; any further fields on the line are ignored. The counts are
    converted to frequencies by FreqTable.

    Args:
        f: an iterable of text lines, e.g. an open file.

    Returns:
        A FreqTable built from the counts (dict_type COUNT).

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        ValueError: if a count is not a valid integer.
    """
    count = {}
    for line in f:
        # str.split() with no argument also discards surrounding whitespace.
        fields = line.split()
        if not fields:
            # Skip blank lines (such as a trailing newline) instead of
            # failing on a missing first field.
            continue
        count[fields[0]] = int(fields[1])
    return FreqTable(count, COUNT)
88
def read_freq(f):
    """Read a letter frequency file from stream f and return a FreqTable.

    Each non-blank line holds a letter and a floating-point frequency
    separated by whitespace; any further fields on the line are ignored.

    Args:
        f: an iterable of text lines, e.g. an open file.

    Returns:
        A FreqTable built from the frequencies (dict_type FREQ), so its
        count dictionary is empty.

    Raises:
        IndexError: if a non-blank line has fewer than two fields.
        ValueError: if a frequency is not a valid float.
    """
    freq_dict = {}
    for line in f:
        # str.split() with no argument also discards surrounding whitespace.
        fields = line.split()
        if not fields:
            # Skip blank lines (such as a trailing newline) instead of
            # failing on a missing first field.
            continue
        freq_dict[fields[0]] = float(fields[1])
    return FreqTable(freq_dict, FREQ)
95