Package Bio :: Package NeuralNetwork :: Package Gene :: Module Pattern
[hide private]
[frames] | no frames]

Source Code for Module Bio.NeuralNetwork.Gene.Pattern

  1  """Generic functionality useful for all gene representations. 
  2   
  3  This module contains classes which can be used for all the different 
  4  types of patterns available for representing gene information (ie. motifs, 
  5  signatures and schemas). These are the general classes which should be 
  6  able to handle any of the different specific patterns. 
  7  """ 
  8  # standard library 
  9  import random 
 10   
 11  # biopython 
 12  from Bio import utils 
 13  from Bio.Seq import Seq, MutableSeq 
 14   
class PatternIO:
    """Allow reading and writing of patterns to files.

    This just defines a simple persistence class for patterns, making
    it easy to write them to a file and read them back. Every pattern
    occupies one line of the file; the items of a signature (a list or
    tuple of patterns) are joined on that line with the separator.
    """

    def __init__(self, alphabet=None):
        """Initialize the reader and writer class.

        Arguments:

        o alphabet - An optional argument specifying the alphabet
        which patterns should follow. If an alphabet is set it'll be used
        to verify that all patterns follow it.

        Attributes:

        o separator - A character to use in separating items in a signature
        when it is written to a file and read back. This character should
        not be in the possible alphabet of the sequences, or there will
        be trouble.
        """
        self._alphabet = alphabet

        self.separator = ";"

    def write(self, pattern_list, output_handle):
        """Write a list of patterns to the given handle.

        Arguments:

        o pattern_list - The patterns to write. Lists and tuples (and
        their subclasses) are treated as signatures and their items are
        joined with the separator; anything else is written as is.

        o output_handle - An object with a write() method which receives
        one line per pattern.
        """
        for pattern in pattern_list:
            # isinstance (rather than an exact type comparison) so that
            # list/tuple subclasses are also treated as signatures; a tuple
            # subclass given straight to the % operator below would be
            # consumed as the formatting arguments.
            if isinstance(pattern, (list, tuple)):
                string_pattern = self.separator.join(pattern)
            # deal with the normal cases
            else:
                string_pattern = pattern

            output_handle.write("%s\n" % string_pattern)

    def write_seq(self, seq_pattern_list, output_handle):
        """Convenience function to write Seq objects to a file.

        This can take Seqs and MutableSeqs, and write them to a file
        as strings.

        Arguments:

        o seq_pattern_list - The Seq or MutableSeq objects to write.

        o output_handle - The handle passed on to write().

        Raises ValueError if an item is neither a Seq nor a MutableSeq.
        """
        # convert the seq patterns into just string patterns
        all_patterns = []

        for seq_pattern in seq_pattern_list:
            if isinstance(seq_pattern, MutableSeq):
                seq = seq_pattern.toseq()
                all_patterns.append(seq.data)
            elif isinstance(seq_pattern, Seq):
                all_patterns.append(seq_pattern.data)
            else:
                # wrap in a one-element tuple so an offending tuple value is
                # reported instead of being unpacked as format arguments
                raise ValueError("Unexpected pattern type %r"
                                 % (seq_pattern,))

        self.write(all_patterns, output_handle)

    def read(self, input_handle):
        """Read patterns from the specified handle.

        Each line is stripped of trailing whitespace. A line containing
        the separator is split on it and returned as a tuple (a signature);
        any other line is returned as a plain string.

        Arguments:

        o input_handle - An object with a readline() method.

        Returns a list with one entry per line read.

        Raises ValueError if an alphabet was given and
        utils.verify_alphabet rejects one of the pattern items.
        """
        all_patterns = []

        while True:
            cur_line = input_handle.readline()

            # readline() hands back an empty value at the end of the file
            if not cur_line:
                break

            cur_pattern = cur_line.rstrip()
            # split up signatures
            if self.separator in cur_pattern:
                cur_pattern = tuple(cur_pattern.split(self.separator))

            if self._alphabet is not None:
                # make single patterns (not signatures) into lists, so we
                # can check signatures and single patterns the same
                if isinstance(cur_pattern, tuple):
                    test_pattern = cur_pattern
                else:
                    test_pattern = [cur_pattern]

                for pattern_item in test_pattern:
                    pattern_seq = Seq(pattern_item, self._alphabet)
                    if not utils.verify_alphabet(pattern_seq):
                        raise ValueError("Pattern %s not matching alphabet %s"
                                         % (cur_pattern, self._alphabet))

            all_patterns.append(cur_pattern)

        return all_patterns
106
class PatternRepository:
    """This holds a list of specific patterns found in sequences.

    This is designed to be a general holder for a set of patterns and
    should be subclassed for specific implementations (ie. holding Motifs
    or Signatures).
    """

    def __init__(self, pattern_info):
        """Initialize a repository with patterns.

        Arguments:

        o pattern_info - A representation of all of the patterns found in
        a *Finder search. This should be a dictionary, where the keys
        are patterns, and the values are the number of times a pattern is
        found.

        The patterns are represented internally as a list of two
        tuples, where the first element is the number of times a pattern
        occurs, and the second is the pattern itself. This makes it easy
        to sort the list and return the top N patterns.
        """
        self._pattern_dict = pattern_info

        # create the list representation, most frequent pattern first
        self._pattern_list = sorted(
            [(count, pattern) for pattern, count in pattern_info.items()],
            reverse=True)

    def get_all(self):
        """Retrieve all of the patterns in the repository.

        The patterns are returned in the repository order, i.e. from the
        most to the least frequently found.
        """
        return [pattern_info[1] for pattern_info in self._pattern_list]

    def get_random(self, num_patterns):
        """Retrieve the specified number of patterns randomly.

        Randomly selects patterns from the list and returns them. No
        pattern is returned more than once.

        Arguments:

        o num_patterns - The total number of patterns to return.

        Raises ValueError if more patterns are requested than the
        repository holds, since that many distinct patterns can never be
        collected.
        """
        available = len(self._pattern_list)
        if num_patterns > available:
            raise ValueError("Cannot pick %s distinct patterns from a "
                             "repository holding only %s"
                             % (num_patterns, available))

        all_patterns = []
        # patterns start out as dictionary keys, so they are hashable and
        # a set can be used to spot the ones already picked
        already_picked = set()

        while len(all_patterns) < num_patterns:
            # pick a pattern, and only add it if it is not already present
            new_pattern = random.choice(self._pattern_list)[1]

            if new_pattern not in already_picked:
                already_picked.add(new_pattern)
                all_patterns.append(new_pattern)

        return all_patterns

    def get_top_percentage(self, percent):
        """Return a percentage of the patterns.

        This returns the top 'percent' percentage of the patterns in the
        repository.

        Arguments:

        o percent - The fraction of the patterns to return. It is
        multiplied with the number of patterns and the result is truncated
        to an integer, so .5 returns the top half of the patterns.
        """
        all_patterns = self.get_all()

        num_to_return = int(len(all_patterns) * percent)

        return all_patterns[:num_to_return]

    def get_top(self, num_patterns):
        """Return the specified number of most frequently occurring patterns.

        Arguments:

        o num_patterns - The number of patterns to return.
        """
        return [pattern_info[1]
                for pattern_info in self._pattern_list[:num_patterns]]

    def get_differing(self, top_num, bottom_num):
        """Retrieve patterns that are at the extreme ranges.

        This returns both patterns at the top of the list (ie. the same as
        returned by get_top) and at the bottom of the list. This
        is especially useful for patterns that are the differences between
        two sets of patterns.

        Arguments:

        o top_num - The number of patterns to take from the top of the list.

        o bottom_num - The number of patterns to take from the bottom of
        the list.
        """
        # first get from the top of the list
        all_patterns = [pattern_info[1]
                        for pattern_info in self._pattern_list[:top_num]]

        # then from the bottom; a slice starting at -0 is the whole list,
        # so asking for zero bottom patterns has to be handled separately
        if bottom_num:
            all_patterns.extend(
                [pattern_info[1]
                 for pattern_info in self._pattern_list[-bottom_num:]])

        return all_patterns

    def remove_polyA(self, at_percentage=.9):
        """Remove patterns which are likely due to polyA tails from the lists.

        This is just a helper function to remove patterns which are likely
        just due to polyA tails, and thus are not really great motifs.
        This will also get rid of stuff like ATATAT, which might be a
        useful motif, so use at your own discretion.

        Only the list used by the get_* functions is changed; count() still
        reports the number of times a removed pattern was found.

        XXX Could we write a more general function, based on info content
        or something like that?

        Arguments:

        o at_percentage - The fraction of A and T residues in a pattern
        that qualifies it for being removed. A pattern is removed when its
        fraction is strictly greater than this value.
        """
        def is_at_rich(pattern):
            # an empty pattern has no residues at all, so it is kept
            # instead of dividing by its zero length
            if len(pattern) == 0:
                return False
            pattern_at = (float(pattern.count('A') + pattern.count('T'))
                          / len(pattern))
            return pattern_at > at_percentage

        # filter in a single pass; slice assignment keeps the same list
        # object so the master list is updated in place
        self._pattern_list[:] = [pattern_info
                                 for pattern_info in self._pattern_list
                                 if not is_at_rich(pattern_info[1])]

    def count(self, pattern):
        """Return the number of times the specified pattern is found.

        Patterns which are not in the repository give a count of 0.
        """
        try:
            return self._pattern_dict[pattern]
        except KeyError:
            return 0
254