Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
   11  """ Notes about the various classes of the restriction enzyme implementation. 
  12   
  13          RestrictionType is the type of all restriction enzymes. 
  14      ---------------------------------------------------------------------------- 
  15          AbstractCut implements some methods that are common to all enzymes. 
  16      ---------------------------------------------------------------------------- 
  17          NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  18                                  produced by the enzyme. 
  19                                  they correspond to the 4th field of the rebase 
  20                                  record emboss_e.NNN. 
  21                  0->NoCut    : the enzyme is not characterised. 
   22                  2->OneCut   : the enzyme produces one double strand cut. 
  23                  4->TwoCuts  : two double strand cuts. 
  24      ---------------------------------------------------------------------------- 
  25          Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  26                                  the enzyme. 
  27                                  Not implemented yet. 
  28      ---------------------------------------------------------------------------- 
  29          Palindromic,            if the site is palindromic or not. 
  30          NotPalindromic          allow some optimisations of the code. 
  31                                  No need to check the reverse strand 
  32                                  with palindromic sites. 
  33      ----------------------------------------------------------------------------                                     
  34          Unknown, Blunt,         represent the overhang. 
   35          Ov5, Ov3                Unknown is here for symmetry reasons and 
   36                                  corresponds to enzymes that are not characterised 
  37                                  in rebase. 
  38      ---------------------------------------------------------------------------- 
  39          Defined, Ambiguous,     represent the sequence of the overhang. 
  40          NotDefined              
  41                                  NotDefined is for enzymes not characterised in 
  42                                  rebase. 
  43                                   
  44                                  Defined correspond to enzymes that display a 
  45                                  constant overhang whatever the sequence. 
  46                                  ex : EcoRI. G^AATTC -> overhang :AATT 
  47                                              CTTAA^G 
  48   
  49                                  Ambiguous : the overhang varies with the 
  50                                  sequence restricted. 
  51                                  Typically enzymes which cut outside their 
  52                                  restriction site or (but not always) 
  53                                  inside an ambiguous site. 
  54                                  ex : 
  55                                  AcuI CTGAAG(22/20)  -> overhang : NN 
  56                                  AasI GACNNN^NNNGTC  -> overhang : NN 
  57                                       CTGN^NNNNNCAG 
  58   
   59              note : these 3 classes refer to the overhang, not the site. 
  60                 So the enzyme ApoI (RAATTY) is defined even if its restriction 
  61                 site is ambiguous. 
  62                                   
  63                      ApoI R^AATTY -> overhang : AATT -> Defined 
  64                           YTTAA^R 
  65                 Accordingly, blunt enzymes are always Defined even 
  66                 when they cut outside their restriction site. 
  67      ---------------------------------------------------------------------------- 
  68          Not_available,          as found in rebase file emboss_r.NNN files. 
  69          Commercially_available 
  70                                  allow the selection of the enzymes according to 
  71                                  their suppliers to reduce the quantity 
  72                                  of results. 
  73                                  Also will allow the implementation of buffer 
  74                                  compatibility tables. Not implemented yet. 
  75   
  76                                  the list of suppliers is extracted from 
  77                                  emboss_s.NNN 
  78      ---------------------------------------------------------------------------- 
  79          """ 
  80   
  81  import re 
  82  import itertools 
  83   
  84  from Bio.Seq import Seq, MutableSeq 
  85  from Bio.Alphabet import IUPAC 
  86   
  87  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict,\ 
  88       typedict, suppliers as suppliers_dict 
  89  from Bio.Restriction.RanaConfig import * 
  90  from Bio.Restriction.PrintFormat import PrintFormat 
  91  from Bio.Restriction.DNAUtils import check_bases 
  92   
  93   
  94   
  95  matching = {'A' : 'ARWMHVDN', 'C' : 'CYSMHBVN', 'G' : 'GRSKBVDN', 
  96              'T' : 'TYWKHBDN', 'R' : 'ABDGHKMNSRWV', 'Y' : 'CBDHKMNSTWVY', 
  97              'W' : 'ABDHKMNRTWVY', 'S' : 'CBDGHKMNSRVY', 'M' : 'ACBDHMNSRWVY', 
  98              'K' : 'BDGHKNSRTWVY', 'H' : 'ACBDHKMNSRTWVY', 
  99              'B' : 'CBDGHKMNSRTWVY', 'V' : 'ACBDGHKMNSRWVY', 
 100              'D' : 'ABDGHKMNSRTWVY', 'N' : 'ACBDGHKMNSRTWVY'} 
 101   
# Module-level alias: DNA is another name for the Seq class imported
# from Bio.Seq above.
DNA = Seq
 103       
class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

    Translate a Bio.Seq into a formatted sequence to be used with
    Restriction.

    Roughly: anything which is not IUPAC alphabet is removed and a space is
    added in front of the sequence, to get a biological index instead of a
    python index (i.e. the index of the first base is 1, not 0).  That
    clean-up is delegated to check_bases().

    The object also remembers the shape of the molecule: linear (default)
    or circular.  Restriction sites are searched over the edges of circular
    sequences.
    """

    def __init__(self, seq, linear=True):
        """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

        seq is either a Bio.Seq, Bio.MutableSeq or a FormattedSeq.
        If seq is a FormattedSeq it is copied attribute by attribute and
        linear has no effect on the shape of the new sequence.

        Raises TypeError for any other type.
        """
        if isinstance(seq, FormattedSeq):
            # Copy path: the data has already been checked and formatted.
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = seq.tostring()
            # Remember the original case so that slices can be handed back
            # in lower case (see __getitem__).
            self.lower = stringy.islower()
            self.data = check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return the number of bases (the leading pad is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """FS == other -> bool.

        True if other is a FormattedSeq with the same representation
        (sequence class, case, alphabet, shape) and the same data.
        """
        if not isinstance(other, FormattedSeq):
            return False
        # repr() alone is not sufficient: the repr of a Bio.Seq abbreviates
        # long sequences, so two different sequences could look identical.
        # The full data is therefore compared as well.
        return repr(self) == repr(other) and self.data == other.data

    def __ne__(self, other):
        """FS != other -> bool.  Negation of ==.

        Needed explicitly on Python 2, where != does not fall back on
        __eq__.
        """
        return not self.__eq__(other)

    def circularise(self):
        """FS.circularise() -> circularise FS in place."""
        self.linear = False
        return

    def linearise(self):
        """FS.linearise() -> linearise FS in place."""
        self.linear = True
        return

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence."""
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        Return a list of the matches of pattern in the sequence.
        Each item is a tuple (location, match.group); the bound group
        method is used with non palindromic sites.

        pattern is the regular expression pattern corresponding to the
        enzyme restriction site.
        size is the size of the restriction enzyme recognition site.
        """
        if self.is_linear():
            data = self.data
        else:
            # Circular molecule: append the first size-1 bases so that sites
            # spanning the origin are found too.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return the indexed/sliced data wrapped in the original class.

        The result is lower-cased when the original sequence was lower
        case.
        """
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
201 202
class RestrictionType(type):
    """RestrictionType. Type from which all enzyme classes derive.

    Implements the operator methods, so that the operators act on the
    enzyme classes themselves (RE + other, RE / seq, RE // seq, RE % other,
    RE >> other, comparisons, len(RE)).
    """

    def __init__(cls, name='', bases=(), dct=None):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        The new class must provide a ``compsite`` attribute holding the
        source of a regular expression; it is replaced by the compiled
        pattern.
        """
        if dct is None:
            dct = {}
        # super() is already bound to cls.  Passing cls once more hands four
        # arguments to type.__init__, which only accepts one or three.
        super(RestrictionType, cls).__init__(name, bases, dct)
        cls.compsite = re.compile(cls.compsite)

    def __add__(cls, other):
        """RE.__add__(other) -> RestrictionBatch().

        If other is an enzyme, return a batch of the two enzymes.
        If other is already a RestrictionBatch, add the enzyme to it.
        Raises TypeError for anything else.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError

    def __div__(cls, other):
        """RE.__div__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rdiv__(cls, other):
        """RE.__rdiv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __truediv__(cls, other):
        """RE.__truediv__(other) -> list.

        RE/other
        returns RE.search(other)."""
        return cls.search(other)

    def __rtruediv__(cls, other):
        """RE.__rtruediv__(other) -> list.

        other/RE
        returns RE.search(other)."""
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE.__floordiv__(other) -> list.

        RE//other
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """RE.__rfloordiv__(other) -> list.

        other//RE
        returns RE.catalyse(other)."""
        return cls.catalyse(other)

    def __str__(cls):
        """RE.__str__() -> str.

        Return the name of the enzyme."""
        return cls.__name__

    def __repr__(cls):
        """RE.__repr__() -> str.

        The name of the enzyme; used with eval or exec it gives back the
        enzyme."""
        return "%s" % cls.__name__

    def __len__(cls):
        """RE.__len__() -> int.

        Length of the recognition site."""
        return cls.size

    def __eq__(cls, other):
        """RE == other -> bool.

        True if RE and other are the same enzyme (identity)."""
        return other is cls

    # Equality is identity, so the default identity hash is the matching
    # hash.  It must be restated because a class defining __eq__ without
    # __hash__ is unhashable on Python 3, and enzymes are stored in sets.
    __hash__ = type.__hash__

    def __ne__(cls, other):
        """RE != other -> bool.

        NOTE: this is deliberately not the negation of ==.
        Strict isoschizomer (same recognition site, same restriction,
        i.e. equal ``charac``) -> False.
        Everything else, including non enzymes -> True."""
        if not isinstance(other, RestrictionType):
            return True
        return not (cls.charac == other.charac)

    def __rshift__(cls, other):
        """RE >> other -> bool.

        neoschizomer : same recognition site, different restriction -> True
        all the others -> False"""
        if not isinstance(other, RestrictionType):
            return False
        return bool(cls.site == other.site and cls.charac != other.charac)

    def __mod__(cls, other):
        """a % b -> bool.

        Test compatibility of the overhang of a and b.
        True if a and b have compatible overhang.
        Raises TypeError if b is not an enzyme."""
        if not isinstance(other, RestrictionType):
            raise TypeError(
                'expected RestrictionType, got %s instead' % type(other))
        return cls._mod1(other)

    # Ordering.  Enzymes sort by:
    #   1. size of the recognition site,
    #   2. if equal size, alphabetical order of the names,
    # which is exactly the ordering of the tuples (len, name).
    # Comparing with a non enzyme raises NotImplementedError.

    def __ge__(cls, other):
        """a >= b -> bool.

        a is greater or equal than b if the a site is longer than b site.
        If their sites have the same length, sort by alphabetical order of
        their names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """a > b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """a <= b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """a < b -> bool.

        sorting order :
                    1. size of the recognition site.
                    2. if equal size, alphabetical order of the names."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)
388 389
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethods.

    For internal use only. Not meant to be instantiated."""

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        Return a list of all the sites of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance, a Bio.Seq.MutableSeq instance or
        an already formatted FormattedSeq (in which case linear is ignored).

        If linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.

        Side effect: the formatted sequence is stored on the class as
        ``cls.dna``.
        """
        #
        #   Separating search from _search allows a (very limited)
        #   optimisation of the search when using a batch of restriction
        #   enzymes: the DNA is then formatted once by the class which
        #   implements the batch instead of once per enzyme.
        #   See RestrictionBatch.search() for example.
        #
        if isinstance(dna, FormattedSeq):
            cls.dna = dna
        else:
            cls.dna = FormattedSeq(dna, linear)
        return cls._search()

    @classmethod
    def all_suppliers(self):
        """RE.all_suppliers() -> print all the suppliers, one per line.

        The names are the first item of each value of the suppliers
        dictionary, sorted."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        # Parenthesised single argument: same output with the Python 2
        # print statement and the Python 3 print function.
        print(",\n".join(supply))
        return

    @classmethod
    def is_equischizomer(self, other):
        """RE.is_equischizomer(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction."""
        return not self != other

    @classmethod
    def is_neoschizomer(self, other):
        """RE.is_neoschizomer(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction."""
        return self >> other

    @classmethod
    def is_isoschizomer(self, other):
        """RE.is_isoschizomer(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site."""
        return (not self != other) or self >> other

    @classmethod
    def equischizomers(self, batch=None):
        """RE.equischizomers([batch]) -> list.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.
        If batch is supplied (and not empty) it is used instead of the
        default AllEnzymes.

        equischizomer <=> same site, same position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if not self != x]
        # RE is its own equischizomer: drop it.  A user supplied batch need
        # not contain RE, in which case there is nothing to drop.
        try:
            r.remove(self)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def neoschizomers(self, batch=None):
        """RE.neoschizomers([batch]) -> list.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied (and not empty) it is used instead of the
        default AllEnzymes.

        neoschizomer <=> same site, different position of restriction."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if self >> x]
        r.sort()
        return r

    @classmethod
    def isoschizomers(self, batch=None):
        """RE.isoschizomers([batch]) -> list.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, RE itself excluded.
        If batch is supplied (and not empty) it is used instead of the
        default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        r = [x for x in batch if (self >> x) or (not self != x)]
        # Same as in equischizomers(): RE may be absent from the batch.
        try:
            r.remove(self)
        except ValueError:
            pass
        r.sort()
        return r

    @classmethod
    def frequency(self):
        """RE.frequency() -> int.

        Frequency of the site (the ``freq`` attribute of the enzyme)."""
        return self.freq
510 511
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These are generally enzymes that have been only partially characterised,
    for which the way they cut the DNA is unknown, or enzymes for which the
    pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes, so their catalyse() method is not usable.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(self):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here."""
        return False

    @classmethod
    def cut_twice(self):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here."""
        return False

    @classmethod
    def _modify(self, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is the integer location of a match of the enzyme pattern
        in the sequence; the generator yields the place(s) where the enzyme
        will cut.  The cut of these enzymes is not known, so the location
        of the match is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(self, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.
        """
        yield location

    @classmethod
    def characteristic(self):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        For these enzymes all four cut positions are None."""
        return None, None, None, None, self.site
592
class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(self):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always True here."""
        return True

    @classmethod
    def cut_twice(self):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here."""
        return False

    @classmethod
    def _modify(self, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is the integer location of a match of the enzyme pattern
        in the sequence; the generator yields the real place where the
        enzyme will cut, i.e. location shifted by fst5.

        example :
            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence :
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so :
            EcoRI._modify(10) -> 11.
        """
        yield location + self.fst5

    @classmethod
    def _rev_modify(self, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: yields location shifted back by fst3.
        """
        yield location - self.fst3

    @classmethod
    def characteristic(self):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.

        For these enzymes the two second-cut positions are None."""
        return self.fst5, self.fst3, None, None, self.site
663 664
class TwoCuts(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def cut_once(self):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here."""
        return False

    @classmethod
    def cut_twice(self):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always True here."""
        return True

    @classmethod
    def _modify(self, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is the integer location of a match of the enzyme pattern
        in the sequence; the generator yields the real places where the
        enzyme will cut.  The enzyme cuts twice, so two integers are
        yielded: location shifted by fst5, then location shifted by scd5.
        """
        yield location + self.fst5
        yield location + self.scd5

    @classmethod
    def _rev_modify(self, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic: yields location shifted back by fst3,
        then by scd3.
        """
        yield location - self.fst3
        yield location - self.scd3

    @classmethod
    def characteristic(self):
        """RE.characteristic() -> tuple.

        The tuple contains the attributes :
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site."""
        return self.fst5, self.fst3, self.scd5, self.scd3, self.site
737 738
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable."""

    @classmethod
    def is_methylable(self):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always True here."""
        return True
750
class Meth_Undep(AbstractCut):
    """Implement the information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation."""

    @classmethod
    def is_methylable(self):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable.  Always False here."""
        return False
762
class Palindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(self):
        """RE._search() -> list.

        For internal use only.

        Implement the search method for palindromic enzymes: every match of
        the compiled site in self.dna is converted to its cut position(s)
        with _modify.  The result is stored in self.results and, when not
        empty, post-processed by _drop().
        """
        cuts = []
        for start, _group in self.dna.finditer(self.compsite, self.size):
            cuts.extend(self._modify(start))
        self.results = cuts
        if cuts:
            self._drop()
        # _drop() may have replaced self.results: return the attribute.
        return self.results

    @classmethod
    def is_palindromic(self):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always True here."""
        return True
793 794
class NonPalindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are not
    palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _search(self):
        """RE._search() -> list.

        For internal use only.

        Implement the search method for non palindromic enzymes.  Matches
        are split in two families: those converted with _modify and those
        converted with _rev_modify (kept apart in self.on_minus).  Both are
        merged and sorted in self.results, which is post-processed by
        _drop() when not empty.
        """
        matches = self.dna.finditer(self.compsite, self.size)
        forward = self._modify
        backward = self._rev_modify
        name = str(self)
        self.results = []
        self.on_minus = []
        for start, group in matches:
            # NOTE(review): presumably the compiled site labels its
            # forward-strand alternative with a group named after the
            # enzyme -- confirm where compsite is built.
            if group(name):
                self.results.extend(forward(start))
            else:
                self.on_minus.extend(backward(start))
        self.results.extend(self.on_minus)
        if self.results:
            self.results.sort()
            self._drop()
        return self.results

    @classmethod
    def is_palindromic(self):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome.  Always False here."""
        return False
834
class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        For the other enzymes this returns a tuple of dna as will be
        produced by using RE to restrict the dna.  The restriction of these
        enzymes is unknown, so this always raises NotImplementedError.

        dna and linear are accepted for signature compatibility only."""
        raise NotImplementedError('%s restriction is unknown.'
                                  % self.__name__)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always False here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'unknown' here."""
        return 'unknown'

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        List of all the enzymes that share compatible end with RE.
        The end is unknown, so the list is always empty.

        batch is accepted (and ignored) so that the method can be called
        with the same arguments as Blunt.compatible_end()."""
        return []

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        For internal use only.

        Test for the compatibility of restriction ending of RE and other.
        Always False here."""
        return False
916
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        Return a tuple of dna as will be produced by using RE to restrict
        the dna.  Without any site the tuple holds the whole sequence.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        If linear is False, the sequence is considered to be circular and
        the output will be modified accordingly."""
        r = self.search(dna, linear)
        d = self.dna
        if not r:
            return d[1:],
        # Fragments delimited by two consecutive sites (none if one site).
        between = [d[start:stop] for start, stop in zip(r, r[1:])]
        if d.is_linear():
            # START of the sequence to FIRST site, the fragments in
            # between, then LAST site to END of the sequence.
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)

    # American spelling alias.
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.  Always True here.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        Can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'blunt' here."""
        return 'blunt'

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        Sorted list of all the enzymes that share compatible end with RE,
        i.e. all the blunt enzymes.
        If batch is supplied (and not empty) it is searched instead of the
        default AllEnzymes."""
        if not batch:
            batch = AllEnzymes
        # Iterate the batch that was selected above, not AllEnzymes
        # unconditionally, otherwise the batch argument has no effect.
        r = [x for x in iter(batch) if x.is_blunt()]
        r.sort()
        return r

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        For internal use only.

        Test for the compatibility of restriction ending of RE and other:
        True if other is a blunt enzyme too."""
        return issubclass(other, Blunt)
1035
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna the tuple holds one single fragment."""
        r = self.search(dna, linear)
        #
        #   self.dna is read only once the search is done.
        #   NOTE(review): presumably search() is what stores it - confirm.
        #
        d = self.dna
        if not r:
            return d[1:],
        #
        #   fragments lying between two consecutive cuts
        #   (empty when there is only one site).
        #
        between = [d[a:b] for a, b in zip(r, r[1:])]
        if d.is_linear():
            #
            #   START to FIRST site, the inner fragments, LAST site to END.
            #
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            #
            #   circular : bridge LAST site to FIRST site, then the others.
            #
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)

    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown" """
        return "5' overhang"

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that share compatible end with
        RE : the 5' overhang enzymes x for which x % RE is true.

        if batch is not given (or is empty) AllEnzymes is screened."""
        if not batch:
            batch = AllEnzymes
        #
        #   screen the batch that was asked for, not always AllEnzymes.
        #
        r = [x for x in iter(batch) if x.is_5overhang() and x % self]
        r.sort()
        return r

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang enzyme can be compatible, the final
        decision is left to RE._mod2(other)."""
        if issubclass(other, Ov5):
            return self._mod2(other)
        return False
1153 1154
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.

        When RE has no site in dna the tuple holds one single fragment."""
        r = self.search(dna, linear)
        #
        #   self.dna is read only once the search is done.
        #   NOTE(review): presumably search() is what stores it - confirm.
        #
        d = self.dna
        if not r:
            return d[1:],
        #
        #   fragments lying between two consecutive cuts
        #   (empty when there is only one site).
        #
        between = [d[a:b] for a, b in zip(r, r[1:])]
        if d.is_linear():
            #
            #   START to FIRST site, the inner fragments, LAST site to END.
            #
            fragments = [d[1:r[0]]] + between + [d[r[-1]:]]
        else:
            #
            #   circular : bridge LAST site to FIRST site, then the others.
            #
            fragments = [d[r[-1]:] + d[1:r[0]]] + between
        return tuple(fragments)

    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also :
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also :
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also :
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown" """
        return "3' overhang"

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of the enzymes of batch that share compatible end with
        RE : the 3' overhang enzymes x for which x % RE is true.

        if batch is not given (or is empty) AllEnzymes is screened."""
        if not batch:
            batch = AllEnzymes
        #
        #   screen the batch that was asked for, not always AllEnzymes.
        #
        r = [x for x in iter(batch) if x.is_3overhang() and x % self]
        r.sort()
        return r

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang enzyme can be compatible, the final
        decision is left to RE._mod2(other)."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if issubclass(other, Ov3):
            return self._mod2(other)
        return False
1275 1276
class Defined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    and the cut are not variable.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes :
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        Rewrite self.results in place : drop the sites situated outside the
        sequence for a linear sequence, shift them back into the sequence
        for a circular one."""
        #
        #   The distance from the site to the cut is added after the site is
        #   found (see _modify and _rev_modify), so a result may end up
        #   before the first or after the last base.
        #   Linear sequence : such results are removed altogether.
        #   Circular sequence : the site is in the sequence, so the result
        #   is corrected by the length of the sequence instead.
        #
        size = len(self.dna)
        if self.dna.is_linear():
            kept = list(itertools.dropwhile(lambda pos: pos < 1,
                                            self.results))
            self.results = list(itertools.takewhile(lambda pos: pos < size,
                                                    kept))
        else:
            # leading results that fell before the first base.
            for rank, pos in enumerate(self.results):
                if pos >= 1:
                    break
                self.results[rank] += size
            # trailing results that fell after the last base.
            for rank, pos in enumerate(self.results[::-1]):
                if pos <= size:
                    break
                self.results[-(rank + 1)] -= size
        return

    @classmethod
    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return False

    @classmethod
    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        f5 = self.fst5
        f3 = self.fst3
        site = self.site
        if self.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if self.is_5overhang():
            if f5 == f3 == 0:
                return 'N^' + self.site + '_N'
            if f3 == 0:
                return site[:f5] + '^' + site[f5:] + '_N'
            return site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        if self.is_blunt():
            return site[:f5] + '^_' + site[f5:]
        # what is left is the 3' overhang case.
        if f5 == f3 == 0:
            return 'N_' + site + '^N'
        return site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]

    @classmethod
    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Identical overhang sequences are compatible; when other derives
        from Ambiguous the decision is delegated to other._mod2(RE)."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if other.ovhgseq == self.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(self)
        return False
1410 1411
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes :
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        drop the site that are situated outside the sequence in linear sequence.
        modify the index for site in circular sequences.
        self.results is rewritten in place."""
        length = len(self.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if self.dna.is_linear():
            self.results = [x for x in drop(lambda x: x < 1, self.results)]
            self.results = [x for x in take(lambda x: x < length,
                                            self.results)]
        else:
            # leading results that fell before the first base.
            for index, location in enumerate(self.results):
                if location < 1:
                    self.results[index] += length
                else:
                    break
            # trailing results that fell after the last base.
            for index, location in enumerate(self.results[::-1]):
                if location > length:
                    self.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return False

    @classmethod
    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhang of RE is turned into a regular expression which is
        matched against the overhang of other."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(self.ovhgseq) != len(other.ovhgseq):
            return False
        se = self.ovhgseq
        for base in self.ovhgseq:
            # A, T, C and G stand for themselves.
            if base in 'N':
                se = se.replace('N', '.')
            if base in 'RYWMSKHDBV':
                #
                #   NOTE(review): matching is presumably the table giving
                #   the bases covered by each ambiguity code - confirm.
                #
                se = se.replace(base, '[' + matching[base] + ']')
        if re.match(se, other.ovhgseq):
            return True
        return False

    @classmethod
    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt enzyme cutting inside its site
        (0 <= fst5 <= len(RE))."""
        f5 = self.fst5
        f3 = self.fst3
        length = len(self)
        site = self.site
        #
        #   the result is kept in rep (and not re) so that the re module
        #   is not hidden inside this method.
        #
        if self.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif self.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                #
                #   both cuts are upstream of the site. The '^' marker is
                #   followed by abs(f5) N (multiplying the two strings
                #   together, as was done before, raises a TypeError).
                #
                rep = 'N^' + abs(f5) * 'N' + '_' + abs(length + f3) * 'N' \
                      + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^' \
                      + (length + f3 - f5) * 'N' + '_N'
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif self.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (self.name, f5))
        else:
            # 3' overhang.
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
            elif 0 <= f5 <= length:
                rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0:
                rep = site + f3 * 'N' + '_' + (f5 - f3 - length) * 'N' + '^N'
            elif f5 < 0:
                rep = 'N_' + abs(f3 - f5 + length) * 'N' + '^' \
                      + abs(f5) * 'N' + site
            else:
                rep = 'N_' + abs(f3 + length) * 'N' + site \
                      + (f5 - length) * 'N' + '^N'
        return rep
1579 1580
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        Nothing is dropped for a linear sequence.
        For a circular sequence the results situated outside the sequence
        are shifted back into it, self.results is modified in place."""
        if self.dna.is_linear():
            return
        length = len(self.dna)
        # leading results that fell before the first base.
        for index, location in enumerate(self.results):
            if location < 1:
                self.results[index] += length
            else:
                break
        #
        #   trailing results that fell after the last base : they must be
        #   scanned from the end, hence the reversed copy ([::-1]). The
        #   former [:-1] slice walked from the start while the indexes
        #   being fixed were counted from the end.
        #
        for index, location in enumerate(self.results[::-1]):
            if location > length:
                self.results[-(index + 1)] -= length
            else:
                break
        return

    @classmethod
    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also :
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also :
            RE.is_defined()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also :
            RE.is_defined()
            RE.is_ambiguous()"""
        return True

    @classmethod
    def _mod2(self, other):
        """RE._mod2(other) -> raise ValueError.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        The overhang of RE is not defined, the test always raises a
        ValueError."""
        #
        #   Normally we should not arrive here. But well better safe than sorry.
        #   the overhang is not defined we are compatible with nobody.
        #   An error is raised rather than returning False quietly.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(self), str(other), str(self)))

    @classmethod
    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie :
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % self.site
1686 1687
class Commercially_available(AbstractCut):
    #
    #   Recent additions to Rebase make this naming convention uncertain.
    #   It may be better to speak of enzymes which have a supplier.
    #
    """Implement the methods specific to the enzymes which are commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def suppliers(self):
        """RE.suppliers() -> print the suppliers of RE.

        One line is printed for each entry of suppliers_dict whose code
        is found in RE.suppl."""
        for code, entry in suppliers_dict.items():
            if code in self.suppl:
                print(entry[0] + ',')
        return

    @classmethod
    def supplier_list(self):
        """RE.supplier_list() -> list.

        list of the supplier names for RE."""
        names = []
        for code, entry in suppliers_dict.items():
            if code in self.suppl:
                names.append(entry[0])
        return names

    @classmethod
    def buffers(self, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet, return None for the time being."""
        return

    @classmethod
    def is_comm(self):
        """RE.is_comm() -> bool.

        True if RE has suppliers."""
        return True
1727 1728
class Not_available(AbstractCut):
    """Implement the methods specific to the enzymes which are not commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @staticmethod
    def suppliers():
        """RE.suppliers() -> None.

        There is no supplier to print for these enzymes."""
        return None

    @classmethod
    def supplier_list(self):
        """RE.supplier_list() -> list.

        list of the supplier names for RE, always empty here."""
        return []

    @classmethod
    def buffers(self, supplier):
        """RE.buffers(supplier) -> raise TypeError.

        The enzyme has no supplier, hence no buffer."""
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(self):
        """RE.is_comm() -> bool.

        True if RE has suppliers."""
        return False
1760 1761 1762 ############################################################################### 1763 # # 1764 # Restriction Batch # 1765 # # 1766 ############################################################################### 1767 1768
class RestrictionBatch(set):
    """Set of restriction enzymes which can be searched as a whole.

    Attributes set by the constructor :
        mapping        -- results of the last search, enzyme -> result.
        already_mapped -- (sequence as a string, linear) of the last search,
                          None as long as no search has been done.
        suppliers      -- supplier codes used to build the batch."""

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence[, suppliers]]) -> new RestrictionBatch.

        first is an iterable of enzymes (or of their names), suppliers an
        iterable of supplier codes : all the enzymes listed for these codes
        in suppliers_dict are added to the batch.

        raise a ValueError if an element of first is not an enzyme.
        raise a KeyError if a code is not in suppliers_dict."""
        first = [self.format(x) for x in first]
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        #
        #   add_supplier() and current_suppliers() rely on this list.
        #
        self.suppliers = list(suppliers)

    def __str__(self):
        """Names joined with '+', shortened to 'a+b...y+z' from 5 enzymes."""
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        """other in B -> bool. False when other is not an enzyme."""
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """B / dna -> B.search(dna)."""
        return self.search(other)

    def __rdiv__(self, other):
        """dna / B -> B.search(dna)."""
        return self.search(other)

    #
    #   same operators when true division is in use.
    #
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        if enzyme is not in B and add is False, raise a ValueError."""
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        #
        #   the elements are put in the set itself : an attribute such as
        #   _data is not read by the builtin set and leaves the batch empty.
        #
        new = RestrictionBatch()
        for enzyme in list(filter(func, self)):
            new.add_nocheck(enzyme)
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch."""
        suppl_list = [suppliers_dict[x][0] for x in self.suppliers]
        suppl_list.sort()
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if str(y) can be evaluated to a RestrictionType return that type
        raise a Value Error in all other case."""
        #
        #   NOTE(review): str(y) goes through eval(). Do not feed this
        #   method with text coming from an untrusted source.
        #
        try:
            if isinstance(y, RestrictionType):
                return y
            # evaluated once; str(y) also serves the objects that are
            # not strings themselves.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        The errors raised while evaluating y are not caught."""
        return isinstance(y, RestrictionType) or \
               isinstance(eval(str(y)), RestrictionType)

    def split(self, *classes, **bool):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes deriving from every class given, or not deriving
        from it when the keyword class.__name__ is False.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        def splittest(element):
            for klass in classes:
                wanted = bool.get(klass.__name__, True)
                if issubclass(element, klass):
                    if not wanted:
                        return False
                elif wanted:
                    return False
            return True
        #
        #   fill the set itself (see lambdasplit).
        #
        new = RestrictionBatch()
        for enzyme in self:
            if splittest(enzyme):
                new.add_nocheck(enzyme)
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        l = [str(e) for e in self]
        l.sort()
        return l

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(self):
        """B.suppl_codes() -> dict

        letter code for the suppliers"""
        supply = dict([(k, v[0]) for k, v in suppliers_dict.items()])
        return supply

    @classmethod
    def show_codes(self):
        """B.show_codes() -> print the letter codes for the suppliers."""
        supply = [' = '.join(i) for i in self.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """B.search(dna[, linear]) -> dict.

        map each enzyme of B to the result of its own search in dna.
        dna is either a DNA or a FormattedSeq instance, in the later case
        dna.linear is used and the linear argument is ignored.

        The results are cached : searching again the same sequence (compared
        as a string) with the same topology returns the stored dictionary.

        raise a TypeError for any other type of dna."""
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        #   For the searching, we just care about the sequence as a string,
        #   if that is the same we can use the cached search results.
        #   At the time of writing, Seq == method isn't implemented,
        #   and therefore does object identity which is stricter.
        #
        if isinstance(dna, DNA):
            key = str(dna), linear
            fseq = None
        elif isinstance(dna, FormattedSeq):
            key = str(dna), dna.linear
            fseq = dna
        else:
            raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                            % type(dna))
        if key == self.already_mapped:
            return self.mapping
        if fseq is None:
            fseq = FormattedSeq(dna, linear)
        mapping = dict([(x, x.search(fseq)) for x in self])
        #
        #   the cache is updated only once every search has succeeded, so
        #   that an error can not leave a key pointing to old results.
        #
        self.mapping = mapping
        self.already_mapped = key
        return self.mapping
1990 1991 ############################################################################### 1992 # # 1993 # Restriction Analysis # 1994 # # 1995 ############################################################################### 1996
1997 -class Analysis(RestrictionBatch, PrintFormat) :
1998
def __init__(self, restrictionbatch=RestrictionBatch(), sequence=DNA(''),
             linear=True):
    """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

    For most of the methods of this class, if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given, a new analysis is run with the Restriction
    Batch which was given when the Analysis class was instantiated.

    When sequence is not empty it is searched at once."""
    #
    #   NOTE(review): the default RestrictionBatch() and DNA('') are built
    #   once, when the function is defined. Every Analysis created without
    #   these arguments refers to the same two objects.
    #
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb, self.sequence, self.linear = restrictionbatch, sequence, linear
    if sequence:
        self.search(sequence, linear)
2013
2014 - def __repr__(self) :
2015 return 'Analysis(%s,%s,%s)'%\ 2016 (repr(self.rb),repr(self.sequence),self.linear)
2017
2018 - def _sub_set(self, wanted) :
2019 """A._sub_set(other_set) -> dict. 2020 2021 Internal use only. 2022 2023 screen the results through wanted set. 2024 Keep only the results for which the enzymes is in wanted set. 2025 """ 2026 return dict([(k,v) for k,v in self.mapping.iteritems() if k in wanted])
2027
2028 - def _boundaries(self, start, end) :
2029 """A._boundaries(start, end) -> tuple. 2030 2031 Format the boundaries for use with the methods that limit the 2032 search to only part of the sequence given to analyse. 2033 """ 2034 if not isinstance(start, int) : 2035 raise TypeError('expected int, got %s instead' % type(start)) 2036 if not isinstance(end, int) : 2037 raise TypeError('expected int, got %s instead' % type(end)) 2038 if start < 1 : 2039 start += len(self.sequence) 2040 if end < 1 : 2041 end += len(self.sequence) 2042 if start < end : 2043 pass 2044 else : 2045 start, end == end, start 2046 if start < 1 : 2047 start == 1 2048 if start < end : 2049 return start, end, self._test_normal 2050 else : 2051 return start, end, self._test_reverse
2052
2053 - def _test_normal(self, start, end, site) :
2054 """A._test_normal(start, end, site) -> bool. 2055 2056 Internal use only 2057 Test if site is in between start and end. 2058 """ 2059 return start <= site < end
2060
2061 - def _test_reverse(self, start, end, site) :
2062 """A._test_reverse(start, end, site) -> bool. 2063 2064 Internal use only 2065 Test if site is in between end and start (for circular sequences). 2066 """ 2067 return start <= site <= len(self.sequence) or 1 <= site < end
2068
2069 - def print_that(self, dct=None, title='', s1='') :
2070 """A.print_that([dct[, title[, s1]]]) -> print the results from dct. 2071 2072 If dct is not given the full dictionary is used. 2073 """ 2074 if not dct : 2075 dct = self.mapping 2076 print 2077 return PrintFormat.print_that(self, dct, title, s1)
2078
2079 - def change(self, **what) :
2080 """A.change(**attribute_name) -> Change attribute of Analysis. 2081 2082 It is possible to change the width of the shell by setting 2083 self.ConsoleWidth to what you want. 2084 self.NameWidth refer to the maximal length of the enzyme name. 2085 2086 Changing one of these parameters here might not give the results 2087 you expect. In which case, you can settle back to a 80 columns shell 2088 or try to change self.Cmodulo and self.PrefWidth in PrintFormat until 2089 you get it right.""" 2090 for k,v in what.iteritems() : 2091 if k in ('NameWidth', 'ConsoleWidth') : 2092 setattr(self, k, v) 2093 self.Cmodulo = self.ConsoleWidth % self.NameWidth 2094 self.PrefWidth = self.ConsoleWidth - self.Cmodulo 2095 elif k is 'sequence' : 2096 setattr(self, 'sequence', v) 2097 self.search(self.sequence, self.linear) 2098 elif k is 'rb' : 2099 self = Analysis.__init__(self, v, self.sequence, self.linear) 2100 elif k is 'linear' : 2101 setattr(self, 'linear', v) 2102 self.search(self.sequence, v) 2103 elif k in ('Indent', 'Maxsize') : 2104 setattr(self, k, v) 2105 elif k in ('Cmodulo', 'PrefWidth') : 2106 raise AttributeError( \ 2107 'To change %s, change NameWidth and/or ConsoleWidth' \ 2108 % name) 2109 else : 2110 raise AttributeError( \ 2111 'Analysis has no attribute %s' % name) 2112 return
2113
2114 - def full(self, linear=True) :
2115 """A.full() -> dict. 2116 2117 Full Restriction Map of the sequence.""" 2118 return self.mapping
2119
2120 - def blunt(self, dct = None) :
2121 """A.blunt([dct]) -> dict. 2122 2123 Only the enzymes which have a 3'overhang restriction site.""" 2124 if not dct : 2125 dct = self.mapping 2126 return dict([(k,v) for k,v in dct.iteritems() if k.is_blunt()])
2127
2128 - def overhang5(self, dct=None) :
2129 """A.overhang5([dct]) -> dict. 2130 2131 Only the enzymes which have a 5' overhang restriction site.""" 2132 if not dct : 2133 dct = self.mapping 2134 return dict([(k,v) for k,v in dct.iteritems() if k.is_5overhang()])
2135 2136
2137 - def overhang3(self, dct=None) :
2138 """A.Overhang3([dct]) -> dict. 2139 2140 Only the enzymes which have a 3'overhang restriction site.""" 2141 if not dct : 2142 dct = self.mapping 2143 return dict([(k,v) for k,v in dct.iteritems() if k.is_3overhang()])
2144 2145
2146 - def defined(self, dct=None) :
2147 """A.defined([dct]) -> dict. 2148 2149 Only the enzymes that have a defined restriction site in Rebase.""" 2150 if not dct : 2151 dct = self.mapping 2152 return dict([(k,v) for k,v in dct.iteritems() if k.is_defined()])
2153
2154 - def with_sites(self, dct=None) :
2155 """A.with_sites([dct]) -> dict. 2156 2157 Enzymes which have at least one site in the sequence.""" 2158 if not dct : 2159 dct = self.mapping 2160 return dict([(k,v) for k,v in dct.iteritems() if v])
2161
2162 - def without_site(self, dct=None) :
2163 """A.without_site([dct]) -> dict. 2164 2165 Enzymes which have no site in the sequence.""" 2166 if not dct : 2167 dct = self.mapping 2168 return dict([(k,v) for k,v in dct.iteritems() if not v])
2169
2170 - def with_N_sites(self, N, dct=None) :
2171 """A.With_N_Sites(N [, dct]) -> dict. 2172 2173 Enzymes which cut N times the sequence.""" 2174 if not dct : 2175 dct = self.mapping 2176 return dict([(k,v) for k,v in dct.iteritems()if len(v) == N])
2177
2178 - def with_number_list(self, list, dct= None) :
2179 if not dct : 2180 dct = self.mapping 2181 return dict([(k,v) for k,v in dct.iteritems() if len(v) in list])
2182
2183 - def with_name(self, names, dct=None) :
2184 """A.with_name(list_of_names [, dct]) -> 2185 2186 Limit the search to the enzymes named in list_of_names.""" 2187 for i, enzyme in enumerate(names) : 2188 if not enzyme in AllEnzymes : 2189 print "no datas for the enzyme:", str(name) 2190 del names[i] 2191 if not dct : 2192 return RestrictionBatch(names).search(self.sequence) 2193 return dict([(n, dct[n]) for n in names if n in dct])
2194
2195 - def with_site_size(self, site_size, dct=None) :
2196 """A.with_site_size(site_size [, dct]) -> 2197 2198 Limit the search to the enzymes whose site is of size <site_size>.""" 2199 sites = [name for name in self if name.size == site_size] 2200 if not dct : 2201 return RestrictionBatch(sites).search(self.sequence) 2202 return dict([(k,v) for k,v in dct.iteritems() if k in site_size])
2203
2204 - def only_between(self, start, end, dct=None) :
2205 """A.only_between(start, end[, dct]) -> dict. 2206 2207 Enzymes that cut the sequence only in between start and end.""" 2208 start, end, test = self._boundaries(start, end) 2209 if not dct : 2210 dct = self.mapping 2211 d = dict(dct) 2212 for key, sites in dct.iteritems() : 2213 if not sites : 2214 del d[key] 2215 continue 2216 for site in sites: 2217 if test(start, end, site) : 2218 continue 2219 else : 2220 del d[key] 2221 break 2222 return d
2223
2224 - def between(self, start, end, dct=None) :
2225 """A.between(start, end [, dct]) -> dict. 2226 2227 Enzymes that cut the sequence at least in between start and end. 2228 They may cut outside as well.""" 2229 start, end, test = self._boundaries(start, end) 2230 d = {} 2231 if not dct : 2232 dct = self.mapping 2233 for key, sites in dct.iteritems() : 2234 for site in sites : 2235 if test(start, end, site) : 2236 d[key] = sites 2237 break 2238 continue 2239 return d
2240
2241 - def show_only_between(self, start, end, dct=None) :
2242 """A.show_only_between(start, end [, dct]) -> dict. 2243 2244 Enzymes that cut the sequence outside of the region 2245 in between start and end but do not cut inside.""" 2246 d = [] 2247 if start <= end : 2248 d = [(k, [vv for vv in v if start<=vv<=end]) 2249 for v in self.between(start, end, dct)] 2250 else : 2251 d = [(k, [vv for vv in v if start<=vv or vv <= end]) 2252 for v in self.between(start, end, dct)] 2253 return dict(d)
2254
2255 - def only_outside(self, start, end, dct = None) :
2256 """A.only_outside(start, end [, dct]) -> dict. 2257 2258 Enzymes that cut the sequence outside of the region 2259 in between start and end but do not cut inside.""" 2260 start, end, test = self._boundaries(start, end) 2261 if not dct : dct = self.mapping 2262 d = dict(dct) 2263 for key, sites in dct.iteritems() : 2264 if not sites : 2265 del d[key] 2266 continue 2267 for site in sites : 2268 if test(start, end, site) : 2269 del d[key] 2270 break 2271 else : 2272 continue 2273 return d
2274
2275 - def outside(self, start, end, dct=None) :
2276 """A.outside((start, end [, dct]) -> dict. 2277 2278 Enzymes that cut outside the region in between start and end. 2279 No test is made to know if they cut or not inside this region.""" 2280 start, end, test = self._boundaries(start, end) 2281 if not dct : 2282 dct = self.mapping 2283 d = {} 2284 for key, sites in dct.iteritems() : 2285 for site in sites : 2286 if test(start, end, site) : 2287 continue 2288 else : 2289 d[key] = sites 2290 break 2291 return d
2292 2293
2294 - def do_not_cut(self, start, end, dct = None) :
2295 """A.do_not_cut(start, end [, dct]) -> dict. 2296 2297 Enzymes that do not cut the region in between start and end.""" 2298 if not dct : 2299 dct = self.mapping 2300 d = self.without_site() 2301 d.update(self.only_outside(start, end, dct)) 2302 return d

#
#   The restriction enzyme classes are built dynamically at import time.
#   The code below is what creates them.
#
#   Restriction_Dictionary holds two dictionaries: one for the types (called
#   pseudo-types here, since they really are the values that instances of
#   RestrictionType can take) and one for the enzymes. They are kept apart
#   for efficiency: the bases are evaluated only once per pseudo-type.
#
#   Importing Restriction is nevertheless slow, as around 660 classes (more
#   or less the size of Rebase) have to be created dynamically. This happens
#   only once, and the cost is largely compensated by the metaclass which
#   gives the classes a very efficient layout and mostly removes the need
#   for if/else tests in the class methods.
#
#   It is essential to run Restriction with doc string optimisation
#   (-OO switch) as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.iteritems():
    #
    #   The keys are the pseudo-types TYPE (stored as type1, type2...).
    #   Their names only serve to tell the keys of the dict apart: all the
    #   pseudo-types are in fact RestrictionType. The names are not used
    #   afterwards and the pseudo-types are not kept in the locals()
    #   dictionary, so they cannot be imported.
    #   Not all the types are present in the dictionary: those without a
    #   corresponding enzyme have been removed by Dictionary_Builder().
    #
    #   Each value is a tuple made of
    #       first, a tuple of bases (as strings),
    #       second, the names of the enzymes.
    #
    #   The bases are evaluated first.
    #
    bases = tuple([eval(x) for x in bases])
    #
    #   Then the particular value of RestrictionType shared by the classes
    #   listed in enzymes is created.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        #   Each enzyme is given its type. enzymedict[k] contains the
        #   values of the attributes of this particular class
        #   (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        #   The enzyme goes in the batch matching its availability.
        #   It is known to be a RestrictionType -> add_nocheck.
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
#   AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
#   Place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
locals().update(dict(zip(names, AllEnzymes)))
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes',
           'CommOnly', 'NonComm'] + names
#
#   Remove the temporary names (x is the variable left behind by the list
#   comprehensions).
#
del k, x, enzymes, TYPE, bases, names