Package Bio :: Package SeqUtils :: Module MeltingTemp
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqUtils.MeltingTemp

  1  # Copyright 2004-2008 by Sebastian Bassi. 
  2  # All rights reserved. 
  3  # This code is part of the Biopython distribution and governed by its 
  4  # license.  Please see the LICENSE file that should have been included 
  5  # as part of this package. 
  6   
  7  """Calculate the thermodynamic melting temperatures of nucleotide sequences.""" 
  8   
  9  import math 
10 -def Tm_staluc(s,dnac=50,saltc=50,rna=0):
11 """Returns DNA/DNA tm using nearest neighbor thermodynamics. 12 13 dnac is DNA concentration [nM] 14 saltc is salt concentration [mM]. 15 rna=0 is for DNA/DNA (default), for RNA, rna should be 1. 16 17 Sebastian Bassi <sbassi@genesdigitales.com>""" 18 19 #Credits: 20 #Main author: Sebastian Bassi <sbassi@genesdigitales.com> 21 #Overcount function: Greg Singer <singerg@tcd.ie> 22 #Based on the work of Nicolas Le Novere <lenov@ebi.ac.uk> Bioinformatics. 23 #17:1226-1227(2001) 24 25 #This function returns better results than EMBOSS DAN because it uses 26 #updated thermodynamics values and takes into account inicialization 27 #parameters from the work of SantaLucia (1998). 28 29 #Things to do: 30 #+Detect complementary sequences. Change K according to result. 31 #+Add support for heteroduplex (see Sugimoto et al. 1995). 32 #+Correction for Mg2+. Now supports only monovalent ions. 33 #+Put thermodinamics table in a external file for users to change at will 34 #+Add support for danglings ends (see Le Novele. 2001) and mismatches. 35 36 dh = 0 #DeltaH. Enthalpy 37 ds = 0 #deltaS Entropy 38 39 def tercorr(stri): 40 deltah = 0 41 deltas = 0 42 if rna==0: 43 #DNA/DNA 44 #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594 45 if stri.startswith('G') or stri.startswith('C'): 46 deltah -= 0.1 47 deltas += 2.8 48 elif stri.startswith('A') or stri.startswith('T'): 49 deltah -= 2.3 50 deltas -= 4.1 51 if stri.endswith('G') or stri.endswith('C'): 52 deltah -= 0.1 53 deltas += 2.8 54 elif stri.endswith('A') or stri.endswith('T'): 55 deltah -= 2.3 56 deltas -= 4.1 57 dhL = dh + deltah 58 dsL = ds + deltas 59 return dsL,dhL 60 elif rna==1: 61 #RNA 62 if stri.startswith('G') or stri.startswith('C'): 63 deltah -= 3.61 64 deltas -= 1.5 65 elif stri.startswith('A') or stri.startswith('T') or \ 66 stri.startswith('U'): 67 deltah -= 3.72 68 deltas += 10.5 69 if stri.endswith('G') or stri.endswith('C'): 70 deltah -= 3.61 71 deltas -= 1.5 72 elif stri.endswith('A') or stri.endswith('T') or \ 73 stri.endswith('U'): 74 deltah -= 3.72 75 deltas += 10.5 76 dhL = dh + deltah 77 dsL = ds + deltas 78 # print "delta h=",dhL 79 return dsL,dhL
80 81 def overcount(st,p): 82 """Returns how many p are on st, works even for overlapping""" 83 ocu = 0 84 x = 0 85 while 1: 86 try: 87 i = st.index(p,x) 88 except ValueError: 89 break 90 ocu += 1 91 x = i + 1 92 return ocu 93 94 R = 1.987 # universal gas constant in Cal/degrees C*Mol 95 sup = s.upper() 96 vsTC,vh = tercorr(sup) 97 vs = vsTC 98 99 k = (dnac/4.0)*1e-9 100 #With complementary check on, the 4.0 should be changed to a variable. 101 102 if rna==0: 103 #DNA/DNA 104 #Allawi and SantaLucia (1997). Biochemistry 36 : 10581-10594 105 vh = vh + (overcount(sup,"AA"))*7.9 + (overcount(sup,"TT"))*\ 106 7.9 + (overcount(sup,"AT"))*7.2 + (overcount(sup,"TA"))*7.2 \ 107 + (overcount(sup,"CA"))*8.5 + (overcount(sup,"TG"))*8.5 + \ 108 (overcount(sup,"GT"))*8.4 + (overcount(sup,"AC"))*8.4 109 vh = vh + (overcount(sup,"CT"))*7.8+(overcount(sup,"AG"))*\ 110 7.8 + (overcount(sup,"GA"))*8.2 + (overcount(sup,"TC"))*8.2 111 vh = vh + (overcount(sup,"CG"))*10.6+(overcount(sup,"GC"))*\ 112 9.8 + (overcount(sup,"GG"))*8 + (overcount(sup,"CC"))*8 113 vs = vs + (overcount(sup,"AA"))*22.2+(overcount(sup,"TT"))*\ 114 22.2 + (overcount(sup,"AT"))*20.4 + (overcount(sup,"TA"))*21.3 115 vs = vs + (overcount(sup,"CA"))*22.7+(overcount(sup,"TG"))*\ 116 22.7 + (overcount(sup,"GT"))*22.4 + (overcount(sup,"AC"))*22.4 117 vs = vs + (overcount(sup,"CT"))*21.0+(overcount(sup,"AG"))*\ 118 21.0 + (overcount(sup,"GA"))*22.2 + (overcount(sup,"TC"))*22.2 119 vs = vs + (overcount(sup,"CG"))*27.2+(overcount(sup,"GC"))*\ 120 24.4 + (overcount(sup,"GG"))*19.9 + (overcount(sup,"CC"))*19.9 121 ds = vs 122 dh = vh 123 124 else: 125 #RNA/RNA hybridisation of Xia et al (1998) 126 #Biochemistry 37: 14719-14735 127 vh = vh+(overcount(sup,"AA"))*6.82+(overcount(sup,"TT"))*6.6+\ 128 (overcount(sup,"AT"))*9.38 + (overcount(sup,"TA"))*7.69+\ 129 (overcount(sup,"CA"))*10.44 + (overcount(sup,"TG"))*10.5+\ 130 (overcount(sup,"GT"))*11.4 + (overcount(sup,"AC"))*10.2 131 vh = vh + (overcount(sup,"CT"))*10.48 + (overcount(sup,"AG"))\ 132 *7.6+(overcount(sup,"GA"))*12.44+(overcount(sup,"TC"))*13.3 133 vh = vh + (overcount(sup,"CG"))*10.64 + (overcount(sup,"GC"))\ 134 *14.88+(overcount(sup,"GG"))*13.39+(overcount(sup,"CC"))*12.2 135 vs = vs + (overcount(sup,"AA"))*19.0 + (overcount(sup,"TT"))*\ 136 18.4+(overcount(sup,"AT"))*26.7+(overcount(sup,"TA"))*20.5 137 vs = vs + (overcount(sup,"CA"))*26.9 + (overcount(sup,"TG"))*\ 138 27.8 + (overcount(sup,"GT"))*29.5 + (overcount(sup,"AC"))*26.2 139 vs = vs + (overcount(sup,"CT"))*27.1 + (overcount(sup,"AG"))*\ 140 19.2 + (overcount(sup,"GA"))*32.5 + (overcount(sup,"TC"))*35.5 141 vs = vs + (overcount(sup,"CG"))*26.7 + (overcount(sup,"GC"))\ 142 *36.9 + (overcount(sup,"GG"))*32.7 + (overcount(sup,"CC"))*29.7 143 ds = vs 144 dh = vh 145 146 ds = ds-0.368*(len(s)-1)*math.log(saltc/1e3) 147 tm = ((1000* (-dh))/(-ds+(R * (math.log(k)))))-273.15 148 # print "ds="+str(ds) 149 # print "dh="+str(dh) 150 return tm 151 152 if __name__ == "__main__" : 153 print "Quick self test" 154 assert Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA') == 59.865612727457972 155 assert Tm_staluc('CAGTCAGTACGTACGTGTACTGCCGTA',rna=1) == 68.141611264576682 156 print "Done" 157