1
2
3
4 from binascii import crc32 as _crc32
5
def crc32(seq):
    """Return the CRC-32 checksum for a sequence (string or Seq object).

    ``seq`` is either a plain string or an object exposing ``tostring()``
    (in which case the checksum is computed over what that method returns).
    The result is whatever ``binascii.crc32`` yields for those bytes.
    """
    # Look the method up first instead of wrapping the whole computation in
    # try/except: an AttributeError raised *inside* tostring() must not be
    # mistaken for "this is a plain string".
    to_string = getattr(seq, "tostring", None)
    if to_string is not None:
        seq = to_string()
    # binascii.crc32 needs bytes on Python 3.  type(u"") is the text type on
    # both major versions (unicode on 2.x, str on 3.x).
    if isinstance(seq, type(u"")):
        seq = seq.encode("ascii")
    return _crc32(seq)
14
16 _table_h = []
17 for i in range(256):
18 l = i
19 part_h = 0
20 for j in range(8):
21 rflag = l & 1
22 l >>= 1
23 if part_h & 1: l |= (1L << 31)
24 part_h >>= 1L
25 if rflag: part_h ^= 0xd8000000L
26 _table_h.append(part_h)
27 return _table_h
28
29
# Built once at import time; crc64() indexes this table for every character.
_table_h = _init_table_h()
31
def crc64(s):
    """Return the CRC-64 checksum for a sequence (string or Seq object).

    ``s`` is consumed one character at a time.  The 64-bit register is kept
    as two 32-bit halves, and the result is the string ``"CRC-"`` followed
    by both halves as 8 uppercase hex digits each (high half first).
    """
    crc_low = 0
    crc_high = 0
    for char in s:
        # The table index mixes the register's current low byte with the
        # incoming character; it must be taken before the halves shift.
        idx = (crc_low ^ ord(char)) & 0xFF
        # Shift the register right by 8 bits: the low byte of the high half
        # becomes the top byte of the low half.
        crc_low = (crc_low >> 8) | ((crc_high & 0xFF) << 24)
        crc_high = (crc_high >> 8) ^ _table_h[idx]

    return "CRC-%08X%08X" % (crc_high, crc_low)
45
46
def gcg(seq):
    """Return the GCG checksum (int) for a sequence (string or Seq object).

    Given a nucleotide or amino-acid sequence (or any string), returns the
    GCG checksum (int). Checksum used by GCG program.

    Each character is upper-cased and its ordinal is multiplied by a weight
    that cycles 1..57 along the sequence; the sum is reduced modulo 10000.

    Based on BioPerl GCG_checksum. Adapted by Sebastian Bassi
    with the help of John Lenton, Pablo Ziliani, and Gabriel Genellina.
    """
    # Objects exposing tostring() are converted; anything else is treated
    # as a string already.  An exact type(seq) comparison would wrongly send
    # str subclasses down the tostring() path.
    to_string = getattr(seq, "tostring", None)
    if to_string is not None:
        seq = to_string()

    checksum = 0
    for position, char in enumerate(seq):
        # The weight restarts at 1 after every 57th character.
        checksum += (position % 57 + 1) * ord(char.upper())
    return checksum % 10000
64
def seguid(seq):
    """Return the SEGUID (string) for a sequence (string or Seq object).

    Given a nucleotide or amino-acid sequence (or any string), returns the
    SEGUID string (A SEquence Globally Unique IDentifier): the SHA-1 digest
    of the upper-cased sequence, base64-encoded, with the trailing ``=``
    padding stripped.

    For more information about SEGUID, see:
    http://bioinformatics.anl.gov/seguid/
    DOI: 10.1002/pmic.200600032
    """
    import base64
    try:
        import hashlib
    except ImportError:
        # Fall back to the stand-alone sha module when hashlib is absent.
        import sha
        m = sha.new()
    else:
        m = hashlib.sha1()

    # Objects exposing tostring() are converted; anything else is treated
    # as a string already.
    to_string = getattr(seq, "tostring", None)
    if to_string is not None:
        seq = to_string()
    seq = seq.upper()
    # Hash objects need bytes on Python 3.  type(u"") is the text type on
    # both major versions (unicode on 2.x, str on 3.x).
    if isinstance(seq, type(u"")):
        seq = seq.encode("ascii")
    m.update(seq)

    digest = m.digest()
    try:
        encoded = base64.b64encode(digest)
    except AttributeError:
        # base64 modules without b64encode: encodestring appends a newline.
        encoded = base64.encodestring(digest).replace("\n", "")
    # b64encode returns bytes on Python 3; always hand back a native str.
    if not isinstance(encoded, str):
        encoded = encoded.decode("ascii")
    return encoded.rstrip("=")
98
if __name__ == "__main__":
    # Quick self test.  The two light chains below differ in two residues
    # yet are expected to share a CRC-64 value, while their SEGUIDs differ.
    print("Quick self test")

    str_light_chain_one = ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGK"
                           "APKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGLQAEDEADY"
                           "YCSSYAGSSTLVFGGGTKLTVL")

    str_light_chain_two = ("QSALTQPASVSGSPGQSITISCTGTSSDVGSYNLVSWYQQHPGK"
                           "APKLMIYEGSKRPSGVSNRFSGSKSGNTASLTISGLQAEDEADY"
                           "YCCSYAGSSTWVFGGGTKLTVL")

    assert crc64(str_light_chain_one) == crc64(str_light_chain_two)
    assert 'CRC-44CAAD88706CC153' == crc64(str_light_chain_one)

    assert 'BpBeDdcNUYNsdk46JoJdw7Pd3BI' == seguid(str_light_chain_one)
    assert 'X5XEaayob1nZLOc7eVT9qyczarY' == seguid(str_light_chain_two)

    print("Done")
117