1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 """ SCOP: Structural Classification of Proteins.
17
18 The SCOP database aims to provide a manually constructed classification of
19 all know protein structures into a hierarchy, the main levels of which
20 are family, superfamily and fold.
21
22 * "SCOP":http://scop.mrc-lmb.cam.ac.uk/scop/
23
24 * "Introduction":http://scop.mrc-lmb.cam.ac.uk/scop/intro.html
25
26 * "SCOP parsable files":http://scop.mrc-lmb.cam.ac.uk/scop/parse/
27
28 The Scop object in this module represents the entire SCOP classification. It
29 can be built from the three SCOP parsable files, modified is so desired, and
30 converted back to the same file formats. A single SCOP domain (represented
31 by the Domain class) can be obtained from Scop using the domain's SCOP
32 identifier (sid).
33
34
35 nodeCodeDict -- A mapping between known 2 letter node codes and a longer
36 description. The known node types are 'cl' (class), 'cf'
37 (fold), 'sf' (superfamily), 'fa' (family), 'dm' (domain),
38 'sp' (species), 'px' (domain). Additional node types may
39 be added in the future.
40
41 This module also provides code to access SCOP over the WWW.
42
43 Functions:
44 search -- Access the main CGI script.
45 _open -- Internally used function.
46
47 """
48
49 from types import *
50 import os
51
52 import Des
53 import Cla
54 import Hie
55 from Residues import *
56 from Bio import SeqIO
57 from Bio.Seq import Seq
58
59
60 nodeCodeDict = { 'cl':'class', 'cf':'fold', 'sf':'superfamily',
61 'fa':'family', 'dm':'protein', 'sp':'species', 'px':'domain'}
62
63
64 _nodetype_to_code= { 'class': 'cl', 'fold': 'cf', 'superfamily': 'sf',
65 'family': 'fa', 'protein': 'dm', 'species': 'sp', 'domain': 'px'}
66
67 nodeCodeOrder = [ 'ro', 'cl', 'cf', 'sf', 'fa', 'dm', 'sp', 'px' ]
68
69 astralBibIds = [10,20,25,30,35,40,50,70,90,95,100]
70
71 astralEvs = [10, 5, 1, 0.5, 0.1, 0.05, 0.01, 0.005, 0.001, 1e-4, 1e-5, 1e-10, 1e-15,
72 1e-20, 1e-25, 1e-50]
73
74 astralEv_to_file = { 10: 'e+1', 5: 'e+0,7', 1: 'e+0', 0.5: 'e-0,3', 0.1: 'e-1',
75 0.05: 'e-1,3', 0.01: 'e-2', 0.005: 'e-2,3', 0.001: 'e-3',
76 1e-4: 'e-4', 1e-5: 'e-5', 1e-10: 'e-10', 1e-15: 'e-15',
77 1e-20: 'e-20', 1e-25: 'e-25', 1e-50: 'e-50' }
78
79 astralEv_to_sql = { 10: 'e1', 5: 'e0_7', 1: 'e0', 0.5: 'e_0_3', 0.1: 'e_1',
80 0.05: 'e_1_3', 0.01: 'e_2', 0.005: 'e_2_3', 0.001: 'e_3',
81 1e-4: 'e_4', 1e-5: 'e_5', 1e-10: 'e_10', 1e-15: 'e_15',
82 1e-20: 'e_20', 1e-25: 'e_25', 1e-50: 'e_50' }
83
84
86 """Order SCOP concise classification strings (sccs).
87
88 a.4.5.1 < a.4.5.11 < b.1.1.1
89
90 A sccs (e.g. a.4.5.11) compactly represents a domain's classification.
91 The letter represents the class, and the numbers are the fold,
92 superfamily, and family, respectively.
93
94 """
95
96 s1 = sccs1.split(".")
97 s2 = sccs2.split(".")
98
99 if s1[0] != s2[0]: return cmp(s1[0], s2[0])
100
101 s1 = map(int, s1[1:])
102 s2 = map(int, s2[1:])
103
104 return cmp(s1,s2)
105
106
107 _domain_re = re.compile(r">?([\w_\.]*)\s+([\w\.]*)\s+\(([^)]*)\) (.*)")
108
109 -def parse_domain(str) :
110 """Convert an ASTRAL header string into a Scop domain.
111
112 An ASTRAL (http://astral.stanford.edu/) header contains a concise
113 description of a SCOP domain. A very similar format is used when a
114 Domain object is converted into a string. The Domain returned by this
115 method contains most of the SCOP information, but it will not be located
116 within the SCOP hierarchy (i.e. The parent node will be None). The
117 description is composed of the SCOP protein and species descriptions.
118
119 A typical ASTRAL header looks like --
120 >d1tpt_1 a.46.2.1 (1-70) Thymidine phosphorylase {Escherichia coli}
121 """
122
123 m = _domain_re.match(str)
124 if (not m) : raise ValueError("Domain: "+ str)
125
126 dom = Domain()
127 dom.sid = m.group(1)
128 dom.sccs = m.group(2)
129 dom.residues = Residues(m.group(3))
130 if not dom.residues.pdbid :
131 dom.residues.pdbid= dom.sid[1:5]
132 dom.description = m.group(4).strip()
133
134 return dom
135
137 filename = "dir.%s.scop.txt_%s" % (filetype,version)
138 handle = open(os.path.join( scop_dir_path, filename))
139 return handle
140
141
143 """The entire SCOP hierarchy.
144
145 root -- The root node of the hierarchy
146 """
147 - def __init__(self, cla_handle=None, des_handle=None, hie_handle=None,
148 dir_path=None, db_handle=None, version=None):
149 """Build the SCOP hierarchy from the SCOP parsable files, or a sql backend.
150
151 If no file handles are given, then a Scop object with a single
152 empty root node is returned.
153
154 If a directory and version are given (with dir_path=.., version=...) or
155 file handles for each file, the whole scop tree will be built in memory.
156
157 If a MySQLdb database handle is given, the tree will be built as needed,
158 minimising construction times. To build the SQL database to the methods
159 write_xxx_sql to create the tables.
160
161 """
162 self._sidDict = {}
163 self._sunidDict = {}
164
165 if cla_handle==des_handle==hie_handle==dir_path==db_handle==None: return
166
167 if dir_path is None and db_handle is None:
168 if cla_handle == None or des_handle==None or hie_handle==None:
169 raise RuntimeError("Need CLA, DES and HIE files to build SCOP")
170
171 sunidDict = {}
172
173 self.db_handle = db_handle
174 try:
175
176 if db_handle:
177
178 pass
179
180 else:
181
182 if dir_path:
183 if not version :
184 raise RuntimeError("Need SCOP version to find parsable files in directory")
185 if cla_handle or des_handle or hie_handle:
186 raise RuntimeError("Cannot specify SCOP directory and specific files")
187
188 cla_handle = _open_scop_file( dir_path, version, 'cla')
189 des_handle = _open_scop_file( dir_path, version, 'des')
190 hie_handle = _open_scop_file( dir_path, version, 'hie')
191
192 root = Node()
193 domains = []
194 root.sunid=0
195 root.type='ro'
196 sunidDict[root.sunid] = root
197 self.root = root
198 root.description = 'SCOP Root'
199
200
201 records = Des.parse(des_handle)
202 for record in records:
203 if record.nodetype =='px' :
204 n = Domain()
205 n.sid = record.name
206 domains.append(n)
207 else :
208 n = Node()
209 n.sunid = record.sunid
210 n.type = record.nodetype
211 n.sccs = record.sccs
212 n.description = record.description
213
214 sunidDict[n.sunid] = n
215
216
217 records = Hie.parse(hie_handle)
218 for record in records:
219 if record.sunid not in sunidDict :
220 print record.sunid
221
222 n = sunidDict[record.sunid]
223
224 if record.parent != '' :
225
226 if record.parent not in sunidDict:
227 raise ValueError("Incomplete data?")
228
229 n.parent = sunidDict[record.parent]
230
231 for c in record.children:
232 if c not in sunidDict :
233 raise ValueError("Incomplete data?")
234 n.children.append(sunidDict[c])
235
236
237
238 sidDict = {}
239 records = Cla.parse(cla_handle)
240 for record in records:
241 n = sunidDict[record.sunid]
242 assert n.sccs == record.sccs
243 assert n.sid == record.sid
244 n.residues = record.residues
245 sidDict[n.sid] = n
246
247
248 self._sunidDict = sunidDict
249 self._sidDict = sidDict
250 self._domains = tuple(domains)
251
252 finally:
253 if dir_path :
254
255 if cla_handle : cla_handle.close()
256 if des_handle : des_handle.close()
257 if hie_handle : hie_handle.close()
258
259
262
263
264 - def getDomainBySid(self, sid) :
265 """Return a domain from its sid"""
266 if sid in self._sidDict:
267 return self._sidDict[sid]
268 if self.db_handle:
269 self.getDomainFromSQL(sid=sid)
270 if sid in self._sidDict:
271 return self._sidDict[sid]
272 else:
273 return None
274
275
277 """Return a node from its sunid"""
278 if sunid in self._sunidDict:
279 return self._sunidDict[sunid]
280 if self.db_handle:
281 self.getDomainFromSQL(sunid=sunid)
282 if sunid in self._sunidDict:
283 return self._sunidDict[sunid]
284 else:
285 return None
286
287
288 - def getDomains(self) :
289 """Returns an ordered tuple of all SCOP Domains"""
290 if self.db_handle:
291 return self.getRoot().getDescendents('px')
292 else:
293 return self._domains
294
295
296
305
306
316
317
326
327
328 - def getDomainFromSQL(self, sunid=None, sid=None):
329 """Load a node from the SQL backend using sunid or sid"""
330 if sunid==sid==None: return None
331
332 cur = self.db_handle.cursor()
333
334 if sid:
335 cur.execute("SELECT sunid FROM cla WHERE sid=%s", sid)
336 res = cur.fetchone()
337 if res is None:
338 return None
339 sunid = res[0]
340
341 cur.execute("SELECT * FROM des WHERE sunid=%s", sunid)
342 data = cur.fetchone()
343
344 if data is not None:
345 n = None
346
347
348 if data[1] != "px":
349 n = Node(scop=self)
350
351 cur.execute("SELECT child FROM hie WHERE parent=%s", sunid)
352 children = []
353 for c in cur.fetchall():
354 children.append(c[0])
355 n.children = children
356
357
358 else:
359 n = Domain(scop=self)
360 cur.execute("select sid, residues, pdbid from cla where sunid=%s",
361 sunid)
362
363 [n.sid,n.residues,pdbid] = cur.fetchone()
364 n.residues = Residues(n.residues)
365 n.residues.pdbid=pdbid
366 self._sidDict[n.sid] = n
367
368 [n.sunid,n.type,n.sccs,n.description] = data
369
370 if data[1] != 'ro':
371 cur.execute("SELECT parent FROM hie WHERE child=%s", sunid)
372 n.parent = cur.fetchone()[0]
373
374 n.sunid = int(n.sunid)
375
376 self._sunidDict[n.sunid] = n
377
378
390
391
393 """Get descendents of a node using the database backend. This avoids
394 repeated iteration of SQL calls and is therefore much quicker than
395 repeatedly calling node.getChildren().
396 """
397 if nodeCodeOrder.index(type) <= nodeCodeOrder.index(node.type): return []
398
399 des_list = []
400
401
402
403 if node.type == 'ro':
404 for c in node.getChildren():
405 for d in self.getDescendentsFromSQL(c,type):
406 des_list.append(d)
407 return des_list
408
409 cur = self.db_handle.cursor()
410
411
412 if type != 'px':
413 cur.execute("SELECT DISTINCT des.sunid,des.type,des.sccs,description FROM \
414 cla,des WHERE cla."+node.type+"=%s AND cla."+type+"=des.sunid", (node.sunid))
415 data = cur.fetchall()
416 for d in data:
417 if int(d[0]) not in self._sunidDict:
418 n = Node(scop=self)
419 [n.sunid,n.type,n.sccs,n.description] = d
420 n.sunid=int(n.sunid)
421 self._sunidDict[n.sunid] = n
422
423 cur.execute("SELECT parent FROM hie WHERE child=%s", n.sunid)
424 n.parent = cur.fetchone()[0]
425
426 cur.execute("SELECT child FROM hie WHERE parent=%s", n.sunid)
427 children = []
428 for c in cur.fetchall():
429 children.append(c[0])
430 n.children = children
431
432
433 des_list.append( self._sunidDict[int(d[0])] )
434
435 else:
436 cur.execute("SELECT cla.sunid,sid,pdbid,residues,cla.sccs,type,description,sp\
437 FROM cla,des where cla.sunid=des.sunid and cla."+node.type+"=%s",
438 node.sunid)
439
440 data = cur.fetchall()
441 for d in data:
442 if int(d[0]) not in self._sunidDict:
443 n = Domain(scop=self)
444
445
446 [n.sunid,n.sid, pdbid,n.residues,n.sccs,n.type,n.description,
447 n.parent] = d[0:8]
448 n.residues = Residues(n.residues)
449 n.residues.pdbid = pdbid
450 n.sunid = int(n.sunid)
451 self._sunidDict[n.sunid] = n
452 self._sidDict[n.sid] = n
453
454
455 des_list.append( self._sunidDict[int(d[0])] )
456
457 return des_list
458
459
460
461
463 """Write HIE data to SQL database"""
464 cur = handle.cursor()
465
466 cur.execute("DROP TABLE IF EXISTS hie")
467 cur.execute("CREATE TABLE hie (parent INT, child INT, PRIMARY KEY (child),\
468 INDEX (parent) )")
469
470 for p in self._sunidDict.values():
471 for c in p.children:
472 cur.execute("INSERT INTO hie VALUES (%s,%s)" % (p.sunid, c.sunid))
473
474
476 """Write CLA data to SQL database"""
477 cur = handle.cursor()
478
479 cur.execute("DROP TABLE IF EXISTS cla")
480 cur.execute("CREATE TABLE cla (sunid INT, sid CHAR(8), pdbid CHAR(4),\
481 residues VARCHAR(50), sccs CHAR(10), cl INT, cf INT, sf INT, fa INT,\
482 dm INT, sp INT, px INT, PRIMARY KEY (sunid), INDEX (SID) )")
483
484 for n in self._sidDict.values():
485 c = n.toClaRecord()
486 cur.execute( "INSERT INTO cla VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)",
487 (n.sunid, n.sid, c.residues.pdbid, c.residues, n.sccs,
488 n.getAscendent('cl').sunid, n.getAscendent('cf').sunid,
489 n.getAscendent('sf').sunid, n.getAscendent('fa').sunid,
490 n.getAscendent('dm').sunid, n.getAscendent('sp').sunid,
491 n.sunid ))
492
493
495 """Write DES data to SQL database"""
496 cur = handle.cursor()
497
498 cur.execute("DROP TABLE IF EXISTS des")
499 cur.execute("CREATE TABLE des (sunid INT, type CHAR(2), sccs CHAR(10),\
500 description VARCHAR(255),\
501 PRIMARY KEY (sunid) )")
502
503 for n in self._sunidDict.values():
504 cur.execute( "INSERT INTO des VALUES (%s,%s,%s,%s)",
505 ( n.sunid, n.type, n.sccs, n.description ) )
506
507
508
510 """ A node in the Scop hierarchy
511
512 sunid -- SCOP unique identifiers. e.g. '14986'
513
514 parent -- The parent node
515
516 children -- A list of child nodes
517
518 sccs -- SCOP concise classification string. e.g. 'a.1.1.2'
519
520 type -- A 2 letter node type code. e.g. 'px' for domains
521
522 description --
523
524 """
526 """Create a Node in the scop hierarchy. If a Scop instance is provided to the
527 constructor, this will be used to lookup related references using the SQL
528 methods. If no instance is provided, it is assumed the whole tree exists
529 and is connected."""
530 self.sunid=''
531 self.parent = None
532 self.children=[]
533 self.sccs = ''
534 self.type =''
535 self.description =''
536 self.scop=scop
537
546
548 """Return an Hie.Record"""
549 rec = Hie.Record()
550 rec.sunid = str(self.sunid)
551 if self.getParent() :
552 rec.parent = str(self.getParent().sunid)
553 else:
554 rec.parent = '-'
555 for c in self.getChildren() :
556 rec.children.append(str(c.sunid))
557 return rec
558
560 """Return a Des.Record"""
561 rec = Des.Record()
562 rec.sunid = str(self.sunid)
563 rec.nodetype = self.type
564 rec.sccs = self.sccs
565 rec.description = self.description
566 return rec
567
569 """Return a list of children of this Node"""
570 if self.scop is None:
571 return self.children
572 else:
573 return map ( self.scop.getNodeBySunid, self.children )
574
576 """Return the parent of this Node"""
577 if self.scop is None:
578 return self.parent
579 else:
580 return self.scop.getNodeBySunid( self.parent )
581
583 """ Return a list of all decendent nodes of the given type. Node type can a
584 two letter code or longer description. e.g. 'fa' or 'family'
585 """
586 if node_type in _nodetype_to_code:
587 node_type = _nodetype_to_code[node_type]
588
589 nodes = [self]
590 if self.scop:
591 return self.scop.getDescendentsFromSQL(self,node_type)
592 while nodes[0].type != node_type:
593 if nodes[0].type == 'px' : return []
594 child_list = []
595 for n in nodes:
596 for child in n.getChildren():
597 child_list.append( child )
598 nodes = child_list
599
600 return nodes
601
602
604 """ Return the ancenstor node of the given type, or None.Node type can a
605 two letter code or longer description. e.g. 'fa' or 'family'"""
606 if node_type in _nodetype_to_code:
607 node_type = _nodetype_to_code[node_type]
608
609 if self.scop:
610 return self.scop.getAscendentFromSQL(self,node_type)
611 else:
612 n = self
613 if n.type == node_type: return None
614 while n.type != node_type:
615 if n.type == 'ro': return None
616 n = n.getParent()
617
618 return n
619
620
621
622 -class Domain(Node) :
623 """ A SCOP domain. A leaf node in the Scop hierarchy.
624
625 sid -- The SCOP domain identifier. e.g. 'd5hbib_'
626
627 residues -- A Residue object. It defines the collection
628 of PDB atoms that make up this domain.
629 """
630 - def __init__(self,scop=None) :
631 Node.__init__(self,scop=scop)
632 self.sid = ''
633 self.residues = None
634
635 - def __str__(self) :
636 s = []
637 s.append(self.sid)
638 s.append(self.sccs)
639 s.append("("+str(self.residues)+")")
640
641 if not self.getParent() :
642 s.append(self.description)
643 else :
644 sp = self.getParent()
645 dm = sp.getParent()
646 s.append(dm.description)
647 s.append("{"+sp.description+"}")
648
649 return " ".join(s)
650
651 - def toDesRecord(self):
652 """Return a Des.Record"""
653 rec = Node.toDesRecord(self)
654 rec.name = self.sid
655 return rec
656
657 - def toClaRecord(self) :
658 """Return a Cla.Record"""
659 rec = Cla.Record()
660 rec.sid = self.sid
661 rec.residues = self.residues
662 rec.sccs = self.sccs
663 rec.sunid = self.sunid
664
665 n = self
666 while n.sunid != 0:
667 rec.hierarchy.append( (n.type, str(n.sunid)) )
668 n = n.getParent()
669
670 rec.hierarchy.reverse()
671
672 return rec
673
675 """Abstraction of the ASTRAL database, which has sequences for all the SCOP domains,
676 as well as clusterings by percent id or evalue.
677 """
678
679 - def __init__( self, dir_path=None, version=None, scop=None,
680 astral_file=None, db_handle=None):
681 """
682 Initialise the astral database.
683
684 You must provide either a directory of SCOP files:
685
686 dir_path - string, the path to location of the scopseq-x.xx directory
687 (not the directory itself), and
688 version -a version number.
689
690 or, a FASTA file:
691
692 astral_file - string, a path to a fasta file (which will be loaded in memory)
693
694 or, a MYSQL database:
695
696 db_handle - a database handle for a MYSQL database containing a table
697 'astral' with the astral data in it. This can be created
698 using writeToSQL.
699 """
700
701 if astral_file==dir_path==db_handle==None:
702 raise RuntimeError("Need either file handle, or (dir_path + "\
703 + "version) or database handle to construct Astral")
704 if not scop:
705 raise RuntimeError("Must provide a Scop instance to construct")
706
707 self.scop = scop
708 self.db_handle = db_handle
709
710
711 if not astral_file and not db_handle:
712 if dir_path == None or version == None:
713 raise RuntimeError("must provide dir_path and version")
714
715 self.version = version
716 self.path = os.path.join( dir_path, "scopseq-%s" % version)
717 astral_file = "astral-scopdom-seqres-all-%s.fa" % self.version
718 astral_file = os.path.join (self.path, astral_file)
719
720 if astral_file:
721
722 self.fasta_dict = SeqIO.to_dict(SeqIO.parse(open(astral_file), "fasta"))
723
724 self.astral_file = astral_file
725 self.EvDatasets = {}
726 self.EvDatahash = {}
727 self.IdDatasets = {}
728 self.IdDatahash = {}
729
730
732 """get domains clustered by evalue"""
733 if id not in self.EvDatasets:
734 if self.db_handle:
735 self.EvDatasets[id] = self.getAstralDomainsFromSQL(astralEv_to_sql[id])
736
737 else:
738 if not self.path:
739 raise RuntimeError("No scopseq directory specified")
740
741 file_prefix = "astral-scopdom-seqres-sel-gs"
742 filename = "%s-e100m-%s-%s.id" % (file_prefix, astralEv_to_file[id] ,
743 self.version)
744 filename = os.path.join(self.path,filename)
745 self.EvDatasets[id] = self.getAstralDomainsFromFile(filename)
746 return self.EvDatasets[id]
747
748
750 """get domains clustered by percent id"""
751 if id not in self.IdDatasets:
752 if self.db_handle:
753 self.IdDatasets[id] = self.getAstralDomainsFromSQL("id"+str(id))
754
755 else:
756 if not self.path:
757 raise RuntimeError("No scopseq directory specified")
758
759 file_prefix = "astral-scopdom-seqres-sel-gs"
760 filename = "%s-bib-%s-%s.id" % (file_prefix, id, self.version)
761 filename = os.path.join(self.path,filename)
762 self.IdDatasets[id] = self.getAstralDomainsFromFile(filename)
763 return self.IdDatasets[id]
764
765
766 - def getAstralDomainsFromFile(self,filename=None,file_handle=None):
767 """Get the scop domains from a file containing a list of sids"""
768 if file_handle == filename == none:
769 raise RuntimeError("You must provide a filename or handle")
770 if not file_handle:
771 file_handle = open(filename)
772 doms = []
773 while 1:
774 line = file_handle.readline()
775 if not line:
776 break
777 line = line.rstrip()
778 doms.append(line)
779 if filename:
780 file_handle.close()
781
782 doms = filter( lambda a: a[0]=='d', doms )
783 doms = map( self.scop.getDomainBySid, doms )
784 return doms
785
786 - def getAstralDomainsFromSQL(self, column):
787 """Load a set of astral domains from a column in the astral table of a MYSQL
788 database (which can be created with writeToSQL(...)"""
789 cur = self.db_handle.cursor()
790 cur.execute("SELECT sid FROM astral WHERE "+column+"=1")
791 data = cur.fetchall()
792 data = map( lambda x: self.scop.getDomainBySid(x[0]), data)
793
794 return data
795
796
798 """get the seq record of a given domain from its sid"""
799 if self.db_handle is None:
800 return self.fasta_dict[domain].seq
801
802 else:
803 cur = self.db_handle.cursor()
804 cur.execute("SELECT seq FROM astral WHERE sid=%s", domain)
805 return Seq(cur.fetchone()[0])
806
808 """Return seq associated with domain"""
809 return self.getSeqBySid(domain.sid)
810
811
812 - def hashedDomainsById(self,id):
813 """Get domains clustered by sequence identity in a dict"""
814 if id not in self.IdDatahash:
815 self.IdDatahash[id] = {}
816 for d in self.domainsClusteredById(id):
817 self.IdDatahash[id][d] = 1
818 return self.IdDatahash[id]
819
820 - def hashedDomainsByEv(self,id):
821 """Get domains clustered by evalue in a dict"""
822 if id not in self.EvDatahash:
823 self.EvDatahash[id] = {}
824 for d in self.domainsClusteredByEv(id):
825 self.EvDatahash[id][d] = 1
826 return self.EvDatahash[id]
827
828
829 - def isDomainInId(self,dom,id):
830 """Returns true if the domain is in the astral clusters for percent ID"""
831 return dom in self.hashedDomainsById(id)
832
833 - def isDomainInEv(self,dom,id):
834 """Returns true if the domain is in the ASTRAL clusters for evalues"""
835 return dom in self.hashedDomainsByEv(id)
836
837
839 """Write the ASTRAL database to a MYSQL database"""
840 cur = db_handle.cursor()
841
842 cur.execute("DROP TABLE IF EXISTS astral")
843 cur.execute("CREATE TABLE astral (sid CHAR(8), seq TEXT, PRIMARY KEY (sid))")
844
845 for dom in self.fasta_dict.keys():
846 cur.execute( "INSERT INTO astral (sid,seq) values (%s,%s)",
847 (dom, self.fasta_dict[dom].seq.data))
848
849
850 for i in astralBibIds:
851 cur.execute("ALTER TABLE astral ADD (id"+str(i)+" TINYINT)")
852
853 for d in self.domainsClusteredById(i):
854 cur.execute("UPDATE astral SET id"+str(i)+"=1 WHERE sid=%s",
855 d.sid)
856
857 for ev in astralEvs:
858 cur.execute("ALTER TABLE astral ADD ("+astralEv_to_sql[ev]+" TINYINT)")
859
860 for d in self.domainsClusteredByEv(ev):
861
862 cur.execute("UPDATE astral SET "+astralEv_to_sql[ev]+"=1 WHERE sid=%s",
863 d.sid)
864
865 -def search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
866 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds):
867 """search(pdb=None, key=None, sid=None, disp=None, dir=None, loc=None,
868 cgi='http://scop.mrc-lmb.cam.ac.uk/scop/search.cgi', **keywds)
869
870 Access search.cgi and return a handle to the results. See the
871 online help file for an explanation of the parameters:
872 http://scop.mrc-lmb.cam.ac.uk/scop/help.html
873
874 Raises an IOError if there's a network error.
875
876 """
877 params = {'pdb' : pdb, 'key' : key, 'sid' : sid, 'disp' : disp,
878 'dir' : dir, 'loc' : loc}
879 variables = {}
880 for k in params.keys():
881 if params[k] is not None:
882 variables[k] = params[k]
883 variables.update(keywds)
884 return _open(cgi, variables)
885
886 -def _open(cgi, params={}, get=1):
887 """_open(cgi, params={}, get=1) -> UndoHandle
888
889 Open a handle to SCOP. cgi is the URL for the cgi script to access.
890 params is a dictionary with the options to pass to it. get is a boolean
891 that describes whether a GET should be used. Does some
892 simple error checking, and will raise an IOError if it encounters one.
893
894 """
895 import urllib
896 from Bio import File
897
898 options = urllib.urlencode(params)
899 if get:
900 fullcgi = cgi
901 if options:
902 fullcgi = "%s?%s" % (cgi, options)
903 handle = urllib.urlopen(fullcgi)
904 else:
905 handle = urllib.urlopen(cgi, options)
906
907
908 uhandle = File.UndoHandle(handle)
909
910 return uhandle
911