Package Bio :: Package expressions :: Module blocks
[hide private]
[frames | no frames]

Source Code for Module Bio.expressions.blocks

 1  # This was tested against BLOCKS-12.0, June 2000 
 2   
 3  import warnings 
 4  warnings.warn("Bio.expressions was deprecated, as it does not work with recent versions of mxTextTools. If you want to continue to use this module, please get in contact with the Biopython developers at biopython-dev@biopython.org to avoid permanent removal of this module from Biopython", DeprecationWarning) 
 5   
 6   
 7  from Martel import * 
 8  from Martel import RecordReader 
 9  from Bio import Std 
10   
11  # Header goes up to the line starting with "ID" 
12  header = Rep(AssertNot(Str("ID   ")) + \ 
13               ToEol()) 
14   
15  # ID   kringle; BLOCK 
16  # ID   14-3-3; BLOCK 
17  #  but not! 
18  # IDSA_METJA|Q58270  (  46) GGKRIRPYLTV  11 
19  ID = Str("ID   ") + Std.dbid(ToSep(sep = ";"), {"type": "primary"}) + \ 
20       Str(" BLOCK") + AnyEol() 
21   
22  # AC   IPB000001A; distance from previous block=(10,266) 
23  AC = Str("AC   ") + Std.dbid(ToSep(sep = ";"), {"type": "accession"}) + \ 
24       Str(" distance from previous block=(") + \ 
25       Integer("dist1") + Str(",") + Integer("dist2") + \ 
26       Str(")") + AnyEol() 
27   
28   
29  # DE line, e.g. "DE   Kringle domain"; the text is captured as "description".
30  #  If the DE line is long, it doesn't fold .. it's all on one line
31  DE = Str("DE   ") + ToEol("description") 
32   
33   
34  # BL   CCY;  width=14; seqs=44; 99.5%=717; strength=1059 
35  BL = Str("BL   ") + ToSep("protomat_id", ";") + \ 
36       Str("  width=") + Digits("width") + \ 
37       Str("; seqs=") + Digits("numseqs") + \ 
38       Str("; 99.5%=") + Digits("protomat_count") + \ 
39       Str("; strength=") + Digits("strength") + \ 
40       AnyEol() 
41   
42   
43  # PLMN_BOVIN|P06868  (  60) CEEETDFVCRAFQY  26 
44  # ^^^^^^^^^^^^^^^^^ 
45  #                     ^^^^-- position of the segment in the sequence
46  #                           ^^^^^^^^^^^^^^-- matching sequence 
47  #                                           ^^-- weight 
48  # 
49  identifier = (Std.dbxref_dbid(UntilSep(sep = "|."),  
50                            {"dbname": "swissprot", "type": "primary"}) + \ 
51                Str("|") + \ 
52                Std.dbxref_dbid(UntilSep(sep = " "),  
53                                {"dbname": "swissprot", "type": "accession"})) |\ 
54                Std.dbxref_dbid(UntilSep(sep = " ")) 
55                                 
56  segment = AssertNot(Re(r".. ")) + \ 
57            identifier + \ 
58            Re(r" *\( *") + \ 
59            Integer("position") + \ 
60            Re(r"\) *") + \ 
61            Word("matching_sequence") + Spaces() + \ 
62            Digits("weight") + AnyEol() 
63   
64  segment_block = Rep1(segment | AnyEol()) 
65   
66  end = Str("//") + AnyEol() 
67   
68  record = Group("record", 
69                 ID + AC + DE + BL + segment_block + end) 
70   
71  format_expression = header + Rep1(record) 
72  format = HeaderFooter("dataset", {"format": "blocks/12"}, 
73                        header, RecordReader.Until, ("ID ",), 
74                        record, RecordReader.EndsWith, ("//\n",), 
75                        None, None, None) 
76   
77  if __name__ == "__main__": 
78      import os 
79      from xml.sax import saxutils 
80      filename = "/home/dalke/ftps/databases/blocks/unix/blocks-12.0/blocks.dat.Z" 
81      infile = os.popen("zcat " + filename) 
82      parser = format.make_parser(debug_level = 0) 
83      parser.setContentHandler(saxutils.XMLGenerator()) 
84      parser.parseFile(infile) 
85