1
2
3
4 """Martel based parser to read LocusLink flat files.
5
6 This is a huge regular expression for LocusLink,
7 built using the 'regular expressions on steroids' capabilities of
8 Martel.
9
10 A description of the format can be found in the 'ligand.doc' file
11 from the Ligand distribution, available from:
12
13 http://www.ncbi.nih.gov/LocusLink
14
15
16 """
17
18
19 from Martel import Str
20 from Martel import Str1
21 from Martel import Alt
22 from Martel import Rep
23 from Martel import Group
24 from Martel import ToEol
25 from Martel import AnyEol
26 from Martel import Any
27 from Martel import Word
28 from Martel import Opt
29 from Martel import AssertNot
30
31 from Martel import RecordReader
32
33 import warnings
# Issued at import time: the module is kept only for backwards compatibility.
# The message is assembled from adjacent string literals purely to keep the
# source lines short; the resulting text is a single unbroken sentence pair.
warnings.warn(
    "Bio.LocusLink was deprecated, as NCBI's LocusLink was superceded by "
    "Entrez Gene. If you still need this module, please get in touch with "
    "the Biopython developers (biopython-dev@biopython.org) to avoid "
    "permanent removal of this module",
    DeprecationWarning
)
35
36
37
# Numeric constant exported by the module; none of the expressions defined
# alongside it refer to it.
INDENT = 12

# Expression for a literal full stop.
point = Str1(".")

# Two spellings of "a run of spaces": one repeats the literal string " ",
# the other repeats Any(" ").  Both names are kept because other code may
# refer to either.
blank_spaces = Rep(Str1(" "))
white_space = Rep(Any(" "))
# Field tags of a LocusLink record.  The order is significant to callers
# that unpack the list positionally (see ``valid_locus_key``), so it is
# kept exactly as originally written.
locus_keys = [
    'LOCUSID', 'LOCUS_CONFIRMED', 'LOCUS_TYPE', 'ORGANISM', 'STATUS',
    'NM', 'NP', 'CDD', 'PRODUCT', 'ASSEMBLY', 'CONTIG', 'EVID',
    'XM', 'XP', 'ACCNUM', 'TYPE', 'PROT',
    'OFFICIAL_SYMBOL', 'OFFICIAL_GENE_NAME', 'PREFERRED_PRODUCT',
    'ALIAS_SYMBOL', 'SUMMARY', 'CHR', 'STS', 'COMP', 'ALIAS_PROT',
    'UNIGENE', 'BUTTON', 'LINK', 'OMIM', 'MAP', 'MAPLINK', 'ECNUM',
    'PROTOTYPE', 'DB_DESCR', 'DB_LINK', 'PMID', 'GRIF', 'SUBFUNC',
    'GO', 'EXTANNOT',
]
88
# Tags of the lines that make up an accession block (see ``accnum_block``).
accnum_block_keys = ['ACCNUM', 'TYPE', 'PROT']
# Expressions built by handing each key list to ``Str`` as separate
# arguments.
accnum_block_key = Str(*accnum_block_keys)
valid_locus_key = Str(*locus_keys)

# Literal tag prefixes; ``locus_line`` uses them in AssertNot() guards.
phenotype = Str1('PHENOTYPE')
db = Str1('DB')
109
111 return Group( entry_name, \
112 define_locus_line( entry_tag ))
113
# Accession entry: an ACCNUM line, then a TYPE line, then an optional PROT
# line, all captured under the 'accnum_block' group.
accnum_block = Group(
    'accnum_block',
    define_locus_line('ACCNUM')
    + define_locus_line('TYPE')
    + Opt(define_locus_line('PROT'))
)
118
# Phenotype entry: a PHENOTYPE line with an optional PHENOTYPE_ID line after
# it, captured under the 'phenotype_block' group.
phenotype_block = Group(
    'phenotype_block',
    define_locus_line('PHENOTYPE')
    + Opt(define_locus_line('PHENOTYPE_ID'))
)
122
# Database cross-reference entry: a DB_DESCR line immediately followed by a
# DB_LINK line, captured under the 'db_block' group.
db_block = Group(
    'db_block',
    define_locus_line('DB_DESCR')
    + define_locus_line('DB_LINK')
)
126
# A record opens with a line that starts with '>>'; the rest of that line is
# consumed up to the end of line.
begin_record_line = Str1('>>') + ToEol()

# Generic "KEY: value" line, captured under the 'locus_line' group.
#
# The AssertNot() guards keep this catch-all from swallowing lines that
# belong to the structured blocks (accnum_block, phenotype_block, db_block),
# which come later in the Alt() below.
#
# The accession guard is anchored on the complete key (key, optional spaces,
# ':').  A bare AssertNot(accnum_block_key) only inspects the leading
# characters, so it also rejected unrelated keys that merely begin with
# ACCNUM, TYPE or PROT -- notably PROTOTYPE, which is listed in locus_keys
# but is handled by no other alternative, so such a line stopped the
# repetition in locus_record.
#
# The PHENOTYPE and DB guards stay prefix-based on purpose: they have to
# cover PHENOTYPE_ID, DB_DESCR and DB_LINK as well.
locus_line = Group(
    'locus_line',
    white_space
    + AssertNot(accnum_block_key + white_space + Str1(':'))
    + AssertNot(phenotype)
    + AssertNot(db)
    + Word()
    + white_space
    + Str1(':')
    + ToEol()
)

# A full record: the '>>' header line followed by any mix of generic lines
# and structured blocks.  locus_line is tried first, hence the guards above.
locus_record = begin_record_line + Rep(
    Alt(locus_line, accnum_block, phenotype_block, db_block)
)
132
133