Package Bio :: Package LocusLink :: Module locus_format
[hide private]
[frames] | no frames]

Source Code for Module Bio.LocusLink.locus_format

  1   
  2   
  3   
  4  """Martel based parser to read LocusLink flat files. 
  5   
  6  This is a huge regular expression for LocusLink, 
  7  built using the 'regular expressions on steroids' capabilities of 
  8  Martel. 
  9   
 10  A description of the format can be found in the LocusLink 
 11  documentation, available from: 
 12   
 13   http://www.ncbi.nih.gov/LocusLink 
 14   
 15   
 16  """ 
 17   
 18  # Martel 
 19  from Martel import Str 
 20  from Martel import Str1 
 21  from Martel import Alt 
 22  from Martel import Rep 
 23  from Martel import Group 
 24  from Martel import ToEol 
 25  from Martel import AnyEol 
 26  from Martel import Any 
 27  from Martel import Word 
 28  from Martel import Opt 
 29  from Martel import AssertNot 
 30   
 31  from Martel import RecordReader 
 32   
 33  import warnings 
 34  warnings.warn("Bio.LocusLink was deprecated, as NCBI's LocusLink was superceded by Entrez Gene. If you still need this module, please get in touch with the Biopython developers (biopython-dev@biopython.org) to avoid permanent removal of this module", DeprecationWarning) 
 35   
 36   
 37  # --- First set up some helper constants and functions 
 38  INDENT = 12 
 39   
 40  blank_spaces = Rep(Str1(" ")) 
 41  point = Str1(".") 
 42   
 43  white_space = Rep( Any( "       " ) ) 
 44  locus_keys = [ \ 
 45          'LOCUSID', \ 
 46          'LOCUS_CONFIRMED', \ 
 47          'LOCUS_TYPE', \ 
 48          'ORGANISM', \ 
 49          'STATUS', \ 
 50          'NM', \ 
 51          'NP', \ 
 52          'CDD', \ 
 53          'PRODUCT', \ 
 54          'ASSEMBLY', \ 
 55          'CONTIG', \ 
 56          'EVID', \ 
 57          'XM', \ 
 58          'XP', \ 
 59          'ACCNUM', \ 
 60          'TYPE', \ 
 61          'PROT', \ 
 62          'OFFICIAL_SYMBOL', \ 
 63          'OFFICIAL_GENE_NAME', \ 
 64          'PREFERRED_PRODUCT', \ 
 65          'ALIAS_SYMBOL', \ 
 66          'SUMMARY', \ 
 67          'CHR', \ 
 68          'STS', \ 
 69          'COMP', \ 
 70          'ALIAS_PROT', \ 
 71          'UNIGENE', \ 
 72          'BUTTON', \ 
 73          'LINK', \ 
 74          'OMIM', \ 
 75          'MAP', \ 
 76          'MAPLINK', \ 
 77          'ECNUM', \ 
 78          'PROTOTYPE', \ 
 79          'DB_DESCR', \ 
 80          'DB_LINK', \ 
 81          'PMID', \ 
 82          'GRIF', \ 
 83          'SUBFUNC', \ 
 84          'GO', \ 
 85          'EXTANNOT' 
 86           
 87          ] 
 88   
 89  accnum_block_keys = [ \ 
 90      'ACCNUM', \ 
 91      'TYPE', \ 
 92      'PROT' \ 
 93      ] 
 94  phenotype = Str1( 'PHENOTYPE' ) 
 95  db = Str1( 'DB' ) 
 96  accnum_block_key = Str( *accnum_block_keys ) 
 97   
 98   
 99   
100  valid_locus_key = Str( *locus_keys ) 
def define_locus_line(entry_tag):
    """Build the expression for a single ``TAG: value`` line.

    entry_tag -- literal text of the tag that must start the line

    The returned expression is, in order: 'white_space', the literal tag,
    'white_space', a literal colon, 'white_space', and ToEol() for the
    remainder of the line.
    """
    tag = Str1(entry_tag)
    colon = Str1(":")
    return white_space + tag + white_space + colon + white_space + ToEol()
109
def define_locus_group(entry_name, entry_tag):
    """Wrap the line expression for ``entry_tag`` in a named Group.

    entry_name -- name given to the resulting Group
    entry_tag -- literal tag text passed on to define_locus_line()

    Returns the Group expression.
    """
    tagged_line = define_locus_line(entry_tag)
    return Group(entry_name, tagged_line)


# An ACCNUM line, then a TYPE line, then an optional PROT line.
accnum_block = Group(
    'accnum_block',
    define_locus_line('ACCNUM')
    + define_locus_line('TYPE')
    + Opt(define_locus_line('PROT')))

# A PHENOTYPE line, optionally followed by a PHENOTYPE_ID line.
phenotype_block = Group(
    'phenotype_block',
    define_locus_line('PHENOTYPE')
    + Opt(define_locus_line('PHENOTYPE_ID')))

# A DB_DESCR line immediately followed by a DB_LINK line.
db_block = Group(
    'db_block',
    define_locus_line('DB_DESCR')
    + define_locus_line('DB_LINK'))

# Each record opens with a line that starts with '>>'.
begin_record_line = Str1('>>') + ToEol()

# Generic "TAG: value" line.  The three AssertNot guards keep this rule
# from consuming lines that belong to the dedicated block rules above.
# NOTE(review): the guards test literal prefixes only, so a tag such as
# PROTOTYPE (which begins with 'PROT') is presumably rejected here as well,
# even though it appears in 'locus_keys' -- confirm against real data.
locus_line = Group(
    'locus_line',
    white_space
    + AssertNot(accnum_block_key)
    + AssertNot(phenotype)
    + AssertNot(db)
    + Word()
    + white_space
    + Str1(':')
    + ToEol())

# A record is the opening line followed by any number of generic lines or
# blocks; alternatives are listed with the generic line first.
locus_record = begin_record_line + Rep(
    Alt(locus_line, accnum_block, phenotype_block, db_block))
# Alternative left disabled in the original source:
# locus_record = Rep( locus_line )