Package Bio :: Package SeqIO :: Module SwissIO
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqIO.SwissIO

  1  # Copyright 2006-2009 by Peter Cock and Michiel de Hoon. 
  2  # All rights reserved. 
  3  # 
  4  # This code is part of the Biopython distribution and governed by its 
  5  # license.  Please see the LICENSE file that should have been included 
  6  # as part of this package. 
  7   
  8  """Bio.SeqIO support for the "swiss" (aka SwissProt/UniProt) file format. 
  9   
 10  You are expected to use this module via the Bio.SeqIO functions. 
 11  See also the Bio.SwissProt module which offers more than just accessing 
 12  the sequences as SeqRecord objects.""" 
 13   
 14  from Bio import Seq 
 15  from Bio import SeqRecord 
 16  from Bio import Alphabet 
 17  from Bio import SeqFeature 
 18  from Bio import SwissProt 
 19       
 20  #This is a generator function! 
21 -def SwissIterator(handle) :
22 """Breaks up a Swiss-Prot/UniProt file into SeqRecord objects. 23 24 Every section from the ID line to the terminating // becomes 25 a single SeqRecord with associated annotation and features. 26 27 This parser is for the flat file "swiss" format as used by: 28 * Swiss-Prot aka SwissProt 29 * TrEMBL 30 * UniProtKB aka UniProt Knowledgebase 31 32 It does NOT read their new XML file format. 33 http://www.expasy.org/sprot/ 34 35 For consistency with BioPerl and EMBOSS we call this the "swiss" 36 format. 37 """ 38 swiss_records = SwissProt.parse(handle) 39 for swiss_record in swiss_records: 40 # Convert the SwissProt record to a SeqRecord 41 seq = Seq.Seq(swiss_record.sequence, Alphabet.generic_protein) 42 record = SeqRecord.SeqRecord(seq, 43 id=swiss_record.accessions[0], 44 name=swiss_record.entry_name, 45 description=swiss_record.description, 46 ) 47 record.description = swiss_record.description 48 for cross_reference in swiss_record.cross_references: 49 if len(cross_reference) < 2: continue 50 database, accession = cross_reference[:2] 51 dbxref = "%s:%s" % (database, accession) 52 if not dbxref in record.dbxrefs: 53 record.dbxrefs.append(dbxref) 54 annotations = record.annotations 55 annotations['accessions'] = swiss_record.accessions 56 annotations['date'] = swiss_record.created[0] 57 annotations['date_last_sequence_update'] = swiss_record.sequence_update[0] 58 if swiss_record.annotation_update: 59 annotations['date_last_annotation_update'] = swiss_record.annotation_update[0] 60 if swiss_record.gene_name: 61 annotations['gene_name'] = swiss_record.gene_name 62 annotations['organism'] = swiss_record.organism.rstrip(".") 63 annotations['taxonomy'] = swiss_record.organism_classification 64 annotations['ncbi_taxid'] = swiss_record.taxonomy_id 65 if swiss_record.host_organism: 66 annotations['organism_host'] = [word.rstrip(".") for word in swiss_record.host_organism] 67 if swiss_record.comments: 68 annotations['comment'] = "\n".join(swiss_record.comments) 69 if swiss_record.references: 70 annotations['references'] = [] 71 for reference in swiss_record.references: 72 feature = SeqFeature.Reference() 73 feature.comment = " ".join(["%s=%s;" % (key, value) for key, value in reference.comments]) 74 for key, value in reference.references: 75 if key=='PubMed': 76 feature.pubmed_id = value 77 elif key=='MEDLINE': 78 feature.medline_id = value 79 elif key=='DOI': 80 pass 81 elif key=='AGRICOLA': 82 pass 83 else: 84 raise ValueError, "Unknown key %s found in references" % key 85 feature.authors = reference.authors 86 feature.title = reference.title 87 feature.journal = reference.location 88 annotations['references'].append(feature) 89 if swiss_record.keywords: 90 record.annotations['keywords'] = swiss_record.keywords 91 yield record
92 93 if __name__ == "__main__" : 94 print "Quick self test..." 95 96 example_filename = "../../Tests/SwissProt/sp008" 97 98 import os 99 if not os.path.isfile(example_filename): 100 print "Missing test file %s" % example_filename 101 else : 102 #Try parsing it! 103 handle = open(example_filename) 104 records = SwissIterator(handle) 105 for record in records: 106 print record.name 107 print record.id 108 print record.annotations['keywords'] 109 print repr(record.annotations['organism']) 110 print record.seq.tostring()[:20] + "..." 111 handle.close() 112