Package Bio :: Module SeqRecord
[hide private]
[frames] | no frames]

Source Code for Module Bio.SeqRecord

  1  # Stores data about the sequence 
  2   
  3  # NEEDS TO BE KEPT IN SYNC WITH THE REST OF BIOPYTHON AND BIOPERL 
  4  # In particular, the SeqRecord and BioSQL.BioSeq.DBSeqRecord classes 
  5  # need to be in sync (this is the BioSQL "Database SeqRecord", see 
  6  # also BioSQL.BioSeq.DBSeq which is the "Database Seq" class) 
  7   
class SeqRecord:
    """A SeqRecord object holds a sequence and information about it.

    Main attributes:
    id          - Identifier such as a locus tag (string)
    seq         - The sequence itself (Seq object)

    Additional attributes:
    name        - Sequence name, e.g. gene name (string)
    description - Additional text (string)
    dbxrefs     - List of database cross references (list of strings)
    features    - Any (sub)features defined (list of SeqFeature objects)
    annotations - Further information about the whole sequence (dictionary)
                  Most entries are lists of strings.
    """

    def __init__(self, seq, id="<unknown id>", name="<unknown name>",
                 description="<unknown description>", dbxrefs=None,
                 features=None):
        """Create a SeqRecord.

        Arguments:
        seq         - Sequence, required (Seq object)
        id          - Sequence identifier, recommended (string)
        name        - Sequence name, optional (string)
        description - Sequence description, optional (string)
        dbxrefs     - Database cross references, optional (list of strings)
        features    - Any (sub)features, optional (list of SeqFeature objects)

        Note that while an id is optional, we strongly recommend you supply a
        unique id string for each record.  This is especially important
        if you wish to write your sequences to a file.

        You can create a 'blank' SeqRecord object, and then populate the
        attributes later.  Note that currently the annotations dictionary
        cannot be specified when creating the SeqRecord.
        """
        self.seq = seq
        self.id = id
        self.name = name
        self.description = description

        # None is used as the default so that every record gets its own
        # fresh list rather than sharing one mutable default between records.
        if dbxrefs is None:
            dbxrefs = []
        self.dbxrefs = dbxrefs

        # annotations about the whole sequence
        self.annotations = {}

        # annotations about parts of the sequence
        if features is None:
            features = []
        self.features = features

    def __str__(self):
        """A human readable summary of the record and its annotation.

        Returns a newline separated string with one line each for the id,
        name, description and database cross-references (each line is
        omitted when the attribute is empty), one "/key=value" line per
        annotation, and finally the repr() of the sequence.
        """
        lines = []
        if self.id:
            lines.append("ID: %s" % self.id)
        if self.name:
            lines.append("Name: %s" % self.name)
        if self.description:
            lines.append("Description: %s" % self.description)
        if self.dbxrefs:
            lines.append("Database cross-references: "
                         + ", ".join(self.dbxrefs))
        for key in self.annotations:
            lines.append("/%s=%s" % (key, str(self.annotations[key])))
        # Don't want to include the entire sequence,
        # and showing the alphabet is useful:
        lines.append(repr(self.seq))
        return "\n".join(lines)

    def __repr__(self):
        """A concise summary of the record for debugging.

        Shows the repr() of seq, id, name, description and dbxrefs; the
        annotations and features are not included.
        """
        fields = (self.seq, self.id, self.name,
                  self.description, self.dbxrefs)
        return self.__class__.__name__ \
            + "(seq=%s, id=%s, name=%s, description=%s, dbxrefs=%s)" \
            % tuple(map(repr, fields))

    def __len__(self):
        """Returns the length of the sequence."""
        return len(self.seq)

    def __nonzero__(self):
        """Returns True regardless of the length of the sequence.

        This behaviour is for backwards compatibility, since until the
        __len__ method was added, a SeqRecord always evaluated as True.

        Note that in comparison, a Seq object will evaluate to False if it
        has a zero length sequence.

        WARNING: The SeqRecord may in future evaluate to False when its
        sequence is of zero length (in order to better match the Seq
        object behaviour)!
        """
        return True

    # Python 3 looks up __bool__ instead of __nonzero__; without this alias
    # truth testing would fall back to __len__ and an empty record would
    # evaluate to False, contradicting the behaviour documented above.
    __bool__ = __nonzero__
if __name__ == "__main__":
    # The following is a very quick example of how to create a SeqRecord
    # object.  It only runs when this module is executed as a script.
    from Bio.Seq import Seq
    from Bio.Alphabet import generic_protein
    record = SeqRecord(Seq("MASRGVNKVILVGNLGQDPEVRYMPNGGAVANITLATSESWRDKAT" \
                          +"GEMKEQTEWHRVVLFGKLAEVASEYLRKGSQVYIEGQLRTRKWTDQ" \
                          +"SGQDRYTTEVVVNVGGTMQMLGGRQGGGAPAGGNIGGGQPQGGWGQ" \
                          +"PQQPQGGNQFSGGAQSRPQQSAPAAPSNEPPMDFDDDIPF",
                           generic_protein),
                       id="NP_418483.1", name="b4059",
                       description="ssDNA-binding protein",
                       dbxrefs=["ASAP:13298", "GI:16131885", "GeneID:948570"])

    # Note that annotations must be added AFTER creating the record
    record.annotations["note"] = "This annotation was added later"

    # Single-argument print(...) is used so that these lines parse both as a
    # Python 2 print statement and as a Python 3 function call.
    print(str(record))
    print(repr(record))
    assert 178 == len(record)

    # One way to create a minimal record.
    record2 = SeqRecord(Seq(""))
    assert record2  # True even though length is zero
    assert not len(record2)