Package Bio :: Package Medline
[hide private]
[frames | no frames]

Source Code for Package Bio.Medline

  1  # Copyright 1999 by Jeffrey Chang.  All rights reserved. 
  2  # This code is part of the Biopython distribution and governed by its 
  3  # license.  Please see the LICENSE file that should have been included 
  4  # as part of this package. 
  5   
  6  """ 
  7  This module provides code to work with Medline. 
  8   
  9  Classes: 
 10  Record           A dictionary holding Medline data. 
 11   
 12  Functions: 
 13  read             Reads one Medline record 
 14  parse            Allows you to iterate over a bunch of Medline records 
 15  """ 
 16   
class Record(dict):
    """A dictionary holding information from a Medline record.

    All data are stored under the mnemonic appearing in the Medline
    file. These mnemonics have the following interpretations:

    Mnemonic  Description
    AB        Abstract
    CI        Copyright Information
    AD        Affiliation
    IRAD      Investigator Affiliation
    AID       Article Identifier
    AU        Author
    FAU       Full Author
    CN        Corporate Author
    DCOM      Date Completed
    DA        Date Created
    LR        Date Last Revised
    DEP       Date of Electronic Publication
    DP        Date of Publication
    EDAT      Entrez Date
    GS        Gene Symbol
    GN        General Note
    GR        Grant Number
    IR        Investigator Name
    FIR       Full Investigator Name
    IS        ISSN
    IP        Issue
    TA        Journal Title Abbreviation
    JT        Journal Title
    LA        Language
    LID       Location Identifier
    MID       Manuscript Identifier
    MHDA      MeSH Date
    MH        MeSH Terms
    JID       NLM Unique ID
    RF        Number of References
    OAB       Other Abstract
    OCI       Other Copyright Information
    OID       Other ID
    OT        Other Term
    OTO       Other Term Owner
    OWN       Owner
    PG        Pagination
    PS        Personal Name as Subject
    FPS       Full Personal Name as Subject
    PL        Place of Publication
    PHST      Publication History Status
    PST       Publication Status
    PT        Publication Type
    PUBM      Publishing Model
    PMC       PubMed Central Identifier
    PMID      PubMed Unique Identifier
    RN        Registry Number/EC Number
    NM        Substance Name
    SI        Secondary Source ID
    SO        Source
    SFM       Space Flight Mission
    STAT      Status
    SB        Subset
    TI        Title
    TT        Transliterated Title
    VI        Volume
    CON       Comment on
    CIN       Comment in
    EIN       Erratum in
    EFR       Erratum for
    CRI       Corrected and Republished in
    CRF       Corrected and Republished from
    PRIN      Partial retraction in
    PROF      Partial retraction of
    RPI       Republished in
    RPF       Republished from
    RIN       Retraction in
    ROF       Retraction of
    UIN       Update in
    UOF       Update of
    SPIN      Summary for patients in
    ORI       Original report in
    """

    def __init__(self):
        """Create an empty record with the legacy attributes set to defaults.

        The dictionary itself starts out empty.  String-valued attributes
        default to '' and list-valued attributes to a fresh empty list, so
        no list is shared between instances.
        """
        # The __init__ function can be removed when we remove the old parser
        dict.__init__(self)

        self.id = ''
        self.pubmed_id = ''

        self.mesh_headings = []
        self.mesh_tree_numbers = []
        self.mesh_subheadings = []

        self.abstract = ''
        self.comments = []
        self.abstract_author = ''
        self.english_abstract = ''

        self.source = ''
        self.publication_types = []
        self.number_of_references = ''

        self.authors = []
        self.no_author = ''
        self.address = ''

        self.journal_title_code = ''
        self.title_abbreviation = ''
        self.issn = ''
        self.journal_subsets = []
        self.country = ''
        self.languages = []

        self.title = ''
        self.transliterated_title = ''
        self.call_number = ''
        self.issue_part_supplement = ''
        self.volume_issue = ''
        self.publication_date = ''
        self.year = ''
        self.pagination = ''

        self.special_list = ''

        self.substance_name = ''
        self.gene_symbols = []
        self.secondary_source_ids = []
        self.identifications = []
        self.registry_numbers = []

        self.personal_name_as_subjects = []

        self.record_originators = []
        self.entry_date = ''
        self.entry_month = ''
        self.class_update_date = ''
        self.last_revision_date = ''
        self.major_revision_date = ''

        self.undefined = []
152 153
def parse(handle):
    """Read Medline records one by one from the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing one or more Medline records.  Blank lines
    separate the records; leading blank lines are skipped.

    On each line the first four characters (right-stripped) are the
    mnemonic and everything from the seventh character on is the value.
    A line starting with six spaces continues the previous field.

    This is a generator yielding one Record per Medline record.  Every
    field is stored under its mnemonic as a list of strings, except the
    mnemonics listed in textkeys below, whose pieces are joined with a
    single space into one string.

    A ValueError is raised during iteration if a record starts with a
    continuation line, since there is no field to attach it to.

    Typical usage:

        from Bio import Medline
        handle = open("mymedlinefile")
        records = Medline.parse(handle)
        for record in records:
            print(record['TI'])

    """
    # These keys point to string values
    textkeys = ("ID", "PMID", "SO", "RF", "NI", "JC", "TA", "IS", "CY", "TT",
                "CA", "IP", "VI", "DP", "YR", "PG", "LID", "DA", "LR", "OWN",
                "STAT", "DCOM", "PUBM", "DEP", "PL", "JID", "SB", "PMC",
                "EDAT", "MHDA", "PST", "AB", "AD", "EA", "TI", "JT")
    # Spelled as a product so the width of the prefix cannot be lost to
    # whitespace normalisation.
    continuation = " " * 6
    handle = iter(handle)
    # First skip blank lines
    for line in handle:
        line = line.rstrip()
        if line:
            break
    else:
        # Nothing but blank lines (or nothing at all): no records.
        return
    record = Record()
    key = None  # mnemonic of the field currently being filled
    finished = False
    while not finished:
        if line.startswith(continuation):  # continuation line
            if key is None:
                raise ValueError("Continuation line without a preceding "
                                 "field: %r" % line)
            record[key].append(line[6:])
        elif line:
            key = line[:4].rstrip()
            if key not in record:
                record[key] = []
            record[key].append(line[6:])
        try:
            # The next() builtin works on Python 2.6+ and Python 3.
            line = next(handle)
        except StopIteration:
            finished = True
        else:
            line = line.rstrip()
            if line:
                continue
        # A blank line or the end of the input closes the current record.
        # Join each list of strings into one string.  A separate loop
        # variable is used so the current field mnemonic is not clobbered.
        for textkey in textkeys:
            if textkey in record:
                record[textkey] = " ".join(record[textkey])
        if record:
            yield record
            record = Record()
            key = None
207
def read(handle):
    """Read a single Medline record from the handle.

    The handle is either a Medline file, a file-like object, or a list
    of lines describing a Medline record.

    Only the first record found by parse() is returned; anything after
    it in the handle is not examined.  If the handle contains no record
    at all, StopIteration is raised.

    Typical usage:

        from Bio import Medline
        handle = open("mymedlinefile")
        record = Medline.read(handle)
        print(record['TI'])

    """
    records = parse(handle)
    # The next() builtin works on Python 2.6+ and Python 3, unlike the
    # Python 2 only records.next() method.
    return next(records)
224