1
2
3
4
5
6
7
8
9 """Connect with a BioSQL database and load Biopython-like objects from it.
10
11 This provides interfaces for loading biological objects from a relational
12 database, and is compatible with the BioSQL standards.
13 """
14 import BioSeq
15 import Loader
16 import DBUtils
17
19 """Main interface for loading an existing BioSQL-style database.
20
21 This function is the easiest way to retrieve a connection to a
22 database, doing something like:
23
24 >>> from BioSeq import BioSeqDatabase
25 >>> server = BioSeqDatabase.open_database(user = "root", db="minidb")
26
27 the various options are:
28 driver -> The name of the database driver to use for connecting. The
29 driver should implement the python DB API. By default, the MySQLdb
30 driver is used.
31 user -> the username to connect to the database with.
32 password, passwd -> the password to connect with
33 host -> the hostname of the database
34 database or db -> the name of the database
35 """
36 module = __import__(driver)
37 connect = getattr(module, "connect")
38
39
40 kw = kwargs.copy()
41 if driver == "MySQLdb":
42 if kw.has_key("database"):
43 kw["db"] = kw["database"]
44 del kw["database"]
45 if kw.has_key("password"):
46 kw["passwd"] = kw["password"]
47 del kw["password"]
48 else:
49
50 if kw.has_key("db"):
51 kw["database"] = kw["db"]
52 del kw["db"]
53 if kw.has_key("passwd"):
54 kw["password"] = kw["passwd"]
55 del kw["passwd"]
56 if driver == "psycopg" and not kw.get("database"):
57 kw["database"] = "template1"
58 try:
59 conn = connect(**kw)
60 except module.InterfaceError:
61
62
63 if kw.has_key("database"):
64 kw["dbname"] = kw["database"]
65 del kw["database"]
66 elif kw.has_key("db"):
67 kw["dbname"] = kw["db"]
68 del kw["db"]
69
70 dsn = ' '.join(['='.join(i) for i in kw.items()])
71 conn = connect(dsn)
72
73 return DBServer(conn, module)
74
def __init__(self, conn, module, module_name=None):
    """Wrap an open database connection for BioSQL access.

    conn -> an already-open connection object; it is handed to Adaptor.
    module -> the database driver module the connection came from.
    module_name -> name used to select the DBUtils helper; when omitted
    (None) the driver module's own __name__ is used.
    """
    self.module = module
    # Fall back to the driver's module name so callers only need to pass
    # module_name when it differs from module.__name__.
    if module_name is None:
        module_name = module.__name__
    self.adaptor = Adaptor(conn, DBUtils.get_dbutils(module_name))
    self.module_name = module_name
82
84 return self.__class__.__name__ + "(%r)" % self.adaptor.conn
93
100
def new_database(self, db_name, authority=None, description=None):
    """Add a new database to the server and return it.

    Inserts one row (name, authority, description) into the biodatabase
    table through self.adaptor, then returns a BioSeqDatabase built from
    the same adaptor and db_name.
    """
    # Values are passed separately from the SQL text so the driver does
    # the parameter substitution.
    sql = r"INSERT INTO biodatabase (name, authority, description)" \
          r" VALUES (%s, %s, %s)"
    self.adaptor.execute(sql, (db_name, authority, description))
    return BioSeqDatabase(self.adaptor, db_name)
109
111 """Load a database schema into the given database.
112
113 This is used to create tables, etc when a database is first created.
114 sql_file should specify the complete path to a file containing
115 SQL entries for building the tables.
116 """
117
118
119
120
121
122 sql_handle = open(sql_file, "rb")
123 sql = r""
124 for line in sql_handle.xreadlines():
125 if line.find("--") == 0:
126 pass
127 elif line.find("#") == 0:
128 pass
129 elif line.strip():
130 sql += line.strip()
131 sql += ' '
132
133
134
135
136
137 if self.module_name in ["psycopg"]:
138 self.adaptor.cursor.execute(sql)
139
140
141 elif self.module_name in ["MySQLdb"]:
142 sql_parts = sql.split(";")
143 for sql_line in sql_parts[:-1]:
144 self.adaptor.cursor.execute(sql_line)
145 else:
146 raise ValueError("Module %s not supported by the loader." %
147 (self.module_name))
148
151 self.conn = conn
152 self.cursor = conn.cursor()
153 self.dbutils = dbutils
154
157
160
163
166
168 return self.conn.close()
169
171 self.cursor.execute(
172 r"select biodatabase_id from biodatabase where name = %s",
173 (dbname,))
174 rv = self.cursor.fetchall()
175 if not rv:
176 raise KeyError("Cannot find biodatabase with name %r" % dbname)
177
178
179 return rv[0][0]
180
182 sql = r"select bioentry_id from bioentry where name = %s"
183 fields = [name]
184 if dbid:
185 sql += " and biodatabase_id = %s"
186 fields.append(dbid)
187 self.cursor.execute(sql, fields)
188 rv = self.cursor.fetchall()
189 if not rv:
190 raise IndexError("Cannot find display id %r" % name)
191 if len(rv) > 1:
192 raise IndexError("More than one entry with display id %r" % name)
193 return rv[0][0]
194
196 sql = r"select bioentry_id from bioentry where accession = %s"
197 fields = [name]
198 if dbid:
199 sql += " and biodatabase_id = %s"
200 fields.append(dbid)
201 self.cursor.execute(sql, fields)
202 rv = self.cursor.fetchall()
203 if not rv:
204 raise IndexError("Cannot find accession %r" % name)
205 if len(rv) > 1:
206 raise IndexError("More than one entry with accession %r" % name)
207 return rv[0][0]
208
210 sql = r"select bioentry_id from bioentry where accession = %s"
211 fields = [name]
212 if dbid:
213 sql += " and biodatabase_id = %s"
214 fields.append(dbid)
215 return self.execute_and_fetch_col0(sql, fields)
216
218 acc_version = name.split(".")
219 if len(acc_version) > 2:
220 raise IndexError("Bad version %r" % name)
221 acc = acc_version[0]
222 if len(acc_version) == 2:
223 version = acc_version[1]
224 else:
225 version = "0"
226 sql = r"SELECT bioentry_id FROM bioentry WHERE accession = %s" \
227 r" AND version = %s"
228 fields = [acc, version]
229 if dbid:
230 sql += " and biodatabase_id = %s"
231 fields.append(dbid)
232 self.cursor.execute(sql, fields)
233 rv = self.cursor.fetchall()
234 if not rv:
235 raise IndexError("Cannot find version %r" % name)
236 if len(rv) > 1:
237 raise IndexError("More than one entry with version %r" % name)
238 return rv[0][0]
239
241
242 sql = "SELECT bioentry_id FROM bioentry WHERE identifier = %s"
243 fields = [identifier]
244 if dbid:
245 sql += " and biodatabase_id = %s"
246 fields.append(dbid)
247 self.cursor.execute(sql, fields)
248 rv = self.cursor.fetchall()
249 if not rv:
250 raise IndexError("Cannot find display id %r" % identifier)
251 return rv[0][0]
252
256
def list_bioentry_ids(self, dbid):
    """Return the bioentry_id of every bioentry row in database dbid.

    The query is delegated to self.execute_and_fetch_col0, whose result
    is returned unchanged.
    """
    return self.execute_and_fetch_col0(
        "SELECT bioentry_id FROM bioentry WHERE biodatabase_id = %s",
        (dbid,))
261
263 return self.execute_and_fetch_col0(
264 "SELECT name FROM bioentry WHERE biodatabase_id = %s",
265 (dbid,))
266
268 """Return ids given a SQL statement to select for them.
269
270 This assumes that the given SQL does a SELECT statement that
271 returns a list of items. This parses them out of the 2D list
272 they come as and just returns them in a list.
273 """
274 return self.cursor.execute_and_fetch_col0(sql, args)
275
277 self.cursor.execute(sql, args or ())
278 rv = self.cursor.fetchall()
279 assert len(rv) == 1, "Expected 1 response, got %d" % len(rv)
280 return rv[0]
281
def execute(self, sql, args=None):
    """Just execute an sql command.

    sql -> the statement text handed to self.cursor.execute.
    args -> parameters for the statement; a falsy value (None, empty
    sequence) is replaced by an empty tuple.

    Nothing is fetched or returned.
    """
    self.cursor.execute(sql, args or ())
286
293
295 self.cursor.execute(sql, args or ())
296 return [field[0] for field in self.cursor.fetchall()]
297
301
302 _allowed_lookups = {
303
304 'primary_id': "fetch_seqid_by_identifier",
305 'gi': "fetch_seqid_by_identifier",
306 'display_id': "fetch_seqid_by_display_id",
307 'name': "fetch_seqid_by_display_id",
308 'accession': "fetch_seqid_by_accession",
309 'version': "fetch_seqid_by_version",
310 }
311
318 return "BioSeqDatabase(%r, %r)" % (self.adaptor, self.name)
319
328
337
346
348 """Gets a *list* of Bio::Seq objects by accession number
349
350 Example: seqs = db.get_Seq_by_acc('X77802')
351
352 """
353 seqids = self.adaptor.fetch_seqids_by_accession(self.dbid, name)
354 return [BioSeq.DBSeqRecord(self.adaptor, seqid) for seqid in seqids]
355
357
358
359
360
361 raise NotImplementedError("waiting for Python 2.2's iter")
362
364 """Array of all the primary_ids of the sequences in the database.
365
366 These may be ids (display style) or accession numbers or
367 something else completely different - they *are not*
368 meaningful outside of this database implementation.
369 """
370 return self.adaptor.list_bioentry_ids(self.dbid)
371
380
382 if len(kwargs) != 1:
383 raise TypeError("single key/value parameter expected")
384 k, v = kwargs.items()[0]
385 if not _allowed_lookups.has_key(k):
386 raise TypeError("lookup() expects one of %s, not %r" % \
387 (repr(_allowed_lookups.keys())[1:-1], repr(k)))
388 lookup_name = _allowed_lookups[k]
389 lookup_func = getattr(self.adaptor, lookup_name)
390 seqid = lookup_func(self.dbid, v)
391 return BioSeq.DBSeqRecord(self.adaptor, seqid)
392
394 """Gets a Bio::Seq object by the primary (internal) id.
395
396 The primary id in these cases has to come from
397 $db->get_all_primary_ids. There is no other way to get (or
398 guess) the primary_ids in a database.
399 """
400 return self[seqid]
401
def load(self, record_iterator):
    """Load a set of SeqRecords into the BioSQL database.

    record_iterator is either a list of SeqRecord objects, or an
    Iterator object that returns SeqRecord objects (such as the
    output from the Bio.SeqIO.parse() function), which will be
    used to populate the database.

    Example:
    from Bio import SeqIO
    count = db.load(SeqIO.parse(open(filename), format))

    Returns the number of records loaded.
    """
    # One loader, bound to this database's adaptor and id, handles
    # every record.
    db_loader = Loader.DatabaseLoader(self.adaptor, self.dbid)
    num_records = 0
    for cur_record in record_iterator:
        # The counter is bumped before the load call, as in the original.
        num_records += 1
        db_loader.load_seqrecord(cur_record)
    return num_records
422