Module database
[hide private]
[frames] | [no frames]

Source Code for Module database

   1  ### BITPIM 
   2  ### 
   3  ### Copyright (C) 2004 Roger Binns <rogerb@rogerbinns.com> 
   4  ### 
   5  ### This program is free software; you can redistribute it and/or modify 
   6  ### it under the terms of the BitPim license as detailed in the LICENSE file. 
   7  ### 
   8  ### $Id: database.py 4697 2008-08-24 21:21:14Z djpham $ 
   9   
  10  """Interface to the database""" 
from __future__ import with_statement

import copy
import functools
import os
import random
import sha
import sys
import time

import apsw

import common
###
### The first section of this file deals with typical objects used to
### represent data items and various methods for wrapping them.
###


class basedataobject(dict):
    """A base object derived from dict that is used for various
    records.  Existing code can just continue to treat it as a dict.
    New code can treat it as dict, as well as access via attribute
    names (ie object["foo"] or object.foo).  Attribute name access
    will always give a result, including None if the name is not in
    the dict.

    As a bonus this class includes checking of attribute names and
    types in non-production runs.  That will help catch typos etc.
    For production runs we may be receiving data that was written out
    by a newer version of BitPim so we don't check or error."""
    # which properties we know about
    _knownproperties=[]
    # which ones we know about that should be a list of dicts
    # ('*' means any key is accepted inside the member dicts)
    _knownlistproperties={'serials': ['sourcetype', '*']}
    # which ones we know about that should be a dict
    _knowndictproperties={}

    if __debug__:
        # in debug code we check key name and value types

        def _check_property(self,name,value=None):
            # Validate that name is a known property and, when value is
            # supplied, that it has the right shape/type for that property.
            assert isinstance(name, (str, unicode)), "keys must be a string type"
            assert name in self._knownproperties or name in self._knownlistproperties or name in self._knowndictproperties, "unknown property named '"+name+"'"
            if value is None: return
            if name in getattr(self, "_knownlistproperties"):
                assert isinstance(value, list), "list properties ("+name+") must be given a list as value"
                # each list member must be a dict
                for v in value:
                    self._check_property_dictvalue(name,v)
                return
            if name in getattr(self, "_knowndictproperties"):
                assert isinstance(value, dict), "dict properties ("+name+") must be given a dict as value"
                self._check_property_dictvalue(name,value)
                return
            # the value must be a basetype supported by apsw/SQLite
            assert isinstance(value, (str, unicode, buffer, int, long, float)), "only serializable types supported for values"

        def _check_property_dictvalue(self, name, value):
            # Validate one dict that is either a member of a list property
            # or the value of a dict property.
            assert isinstance(value, dict), "item(s) in "+name+" (a list) must be dicts"
            assert name in self._knownlistproperties or name in self._knowndictproperties
            if name in self._knownlistproperties:
                for key in value:
                    assert key in self._knownlistproperties[name] or '*' in self._knownlistproperties[name], "dict key "+key+" as member of item in list "+name+" is not known"
                    v=value[key]
                    assert isinstance(v, (str, unicode, buffer, int, long, float)), "only serializable types supported for values"
            elif name in self._knowndictproperties:
                for key in value:
                    assert key in self._knowndictproperties[name] or '*' in self._knowndictproperties[name], "dict key "+key+" as member of dict in item "+name+" is not known"
                    v=value[key]
                    assert isinstance(v, (str, unicode, buffer, int, long, float)), "only serializable types supported for values"

        def update(self, items):
            # checked version of dict.update - only accepts a dict
            assert isinstance(items, dict), "update only supports dicts" # Feel free to fix this code ...
            for k in items:
                self._check_property(k, items[k])
            super(basedataobject, self).update(items)

        def __getitem__(self, name):
            # check when they are retrieved, not set.  I did try
            # catching the append method, but the layers of nested
            # namespaces got too confused
            self._check_property(name)
            v=super(basedataobject, self).__getitem__(name)
            self._check_property(name, v)
            return v

        def __setitem__(self, name, value):
            self._check_property(name, value)
            super(basedataobject,self).__setitem__(name, value)

        def __setattr__(self, name, value):
            # note that we map setattr to update the dict
            self._check_property(name, value)
            self.__setitem__(name, value)

        def __getattr__(self, name):
            # attribute access for known properties; returns None when the
            # property is known but not currently present in the dict
            if name not in self._knownproperties and name not in self._knownlistproperties and name not in self._knowndictproperties:
                raise AttributeError(name)
            self._check_property(name)
            if name in self.keys():
                return self[name]
            return None

        def __delattr__(self, name):
            self._check_property(name)
            if name in self.keys():
                del self[name]

    else:
        # non-debug mode - we don't do any attribute name/value type
        # checking as the data may (legitimately) be from a newer
        # version of the program.

        def __setattr__(self, name, value):
            # note that we map setattr to update the dict
            super(basedataobject,self).__setitem__(name, value)

        def __getattr__(self, name):
            # and getattr checks the dict
            if name not in self._knownproperties and name not in self._knownlistproperties and name not in self._knowndictproperties:
                raise AttributeError(name)
            if name in self.keys():
                return self[name]
            return None

        def __delattr__(self, name):
            if name in self.keys():
                del self[name]

    # various methods for managing serials

    def GetBitPimSerial(self):
        "Returns the BitPim serial for this item"
        if "serials" not in self:
            raise KeyError("no bitpim serial present")
        for v in self.serials:
            if v["sourcetype"]=="bitpim":
                return v["id"]
        raise KeyError("no bitpim serial present")

    # rng seeded at startup; shared by all instances (class attributes)
    _persistrandom=random.Random()
    _shathingy=None

    def _getnextrandomid(self, item):
        """Returns random ids used to give unique serial numbers to items

        @param item: any object - its memory location is used to help randomness
        @returns: a 20 character hexdigit string
        """
        # NOTE(review): the digest object accumulates state across calls,
        # so each result also depends on every previous call
        if basedataobject._shathingy is None:
            basedataobject._shathingy=sha.new()
            basedataobject._shathingy.update(`basedataobject._persistrandom.random()`)
        basedataobject._shathingy.update(`id(self)`)
        basedataobject._shathingy.update(`basedataobject._persistrandom.random()`)
        basedataobject._shathingy.update(`id(item)`)
        return basedataobject._shathingy.hexdigest()

    def EnsureBitPimSerial(self):
        "Ensures this entry has a serial"
        if self.serials is None:
            self.serials=[]
        for v in self.serials:
            if v["sourcetype"]=="bitpim":
                return
        self.serials.append({'sourcetype': "bitpim", "id": self._getnextrandomid(self.serials)})
class dataobjectfactory:
    "Called by the code to read in objects when it needs a new object container"

    def __init__(self, dataobjectclass=basedataobject):
        # remember which class to instantiate for each new record
        self.dataobjectclass=dataobjectclass

    if __debug__:
        # checked construction: go through update() so the data object can
        # validate key names and value types
        def newdataobject(self, values={}):
            obj=self.dataobjectclass()
            if values:
                obj.update(values)
            return obj
    else:
        # production construction: hand the values straight to the class
        def newdataobject(self, values={}):
            return self.dataobjectclass(values)
def extractbitpimserials(dict):
    """Returns a new dict with keys being the bitpim serial for each
    row.  Each item must be derived from basedataobject.

    @param dict: maps arbitrary keys to records; each record must provide
        GetBitPimSerial().  (The parameter shadows the builtin dict but is
        kept for interface compatibility.)
    @returns: a new dict keyed by BitPim serial
    """
    res={}
    # .values() rather than the Python2-only .itervalues(): behaviour is
    # identical here and it also works on newer Pythons/other mappings
    for record in dict.values():
        res[record.GetBitPimSerial()]=record
    return res
203
def ensurebitpimserials(dict):
    """Ensures that all records have a BitPim serial.  Each item must
    be derived from basedataobject (ie provide EnsureBitPimSerial)."""
    # .values() rather than the Python2-only .itervalues(); identical here
    for record in dict.values():
        record.EnsureBitPimSerial()
209
def findentrywithbitpimserial(dict, serial):
    """Returns the entry from dict whose bitpim serial matches serial

    @raises KeyError: if no entry has a matching serial
    """
    # .values() rather than the Python2-only .itervalues(); identical here
    for record in dict.values():
        if record.GetBitPimSerial()==serial:
            return record
    # fix: message previously read "not item with serial ... found"
    raise KeyError("no item with serial "+serial+" found")
216
def ensurerecordtype(dict, factory):
    """Ensures that every value in dict is a basedataobject, converting
    plain records via factory.newdataobject where necessary.

    @param dict: mapping of key to record; modified in place
    @param factory: a dataobjectfactory used to wrap non-basedataobject records
    """
    # .items() instead of the Python2-only .iteritems(): it snapshots the
    # pairs, which is also safer while we reassign values in the loop
    for key,record in dict.items():
        if not isinstance(record, basedataobject):
            dict[key]=factory.newdataobject(record)
# a factory that uses dicts to allocate new data objects
dictdataobjectfactory=dataobjectfactory(dict)


###
### Actual database interaction is from this point on
###


# Change this to True to see what is going on under the hood.  It
# will produce a lot of output!
TRACE=False
def ExclusiveWrapper(method):
    """Wrap a method so that it has an exclusive lock on the database
    (noone else can read or write) until it has finished.

    The first positional argument of every wrapped call must be a
    Database instance; it is used as the context manager that opens and
    closes the transaction.

    @param method: the Database method to wrap
    @returns: the wrapped method
    """

    # note that the existing threading safety checks in apsw will
    # catch any thread abuse issues.

    # functools.wraps preserves __doc__ (as the old manual setattr did)
    # and also __name__/__module__, which helps debugging and tracebacks
    @functools.wraps(method)
    def _transactionwrapper(*args, **kwargs):
        # arg[0] should be a Database instance
        assert isinstance(args[0], Database)
        with args[0]:
            return method(*args, **kwargs)

    return _transactionwrapper
def sqlquote(s):
    "returns an sqlite quoted string (the return value will begin and end with single quotes)"
    # double up embedded single quotes, then wrap the whole thing
    escaped=s.replace("'", "''")
    return "'%s'" % (escaped,)
257
def idquote(s):
    """returns an sqlite quoted identifier (eg for when a column name is also an SQL keyword

    The value returned is quoted in square brackets"""
    return "[%s]" % (s,)
263
class IntegrityCheckFailed(Exception):
    """Raised by Database.__init__ when SQLite's PRAGMA integrity_check
    reports anything other than "ok"."""
    pass
265
class Database:
    """BitPim's interface to an apsw/SQLite database, providing versioned
    record storage (see savemajordict/getmajordictvalues)."""

    # Make this class a context manager so it can be used with WITH blocks
    def __enter__(self):
        # Begin an exclusive transaction.  Entries nest: only the outermost
        # one actually issues BEGIN; inner ones just bump the counter.
        self.excounter+=1
        # NOTE(review): this resets the write flag on every (including
        # nested) enter, which can clear a pending-write mark set by an
        # outer caller - confirm this is intended
        self.transactionwrite=False
        if self.excounter==1:
            if TRACE:
                print "BEGIN EXCLUSIVE TRANSACTION"
            self.cursor.execute("BEGIN EXCLUSIVE TRANSACTION")
            # start the transaction with a fresh schema cache
            self._schemacache={}
        return self
278
    def __exit__(self, ex_type, ex_value, tb):
        # Close the outermost transaction: COMMIT on success when writes
        # happened, ROLLBACK on exception with writes, otherwise plain END.
        self.excounter-=1
        if self.excounter==0:
            w=self.transactionwrite
            if tb is None:
                # no exception, so commit
                cmd="COMMIT TRANSACTION" if w else "END TRANSACTION"
            else:
                # an exception occurred, so rollback
                cmd="ROLLBACK TRANSACTION" if w else "END TRANSACTION"
            if TRACE:
                print cmd
            self.cursor.execute(cmd)
292
    def __del__(self):
        # connections have to be closed now (the True argument presumably
        # forces the close even with outstanding cursors - see apsw docs)
        self.connection.close(True)
296
    def __init__(self, filename, virtualtables=None):
        """
        @param filename: database filename
        @param virtualtables: a list of dict specifying the virtual tables
            Each dict is expected to have the following keys:
            'tablename': the name of the virtual table
            'modulename': the name of the module that implements this virtual
                table
            'moduleclass': the ModuleBase subclass that implements this
                virtual table
            'args': arguments passed to instantiation of the module class
        @raises IntegrityCheckFailed: if the database fails SQLite's
            integrity check
        """
        self.connection=apsw.Connection(filename)
        self.cursor=self.connection.cursor()
        # first tell sqlite to use the pre 3.4 format.  this will allow downgrades
        self.cursor.execute("PRAGMA legacy_file_format=1") # nb you don't get an error for unknown pragmas
        # we always do an integrity check second
        icheck=[]
        print "database integrity check"
        for row in self.cursor.execute("PRAGMA integrity_check"):
            icheck.extend(row)
        print "database integrity check complete"
        icheck="\n".join(icheck)
        if icheck!="ok":
            raise IntegrityCheckFailed(icheck)
        # exclusive lock counter
        self.excounter=0
        # this should be set to true by any code that writes - it is
        # used by the exclusivewrapper to tell if it should do a
        # commit/rollback or just a plain end
        self.transactionwrite=False
        # a cache of the table schemas
        self._schemacache={}
        # replace the placeholder methods with the real cursor entry points
        self.sql=self.cursor.execute
        self.sqlmany=self.cursor.executemany
        if TRACE:
            self.cursor.setexectrace(self._sqltrace)
            self.cursor.setrowtrace(self._rowtrace)
        if virtualtables is not None:
            # virtual tables are specified
            for vtable in virtualtables:
                # register the module
                self.connection.createmodule(vtable['modulename'],
                                             vtable['moduleclass'](*vtable['args']))
                if not self.doestableexist(vtable['tablename']):
                    # and declare the virtual table
                    self.sql('CREATE VIRTUAL TABLE %s USING %s;'%(idquote(vtable['tablename']),
                                                                  idquote(vtable['modulename'])))
345
    def _sqltrace(self, cmd, bindings):
        # exec tracer installed when TRACE is on: print each statement
        # (and its bindings) and return True so execution proceeds
        print "SQL:",cmd
        if bindings:
            print " bindings:",bindings
        return True
351
    def _rowtrace(self, *row):
        # row tracer installed when TRACE is on: print each result row
        # and pass it through unchanged
        print "ROW:",row
        return row
355
    def sql(self, statement, params=()):
        "Executes statement and return a generator of the results"
        # this is replaced in init (bound to cursor.execute); reaching
        # this placeholder means __init__ has not run
        assert False
360
    def sqlmany(self, statement, params):
        "execute statements repeatedly with params"
        # this is replaced in init (bound to cursor.executemany); reaching
        # this placeholder means __init__ has not run
        assert False
365
    def doestableexist(self, tablename):
        "Returns True if the named table exists in the database"
        if tablename in self._schemacache:
            # its schema is cached, so it must exist
            return True
        return bool(self.sql("select count(*) from sqlite_master where type='table' and name=%s" % (sqlquote(tablename),)).next()[0])
370
    def getcolumns(self, tablename, onlynames=False):
        """Returns the columns of the named table (results are cached).

        @param onlynames: if True return just a list of column names,
            otherwise a list of [column number, name, type] where the type
            has " primary key" appended when applicable
        """
        res=self._schemacache.get(tablename,None)
        if res is None:
            res=[]
            for colnum,name,type, _, default, primarykey in self.sql("pragma table_info("+idquote(tablename)+")"):
                if primarykey:
                    type+=" primary key"
                res.append([colnum,name,type])
            self._schemacache[tablename]=res
        if onlynames:
            return [name for colnum,name,type in res]
        return res
    @ExclusiveWrapper
    def savemajordict(self, tablename, dict, timestamp=None):
        """This is the entrypoint for saving a first level dictionary
        such as the phonebook or calendar.

        @param tablename: name of the table to use
        @param dict: The dictionary of record.  The key must be the uniqueid for each record.
               The @L{extractbitpimserials} function can do the conversion for you for
               phonebook and similar formatted records.
        @param timestamp: the UTC time in seconds since the epoch.  This is
               stored as the __timestamp__ of the new rows; it defaults to
               the current time.
        """

        if timestamp is None:
            timestamp=time.time()

        # work on a shallow copy of dict
        dict=dict.copy()

        # make sure the table exists first
        if not self.doestableexist(tablename):
            # create table and include meta-fields
            self.transactionwrite=True
            self.sql("create table %s (__rowid__ integer primary key, __timestamp__, __deleted__ integer, __uid__ varchar)" % (idquote(tablename),))

        # get the latest values for each guid ...
        current=self.getmajordictvalues(tablename)
        # compare what we have, and update/mark deleted as appropriate ...
        deleted=[k for k in current if k not in dict]
        # NOTE: 'new' is not used below; new records are simply the entries
        # of dict that survive the pruning of unmodified ones
        new=[k for k in dict if k not in current]
        modified=[k for k in dict if k in current] # only potentially modified ...

        # deal with modified first
        dl=[]
        for i,k in enumerate(modified):
            if dict[k]==current[k]:
                # unmodified!
                del dict[k]
                dl.append(i)
        # delete from the end so earlier indices stay valid
        dl.reverse()
        for i in dl:
            del modified[i]

        # add deleted entries back into dict
        for d in deleted:
            assert d not in dict
            dict[d]=current[d]
            dict[d]["__deleted__"]=1

        # now we only have new, changed and deleted entries left in dict

        # examine the keys in dict
        dk=[]
        for k in dict.keys():
            # make a copy since we modify values, but it doesn't matter about deleted since we own those
            if k not in deleted:
                dict[k]=dict[k].copy()
            for kk in dict[k]:
                if kk not in dk:
                    dk.append(kk)
        # verify that they don't start with __
        assert len([k for k in dk if k.startswith("__") and not k=="__deleted__"])==0
        # get database keys
        dbkeys=self.getcolumns(tablename, onlynames=True)
        # are any missing?
        missing=[k for k in dk if k not in dbkeys]
        if len(missing):
            creates=[]
            # for each missing key, we have to work out if the value
            # is a list or dict type (which we indirect to another table)
            for m in missing:
                islist=None
                isdict=None
                isnotindirect=None
                for r in dict.keys():
                    record=dict[r]
                    v=record.get(m,None)
                    if v is None:
                        continue
                    if isinstance(v, list):
                        islist=record
                    elif isinstance(v,type({})):
                        isdict=record
                    else:
                        isnotindirect=record
                    # in devel code, we check every single value
                    # in production, we just use the first we find
                    if not __debug__:
                        break
                if islist is None and isdict is None and isnotindirect is None:
                    # they have the key but no record has any values, so we ignore it
                    del dk[dk.index(m)]
                    continue
                # don't do this type abuse at home ...
                if int(islist is not None)+int(isdict is not None)+int(isnotindirect is not None)!=int(True):
                    # can't have it more than one way
                    raise ValueError("key %s for table %s has values with inconsistent types. eg LIST: %s, DICT: %s, NOTINDIRECT: %s" % (m,tablename,`islist`,`isdict`,`isnotindirect`))
                if islist is not None:
                    creates.append( (m, "indirectBLOB") )
                    continue
                if isdict:
                    creates.append( (m, "indirectdictBLOB"))
                    continue
                if isnotindirect is not None:
                    creates.append( (m, "valueBLOB") )
                    continue
                assert False, "You can't possibly get here!"
            if len(creates):
                self._altertable(tablename, creates, [], createindex=1)

        # write out indirect values
        dbtkeys=self.getcolumns(tablename)
        # for every indirect, we have to replace the value with a pointer
        for _,n,t in dbtkeys:
            if t in ("indirectBLOB", "indirectdictBLOB"):
                indirects={}
                for r in dict.keys():
                    record=dict[r]
                    v=record.get(n,None)
                    if v is not None:
                        if not len(v): # set zero length lists/dicts to None
                            record[n]=None
                        else:
                            if t=="indirectdictBLOB":
                                indirects[r]=[v] # make it a one item dict list
                            else:
                                indirects[r]=v
                if len(indirects):
                    # updateindirecttable replaces each value with a pointer string
                    self.updateindirecttable(tablename+"__"+n, indirects)
                    for r in indirects.keys():
                        dict[r][n]=indirects[r]

        # and now the main table
        for k in dict.keys():
            record=dict[k]
            record["__uid__"]=k
            # only store non-None fields, in sorted column order
            rk=[x for x,y in record.items() if y is not None]
            rk.sort()
            cmd=["insert into", idquote(tablename), "( [__timestamp__],"]
            cmd.append(",".join([idquote(r) for r in rk]))
            cmd.extend([")", "values", "(?,"])
            cmd.append(",".join(["?" for r in rk]))
            cmd.append(")")
            self.sql(" ".join(cmd), [timestamp]+[record[r] for r in rk])
            self.transactionwrite=True
528
    def updateindirecttable(self, tablename, indirects):
        """Stores lists of dicts in tablename, reusing existing identical
        rows where possible.  On return each value in indirects has been
        replaced by a pointer string of the form "tablename,rowid1,rowid2,"
        suitable for storing in the owning table (see _getindirect)."""
        # this is mostly similar to savemajordict, except we only deal
        # with lists of dicts, and we find existing records with the
        # same value if possible

        # does the table even exist?
        if not self.doestableexist(tablename):
            # create table and include meta-fields
            self.sql("create table %s (__rowid__ integer primary key)" % (idquote(tablename),))
            self.transactionwrite=True
        # get the list of keys from indirects
        datakeys=[]
        for i in indirects.keys():
            assert isinstance(indirects[i], list)
            for v in indirects[i]:
                assert isinstance(v, dict)
                for k in v.keys():
                    if k not in datakeys:
                        assert not k.startswith("__")
                        datakeys.append(k)
        # get the keys from the table
        dbkeys=self.getcolumns(tablename, onlynames=True)
        # are any missing?
        missing=[k for k in datakeys if k not in dbkeys]
        if len(missing):
            self._altertable(tablename, [(m,"valueBLOB") for m in missing], [], createindex=2)
        # for each row we now work out the indirect information
        for r in indirects:
            res=tablename+","
            for record in indirects[r]:
                # look for an existing row with identical values (absent
                # keys must be NULL in the matching row)
                cmd=["select __rowid__ from", idquote(tablename), "where"]
                params=[]
                coals=[]
                for d in datakeys:
                    v=record.get(d,None)
                    if v is None:
                        coals.append(idquote(d))
                    else:
                        if cmd[-1]!="where":
                            cmd.append("and")
                        cmd.extend([idquote(d), "= ?"])
                        params.append(v)
                assert cmd[-1]!="where" # there must be at least one non-none column!
                if len(coals)==1:
                    cmd.extend(["and",coals[0],"isnull"])
                elif len(coals)>1:
                    cmd.extend(["and coalesce(",",".join(coals),") isnull"])

                found=None
                for found in self.sql(" ".join(cmd), params):
                    # get matching row
                    found=found[0]
                    break
                if found is None:
                    # add it
                    cmd=["insert into", idquote(tablename), "("]
                    params=[]
                    for k in record:
                        if cmd[-1]!="(":
                            cmd.append(",")
                        cmd.append(k)
                        params.append(record[k])
                    cmd.extend([")", "values", "("])
                    cmd.append(",".join(["?" for p in params]))
                    cmd.append("); select last_insert_rowid()")
                    found=self.sql(" ".join(cmd), params).next()[0]
                    self.transactionwrite=True
                res+=`found`+","
            indirects[r]=res
    @ExclusiveWrapper
    def getmajordictvalues(self, tablename, factory=dictdataobjectfactory,
                           at_time=None):
        """Returns the most recent live record for each uid in tablename.

        @param factory: used to allocate each returned record object
        @param at_time: if a number (seconds since the epoch), return the
            records as they were at that time instead of the latest ones
        @returns: a dict mapping uid to record
        """

        if not self.doestableexist(tablename):
            return {}

        res={}
        uids=[u[0] for u in self.sql("select distinct __uid__ from %s" % (idquote(tablename),))]
        schema=self.getcolumns(tablename)
        # find the column positions of the meta fields we need
        for colnum,name,type in schema:
            if name=='__deleted__':
                deleted=colnum
            elif name=='__uid__':
                uid=colnum
        # get all relevant rows (newest row per uid via order by/limit 1)
        if isinstance(at_time, (int, float)):
            sql_string="select * from %s where __uid__=? and __timestamp__<=%f order by __rowid__ desc limit 1" % (idquote(tablename), float(at_time))
        else:
            sql_string="select * from %s where __uid__=? order by __rowid__ desc limit 1" % (idquote(tablename),)
        indirects={}
        for row in self.sqlmany(sql_string, [(u,) for u in uids]):
            if row[deleted]:
                # newest row is a deletion marker - uid no longer exists
                continue
            record=factory.newdataobject()
            for colnum,name,type in schema:
                if name.startswith("__") or type not in ("valueBLOB", "indirectBLOB", "indirectdictBLOB") or row[colnum] is None:
                    continue
                if type=="valueBLOB":
                    record[name]=row[colnum]
                    continue
                assert type=="indirectBLOB" or type=="indirectdictBLOB"
                # remember the pointer; resolved in one pass below
                if name not in indirects:
                    indirects[name]=[]
                indirects[name].append( (row[uid], row[colnum], type) )
            res[row[uid]]=record
        # now get the indirects
        for name,values in indirects.iteritems():
            for uid,v,type in values:
                fieldvalue=self._getindirect(v)
                if fieldvalue:
                    if type=="indirectBLOB":
                        res[uid][name]=fieldvalue
                    else:
                        # indirectdictBLOB was stored as a one item list
                        res[uid][name]=fieldvalue[0]
        return res
645
    def _getindirect(self, what):
        """Gets a list of values (indirect) as described by what
        @param what: what to get - eg phonebook_serials,1,3,5,
              (note there is always a trailing comma)
        @returns: a list of dicts, one per referenced row
        """

        tablename,rows=what.split(',', 1)
        schema=self.getcolumns(tablename)

        res=[]
        for row in self.sqlmany("select * from %s where __rowid__=?" %
                                (idquote(tablename),), [(int(long(r)),) for r in rows.split(',') if len(r)]):
            record={}
            for colnum,name,type in schema:
                # only data columns holding actual values are returned
                if name.startswith("__") or type not in ("valueBLOB", "indirectBLOB", "indirectdictBLOB") or row[colnum] is None:
                    continue
                if type=="valueBLOB":
                    record[name]=row[colnum]
                    continue
                assert type=="indirectBLOB" or type=="indirectdictBLOB"
                # nested indirection is not supported by this storage scheme
                assert False, "indirect in indirect not handled"
            assert len(record),"Database._getindirect has zero len record"
            res.append(record)
        assert len(res), "Database._getindirect has zero len res"
        return res
671
    def _altertable(self, tablename, columnstoadd, columnstodel, createindex=0):
        """Alters the named table by deleting the specified columns, and
        adding the listed columns

        @param tablename: name of the table to alter
        @param columnstoadd: a list of (name,type) of the columns to add
        @param columnstodel: a list name of the columns to delete
        @param createindex: what sort of index to create.  0 means none, 1 means on just __uid__ and 2 is on all data columns
        """
        # indexes are automatically dropped when table is dropped so we don't need to
        dbtkeys=[x for x in self.getcolumns(tablename) \
                 if x[1] not in columnstodel]
        # clean out cache entry since we are about to invalidate it
        del self._schemacache[tablename]
        self.transactionwrite=True
        # stash the surviving columns in a temporary backup table ...
        cmd=["create", "temporary", "table", idquote("backup_"+tablename),
             "(",
             ','.join(['%s %s'%(idquote(n), t) for _,n,t in dbtkeys]),
             ")"]
        self.sql(" ".join(cmd))
        # copy the values into the temporary table
        self.sql("insert into %s select %s from %s" % (idquote("backup_"+tablename),
                                                       ','.join([idquote(n) for _,n,_ in dbtkeys]),
                                                       idquote(tablename)))
        # drop the source table
        self.sql("drop table %s" % (idquote(tablename),))
        # recreate the source table with new columns (reusing the create
        # statement built above)
        del cmd[1] # remove temporary
        cmd[2]=idquote(tablename) # change tablename
        cmd[-2]=','.join(['%s %s'%(idquote(n), t) for _,n,t in dbtkeys]+\
                         ['%s %s'%(idquote(n), t) for n,t in columnstoadd]) # new list of columns
        self.sql(" ".join(cmd))
        # create index if needed
        if createindex:
            if createindex==1:
                cmd=["create index", idquote("__index__"+tablename), "on", idquote(tablename), "(__uid__)"]
            elif createindex==2:
                cmd=["create index", idquote("__index__"+tablename), "on", idquote(tablename), "("]
                cols=[]
                for _,n,t in dbtkeys:
                    if not n.startswith("__"):
                        cols.append(idquote(n))
                for n,t in columnstoadd:
                    cols.append(idquote(n))
                cmd.extend([",".join(cols), ")"])
            else:
                raise ValueError("bad createindex "+`createindex`)
            self.sql(" ".join(cmd))
        # put values back in
        cmd=["insert into", idquote(tablename), '(',
             ','.join([idquote(n) for _,n,_ in dbtkeys]),
             ")", "select * from", idquote("backup_"+tablename)]
        self.sql(" ".join(cmd))
        self.sql("drop table "+idquote("backup_"+tablename))
726 727 @ExclusiveWrapper
728 - def deleteold(self, tablename, uids=None, minvalues=3, maxvalues=5, keepoldest=93):
729 """Deletes old entries from the database. The deletion is based 730 on either criterion of maximum values or age of values matching. 731 732 @param uids: You can limit the items deleted to this list of uids, 733 or None for all entries. 734 @param minvalues: always keep at least this number of values 735 @param maxvalues: maximum values to keep for any entry (you 736 can supply None in which case no old entries will be removed 737 based on how many there are). 738 @param keepoldest: values older than this number of days before 739 now are removed. You can also supply None in which case no 740 entries will be removed based on age. 741 @returns: number of rows removed,number of rows remaining 742 """ 743 if not self.doestableexist(tablename): 744 return (0,0) 745 746 timecutoff=0 747 if keepoldest is not None: 748 timecutoff=time.time()-(keepoldest*24*60*60) 749 if maxvalues is None: 750 maxvalues=sys.maxint-1 751 752 if uids is None: 753 uids=[u[0] for u in self.sql("select distinct __uid__ from %s" % (idquote(tablename),))] 754 755 deleterows=[] 756 757 for uid in uids: 758 deleting=False 759 for count, (rowid, deleted, timestamp) in enumerate( 760 self.sql("select __rowid__,__deleted__, __timestamp__ from %s where __uid__=? order by __rowid__ desc" % (idquote(tablename),), [uid])): 761 if count<minvalues: 762 continue 763 if deleting: 764 deleterows.append(rowid) 765 continue 766 if count>=maxvalues or timestamp<timecutoff: 767 deleting=True 768 if deleted: 769 # we are ok, this is an old value now deleted, so we can remove it 770 deleterows.append(rowid) 771 continue 772 # we don't want to delete current data (which may 773 # be very old and never updated) 774 if count>0: 775 deleterows.append(rowid) 776 continue 777 778 self.sqlmany("delete from %s where __rowid__=?" % (idquote(tablename),), [(r,) for r in deleterows]) 779 780 return len(deleterows), self.sql("select count(*) from "+idquote(tablename)).next()[0]
    @ExclusiveWrapper
    def savelist(self, tablename, values):
        """Just save a list of items (eg categories).  There is no versioning or transaction history.

        Internally the table has two fields.  One is the actual value and the other indicates if
        the item is deleted.
        """
        # NOTE(review): this method never sets self.transactionwrite, so the
        # wrapper will close with plain END rather than COMMIT/ROLLBACK -
        # confirm that is intended

        # a tuple of the quoted table name
        tn=(idquote(tablename),)

        if not self.doestableexist(tablename):
            self.sql("create table %s (__rowid__ integer primary key, item, __deleted__ integer)" % tn)

        # some code to demonstrate my lack of experience with SQL ....
        delete=[]   # live rows whose item is no longer wanted
        known=[]    # every item currently in the table (live or dead)
        revive=[]   # dead rows whose item is wanted again
        for row, item, dead in self.sql("select __rowid__,item,__deleted__ from %s" % tn):
            known.append(item)
            if item in values:
                # we need this row
                if dead:
                    revive.append((row,))
                continue
            if dead:
                # don't need this entry and it is dead anyway
                continue
            delete.append((row,))
        create=[(v,) for v in values if v not in known]

        # update table as appropriate
        self.sqlmany("update %s set __deleted__=0 where __rowid__=?" % tn, revive)
        self.sqlmany("update %s set __deleted__=1 where __rowid__=?" % tn, delete)
        self.sqlmany("insert into %s (item, __deleted__) values (?,0)" % tn, create)
        if __debug__:
            # sanity check: reloading must give back exactly what we saved
            vdup=values[:]
            vdup.sort()
            vv=self.loadlist(tablename)
            vv.sort()
            assert vdup==vv
    @ExclusiveWrapper
    def loadlist(self, tablename):
        """Loads a list of items (eg categories)"""
        if not self.doestableexist(tablename):
            return []
        # only items not marked as deleted
        return [v[0] for v in self.sql("select item from %s where __deleted__=0" % (idquote(tablename),))]
830 831 @ExclusiveWrapper
832 - def getchangescount(self, tablename):
833 """Return the number of additions, deletions, and modifications 834 made to this table over time. 835 Expected fields containted in this table: __timestamp__,__deleted__, 836 __uid__ 837 Assuming that both __rowid__ and __timestamp__ values are both ascending 838 """ 839 if not self.doestableexist(tablename): 840 return {} 841 tn=idquote(tablename) 842 # get the unique dates of changes 843 sql_cmd='select distinct __timestamp__ from %s' % tn 844 # setting up the return dict 845 res={} 846 for t in self.sql(sql_cmd): 847 res[t[0]]={ 'add': 0, 'del': 0, 'mod': 0 } 848 # go through the table and count the changes 849 existing_uid={} 850 sql_cmd='select __timestamp__,__uid__,__deleted__ from %s order by __timestamp__ asc' % tn 851 for e in self.sql(sql_cmd): 852 tt=e[0] 853 uid=e[1] 854 del_flg=e[2] 855 if existing_uid.has_key(uid): 856 if del_flg: 857 res[tt]['del']+=1 858 del existing_uid[uid] 859 else: 860 res[tt]['mod']+=1 861 else: 862 existing_uid[uid]=None 863 res[tt]['add']+=1 864 return res
865
class ModuleBase(object):
    """Base class to implement a specific Virtual Table module with apsw.
    For more info:
    http://www.sqlite.org/cvstrac/wiki/wiki?p=VirtualTables
    http://www.sqlite.org/cvstrac/wiki/wiki?p=VirtualTableMethods
    http://www.sqlite.org/cvstrac/wiki/wiki?p=VirtualTableBestIndexMethod
    """

    def __init__(self, field_names):
        """@param field_names: tuple of data column names (__rowid__ is
        prepended automatically)"""
        self.connection=None
        self.table_name=None
        # the first field is ALWAYS __rowid__ to be consistent with Database
        self.field_names=('__rowid__',)+field_names
    def Create(self, connection, modulename, databasename, vtablename, *args):
        """Called when the virtual table is created.
        @param connection: an instance of apsw.Connection
        @param modulename: string name of the module being invoked
        @param databasename: string name of this database
        @param vtablename: string name of this new virtual table
        @param args: additional arguments sent from the CREATE VIRTUAL TABLE
            statement
        @returns: a tuple of 2 values: an sql string describing the table, and
            an object implementing it: Me!
        """
        self.table_name=vtablename
        # every data column is declared valueBLOB to be consistent with Database
        fields=['__rowid__ integer primary key']
        for field in self.field_names[1:]:
            fields.append(idquote(field)+' valueBLOB')
        fields='(%s)'%','.join(fields)
        return ('create table %s %s;'%(idquote(vtablename), fields), self)
895
896 - def Connect(self, connection, modulename, databasename, vtablename, *args):
897 """Connect to an existing virtual table, by default it is identical 898 to Create 899 """ 900 return self.Create(connection, modulename, databasename, vtablename, 901 *args)
902
903 - def Destroy(self):
904 """Release a connection to a virtual table and destroy the underlying 905 table implementation. By default, we do nothing. 906 """ 907 pass
908 - def Disconnect(self):
909 """Release a connection to a virtual table. By default, we do nothing. 910 """ 911 pass
912
913 - def BestIndex(self, constraints, orderby):
914 """Provide information on how to best access this table. 915 Must be overriden by subclass. 916 @param constraints: a tuple of (column #, op) defining a constraints 917 @param orderby: a tuple of (column #, desc) defining the order by 918 @returns a tuple of up to 5 values: 919 0: aConstraingUsage: a tuple of the same size as constraints. 920 Each item is either None, argv index(int), or (argv index, omit(Bool)). 921 1: idxNum(int) 922 2: idxStr(string) 923 3: orderByConsumed(Bool) 924 4: estimatedCost(float) 925 """ 926 raise NotImplementedError
927
928 - def Begin(self):
929 pass
930 - def Sync(self):
931 pass
932 - def Commit(self):
933 pass
934 - def Rollback(self):
935 pass
936
937 - def Open(self):
938 """Create/prepare a cursor used for subsequent reading. 939 @returns: the implementor object: Me! 940 """ 941 return self
942 - def Close(self):
943 """Close a cursor previously created by Open 944 By default, do nothing 945 """ 946 pass
947 - def Filter(self, idxNum, idxStr, argv):
948 """Begin a search of a virtual table. 949 @param idxNum: int value passed by BestIndex 950 @param idxStr: string valued passed by BestIndex 951 @param argv: constraint parameters requested by BestIndex 952 @returns: None 953 """ 954 raise NotImplementedError
955 - def Eof(self):
956 """Determines if the current cursor points to a valid row. 957 The Sqlite doc is wrong on this. 958 @returns: True if NOT valid row, False otherwise 959 """ 960 raise NotImplementedError
961 - def Column(self, N):
962 """Find the value for the N-th column of the current row. 963 @param N: the N-th column 964 @returns: value of the N-th column 965 """ 966 raise NotImplementedError
967 - def Next(self):
968 """Move the cursor to the next row. 969 @returns: None 970 """ 971 raise NotImplementedError
972 - def Rowid(self):
973 """Return the rowid of the current row. 974 @returns: the rowid(int) of the current row. 975 """ 976 raise NotImplementedError
977 - def UpdateDeleteRow(self, rowid):
978 """Delete row rowid 979 @param rowid: 980 @returns: None 981 """ 982 raise NotImplementedError
983 - def UpdateInsertRow(self, rowid, fields):
984 """Insert a new row of data into the table 985 @param rowid: if not None, use this rowid. If None, create a new rowid 986 @param fields: a tuple of the field values in the order declared in 987 Create/Connet 988 @returns: rowid of the new row. 989 """ 990 raise NotImplementedError
991 - def UpdateChangeRow(self, rowid, newrowid, fields):
992 """Change the row of the current rowid with the new rowid and new values 993 @param rowid: rowid of the current row 994 @param newrowid: new rowid 995 @param fields: a tuple of the field values in the order declared in 996 Create/Connect 997 @returns: rowid of the new row 998 """ 999 raise NotImplementedError
if __name__=='__main__':
    # Self-test / benchmark harness: only runs when this module is executed
    # directly, never on import.
    import common
    import sys
    import time
    import os
    # NOTE(review): common, time and os are already imported at module top;
    # only the sys import here adds a new name.

    # route uncaught exceptions through BitPim's exception formatter
    sys.excepthook=common.formatexceptioneh
1008 1009 1010 # our own hacked version for testing 1011 - class phonebookdataobject(basedataobject):
1012 # no change to _knownproperties (all of ours are list properties) 1013 _knownlistproperties=basedataobject._knownlistproperties.copy() 1014 _knownlistproperties.update( {'names': ['title', 'first', 'middle', 'last', 'full', 'nickname'], 1015 'categories': ['category'], 1016 'emails': ['email', 'type'], 1017 'urls': ['url', 'type'], 1018 'ringtones': ['ringtone', 'use'], 1019 'addresses': ['type', 'company', 'street', 'street2', 'city', 'state', 'postalcode', 'country'], 1020 'wallpapers': ['wallpaper', 'use'], 1021 'flags': ['secret'], 1022 'memos': ['memo'], 1023 'numbers': ['number', 'type', 'speeddial'], 1024 # serials is in parent object 1025 }) 1026 _knowndictproperties=basedataobject._knowndictproperties.copy() 1027 _knowndictproperties.update( {'repeat': ['daily', 'orange']} )
    phonebookobjectfactory=dataobjectfactory(phonebookdataobject)

    # use the phonebook out of the examples directory (or wherever
    # DBTESTFILE points).  The .idx file is executable python which is
    # expected to define the 'phonebook' variable used below.
    try:
        execfile(os.getenv("DBTESTFILE", "examples/phonebook-index.idx"))
    except UnicodeError:
        common.unicode_execfile(os.getenv("DBTESTFILE", "examples/phonebook-index.idx"))

    # wrap the loaded records in phonebookdataobject instances
    ensurerecordtype(phonebook, phonebookobjectfactory)

    # keep a pristine copy; testfunc() works on copies of this
    phonebookmaster=phonebook
    def testfunc():
        """Round-trip the sample phonebook through a Database instance,
        asserting after each save that reading it back yields exactly what
        was written, and print timing figures.  sys.argv[1], if present,
        sets the iteration count; more than one iteration also triggers
        the repeated read/write benchmarks."""
        global phonebook, TRACE, db

        # note that iterations increases the size of the
        # database/journal and will make each one take longer and
        # longer as the db/journal gets bigger
        if len(sys.argv)>=2:
            iterations=int(sys.argv[1])
        else:
            iterations=1
        if iterations >1:
            # silence per-statement tracing during benchmarking
            TRACE=False

        db=Database("testdb")

        b4=time.time()

        for i in xrange(iterations):
            # work on a fresh copy so deletions/edits don't accumulate
            phonebook=phonebookmaster.copy()

            # write it out
            db.savemajordict("phonebook", extractbitpimserials(phonebook))

            # check what we get back is identical
            v=db.getmajordictvalues("phonebook")
            assert v==extractbitpimserials(phonebook)

            # do a deletion
            del phonebook[17] # james bond @ microsoft
            db.savemajordict("phonebook", extractbitpimserials(phonebook))
            # and verify
            v=db.getmajordictvalues("phonebook")
            assert v==extractbitpimserials(phonebook)

            # modify a value
            phonebook[15]['addresses'][0]['city']="Bananarama"
            db.savemajordict("phonebook", extractbitpimserials(phonebook))
            # and verify
            v=db.getmajordictvalues("phonebook")
            assert v==extractbitpimserials(phonebook)

        after=time.time()

        print "time per iteration is",(after-b4)/iterations,"seconds"
        print "total time was",after-b4,"seconds for",iterations,"iterations"

        if iterations>1:
            print "testing repeated reads"
            b4=time.time()
            for i in xrange(iterations*10):
                db.getmajordictvalues("phonebook")
            after=time.time()
            print "\ttime per iteration is",(after-b4)/(iterations*10),"seconds"
            print "\ttotal time was",after-b4,"seconds for",iterations*10,"iterations"
            print
            print "testing repeated writes"
            x=extractbitpimserials(phonebook)
            k=x.keys()
            b4=time.time()
            for i in xrange(iterations*10):
                # we remove 1/3rd of the entries on each iteration
                # (which third varies with i, wrapping around via modulo)
                xcopy=x.copy()
                for l in range(i,i+len(k)/3):
                    del xcopy[k[l%len(x)]]
                db.savemajordict("phonebook",xcopy)
            after=time.time()
            print "\ttime per iteration is",(after-b4)/(iterations*10),"seconds"
            print "\ttotal time was",after-b4,"seconds for",iterations*10,"iterations"
    if len(sys.argv)==3:
        # a third command-line argument means: also run under hotshot
        def profile(filename, command):
            """Run 'command' under the hotshot profiler, dump the stats
            sorted three different ways, then exit the process.

            @param filename: file the raw profile data is written to
            @param command: python source string to execute under the profiler
            """
            import hotshot, hotshot.stats, os
            file=os.path.abspath(filename)
            profile=hotshot.Profile(file)
            profile.run(command)
            profile.close()
            del profile
            # NOTE(review): howmany is never used; the literal 100 is
            # passed to print_stats below instead
            howmany=100
            stats=hotshot.stats.load(file)
            stats.strip_dirs()
            # report top entries by own time, cumulative time, and call count
            stats.sort_stats('time', 'calls')
            stats.print_stats(100)
            stats.sort_stats('cum', 'calls')
            stats.print_stats(100)
            stats.sort_stats('calls', 'time')
            stats.print_stats(100)
            sys.exit(0)

        profile("dbprof", "testfunc()")

    else:
        testfunc()