PyXR

c:\projects\bitpim\src \ sqlite2_file.py



0001 ### BITPIM
0002 ###
0003 ### Copyright (C) 2006 Joe Pham <djpham@bitpim.org>
0004 ###
0005 ### This program is free software; you can redistribute it and/or modify
0006 ### it under the terms of the BitPim license as detailed in the LICENSE file.
0007 ###
0008 ### $Id: sqlite2_file.py 3460 2006-07-08 23:55:09Z djpham $
0009 
0010 """Handle reading data off an SQLite 2.x data file"""
0011 
0012 # System modules
0013 import struct
0014 
# ---- SQLite 2.x constants ----

# Byte-order prefix handed to struct by the parsers in this module.
# Little-endian until Page1 reads the file header and re-assigns it.
BO = '<'

# Text that every database file must start with (including the trailing NUL).
signature = '** This file contains an SQLite 2.1 database **\x00'
signature_len = len(signature)

# The 4 bytes that follow the signature; Page1 compares them against these
# two values to pick the byte order.
LE_string = '\x28\x75\xe3\xda'
BE_string = '\xda\xe3\x75\x28'

# Size in bytes of one page of the file.
Page_Length = 1024

# Data sizes above this make Cell read an overflow page number.
Max_Cell_Data_Len = 238
0024 
0025 # SQLite 2.x file handling stuff------------------------------------------------
class InvalidFile(Exception):
    """Raised when the data does not start with the expected signature.

    The rejected leading bytes are available as the ``bad_sig`` attribute.
    """

    def __init__(self, bad_sig):
        # The module-level signature is only read, so no 'global' is required.
        _message = 'Invalid signature: expecting %s, got %s' % (signature,
                                                                bad_sig)
        Exception.__init__(self, _message)
        self.bad_sig = bad_sig
0031 
class InvalidByteOrder(Exception):
    """Raised when the byte-order marker is neither known value.

    The offending bytes are available as the ``bad_data`` attribute.
    """

    def __init__(self, bad_data):
        _message = 'Invalid Byte Order String: %s' % bad_data
        Exception.__init__(self, _message)
        self.bad_data = bad_data
0036 
class BadTable(Exception):
    """Raised when a requested table name is not known.

    The requested name is available as the ``name`` attribute.
    """

    def __init__(self, name):
        _message = 'Failed to find table: %s' % name
        Exception.__init__(self, _message)
        self.name = name
0041 
class Cell(object):
    """A single cell parsed out of a page buffer.

    Attributes set by the constructor:
      prev_key      -- page number read from bytes 0-3; when non-zero its
                       entries are emitted before this cell's own data
      next_cell     -- offset (within the same page) of the next cell,
                       read from bytes 6-7; 0 ends the chain
      key           -- result of unpacking bytes 12-15 (see note below)
      data          -- the part of the cell data stored inside the cell
      data_size     -- full size of the cell data (24-bit value)
      overflow_page -- page number holding the rest of the data, or 0
    """

    def __init__(self, data):
        """Parse the cell that starts at the first byte of *data*.

        Raises struct.error if *data* is too short for the fixed fields.
        """
        # BO and Max_Cell_Data_Len are module globals that are only read
        # here, so no 'global' statement is needed.
        self.prev_key = struct.unpack('%cI' % BO, data[:4])[0]
        # Bytes 4-5 and byte 8 hold the key size, which nothing here uses.
        self.next_cell = struct.unpack('%cH' % BO, data[6:8])[0]
        # Slice rather than index: indexing yields an int (not a 1-byte
        # buffer) for bytes/bytearray input, which struct.unpack rejects.
        _datasize_hi = struct.unpack('B', data[9:10])[0]
        _data_size = struct.unpack('%cH' % BO, data[10:12])[0]
        # The data size is a 24-bit value: a 16-bit word plus a high byte.
        _data_size += _datasize_hi << 16
        # NOTE(review): unlike the other fields this is stored as the
        # 1-tuple returned by struct.unpack, not its first element; left
        # unchanged in case external callers depend on it -- confirm.
        self.key = struct.unpack('%cI' % BO, data[12:16])
        if _data_size > Max_Cell_Data_Len:
            # Data spills over: keep the in-cell part and remember where
            # the remainder lives.
            # NOTE(review): this slice holds 234 bytes, while get_data()
            # subtracts Max_Cell_Data_Len (238) from data_size when sizing
            # the overflow read -- confirm against the file format.
            self.data = data[16:250]
            self.overflow_page = struct.unpack('%cI' % BO, data[250:254])[0]
        else:
            self.data = data[16:16 + _data_size]
            self.overflow_page = 0
        self.data_size = _data_size

    def get_data(self, page, db_file):
        """Return a list of data strings reachable from this cell.

        The list holds, in order: whatever db_file.get_data() returns for
        prev_key (if non-zero), this cell's own data (with any overflow
        data appended), and the data of the following cells on *page*
        (if next_cell is non-zero).

        page    -- object providing get_cell_data(offset, db_file)
        db_file -- object providing get_data(page_num[, data_size])
        """
        _res = []
        if self.prev_key:
            _res = db_file.get_data(self.prev_key)
        _my_data = self.data
        if self.overflow_page:
            _my_data += db_file.get_data(self.overflow_page,
                                         self.data_size - Max_Cell_Data_Len)
        _res.append(_my_data)
        if self.next_cell:
            _res += page.get_cell_data(self.next_cell, db_file)
        return _res
0075 
class Page(object):
    """An ordinary (non-overflow) page of the database file.

    Attributes set by the constructor:
      prev_key  -- page number read from bytes 0-3; when non-zero its
                   entries are emitted before this page's own cells
      cell0     -- offset of the first cell on this page (bytes 4-5);
                   0 means the page has no cells
      freeblock -- 16-bit value read from bytes 6-7 (not used here)
      data      -- the raw page, truncated to Page_Length bytes
    """

    def __init__(self, data):
        """Parse the 8-byte page header at the start of *data*.

        Raises struct.error if *data* is shorter than the header.
        """
        # BO and Page_Length are module globals that are only read here.
        self.prev_key = struct.unpack('%cI' % BO, data[:4])[0]
        self.cell0 = struct.unpack('%cH' % BO, data[4:6])[0]
        self.freeblock = struct.unpack('%cH' % BO, data[6:8])[0]
        self.data = data[:Page_Length]

    def get_cell_data(self, offset, db_file):
        """Return the data list of the cell chain starting at *offset*."""
        return Cell(self.data[offset:]).get_data(self, db_file)

    def get_data(self, db_file, _=None):
        """Return the payload of this page as a list of data strings.

        db_file -- object providing get_data(page_num[, data_size])
        The second argument is ignored.
        """
        _res = []
        if self.prev_key:
            _res = db_file.get_data(self.prev_key)
        # Offset 0 is the page header itself, so a zero cell0 means there
        # is no cell to read; parsing one there would return garbage.
        if self.cell0:
            _res += self.get_cell_data(self.cell0, db_file)
        return _res
0094 
class Page1(object):
    """Header found at the very start of the database file.

    Constructing an instance validates the signature, sets the module-level
    byte order BO from the marker that follows it, and reads two 32-bit
    fields into ``first_free_page`` and ``freelist_pages``.

    Raises InvalidFile on a signature mismatch and InvalidByteOrder when
    the marker matches neither LE_string nor BE_string.
    """

    def __init__(self, data):
        # BO is re-assigned below; the other module names are only read.
        global BO
        _header = data[:signature_len]
        if _header != signature:
            raise InvalidFile(_header)
        # The 4-byte byte-order marker sits right after the signature.
        _marker_ofs = signature_len
        _marker = data[_marker_ofs:_marker_ofs + 4]
        if _marker == LE_string:
            _order = '<'
        elif _marker == BE_string:
            _order = '>'
        else:
            raise InvalidByteOrder(_marker)
        BO = _order
        # Two consecutive unsigned 32-bit fields follow the marker.
        _fmt = '%cI' % BO
        _first_ofs = _marker_ofs + 4
        _second_ofs = _first_ofs + 4
        self.first_free_page = struct.unpack(
            _fmt, data[_first_ofs:_first_ofs + 4])[0]
        self.freelist_pages = struct.unpack(
            _fmt, data[_second_ofs:_second_ofs + 4])[0]
0113 
class OverflowPage(object):
    """A page holding data that did not fit where it was first stored.

    ``next`` is the page number read from the first 4 bytes (0 = none);
    ``data`` is everything after those bytes, up to Page_Length.
    """

    # Most data bytes a single overflow page contributes.
    Max_Len = 1020

    def __init__(self, data):
        # BO and Page_Length are module globals that are only read here.
        (self.next,) = struct.unpack('%cI' % BO, data[:4])
        self.data = data[4:Page_Length]

    def get_data(self, db_file, data_size=None):
        """Return up to *data_size* bytes starting with this page.

        With no (or a zero) *data_size* the whole data area is returned.
        When more than Max_Len bytes are wanted, the remainder is fetched
        through db_file.get_data() from the ``next`` page, if there is one.
        """
        if not data_size:
            return self.data
        if data_size <= self.Max_Len:
            return self.data[:data_size]
        _chunk = self.data
        if self.next:
            _chunk += db_file.get_data(self.next, data_size - self.Max_Len)
        return _chunk
0132 
0133 class DBFile(object):
0134     def __init__(self, data):
0135         self.data=data
0136         self.page1=Page1(data)
0137         self.tables=self._get_tables()
0138 
0139     def get_data(self, page_num, data_size=None):
0140         global Page_Length
0141         if not page_num:
0142             return []
0143         # We cheat here a bit since we know the page length is 1k
0144 ##        _pg_ofs=(page_num-1)*Page_Length
0145         _pg_ofs=(page_num-1)<<10
0146         if data_size:
0147             return OverflowPage(self.data[_pg_ofs:_pg_ofs+Page_Length]).get_data(self,
0148                                                                                  data_size)
0149         return Page(self.data[_pg_ofs:_pg_ofs+Page_Length]).get_data(self)
0150 
0151     def extract_data(self, data, numofcols):
0152         # extract one string of data in to a list of numofcols
0153         _data_len=len(data)
0154         if _data_len<256:
0155             _ofs_size=1
0156         elif _data_len<65536:
0157             _ofs_size=2
0158         else:
0159             _ofs_size=3
0160         _idx=(numofcols+1)*_ofs_size
0161         return data[_idx:].split('\x00')[:numofcols]
0162 
0163     def _get_tables(self):
0164         # build a list of tables in this DB
0165         _res={}
0166         # the master table is stored in page 2, and has 5 fields
0167         for _entry in self.get_data(2):
0168             _row=self.extract_data(_entry, 5)
0169             if _row[0]=='table':
0170                 _res[_row[1]]={ 'name': _row[1],
0171                                 'page': int(_row[3]),
0172                                 'numofcols': _row[4].count(',')+1 }
0173         return _res
0174 
0175     def get_table_data(self, table_name):
0176         # return a list of rows of this table, each row is a list of values
0177         if not self.tables.has_key(table_name):
0178             raise BadTable(table_name)
0179         _page=self.tables[table_name]['page']
0180         _numofcols=self.tables[table_name]['numofcols']
0181         _res=[]
0182         for _entry in self.get_data(_page):
0183             _res.append(self.extract_data(_entry, _numofcols))
0184         return _res
0185 

Generated by PyXR 0.9.4