Attachment 'pycdb.py'
Download 1 """
2 pycdb.py - Python implementation of cdb
3
4 by Yusuke Shinyama
5 * public domain *
6
7 Coding style fixes (and tcdb removal) by Johannes Berg
8 """
9
10 import os
11 from struct import pack, unpack
12 from array import array
13
14
def cdbhash(s, n=0):
    """Return the 32-bit cdb hash of the key *s*.

    The hash starts at ``n + 5381`` and folds in every element of *s* as
    ``h = ((h * 33) ^ c) & 0xffffffff``.

    s -- the key.  Normally a byte string (``str`` on Python 2,
         ``bytes``/``bytearray`` on Python 3).  Text strings are accepted
         too; their characters are hashed by code point, so encode
         non-ASCII text to bytes first if the hash has to match the bytes
         stored in a cdb file.
    n -- optional offset added to the initial value 5381.

    For an empty *s* the result is ``n + 5381`` unchanged, because the
    32-bit mask is only applied when an element is folded in.
    """
    h = n + 5381
    for c in s:
        # Iterating a byte string yields ints on Python 3 but 1-character
        # strings on Python 2; normalise both to an integer.
        if not isinstance(c, int):
            c = ord(c)
        h = ((h * 33) ^ c) & 0xffffffff
    return h
18
# True when the host's native byte order is big-endian.  The tables handled
# by decode()/encode() are little-endian on disk, so such hosts must swap.
_BIG_ENDIAN = pack('=i', 1) == pack('>i', 1)


def decode(x):
    """Convert the little-endian byte string *x* into an array of unsigned ints.

    x -- raw bytes whose length is a multiple of the array item size.

    Returns a new ``array('I')`` holding the values in host byte order.
    NOTE(review): this assumes typecode 'I' is 4 bytes wide on the host,
    matching the 32-bit fields read elsewhere with ``'<II'`` -- confirm
    for exotic platforms.
    """
    a = array('I', x)
    if _BIG_ENDIAN:
        a.byteswap()
    return a


def encode(a):
    """Convert the array *a* into its little-endian byte string.

    a -- an ``array`` instance (as produced by ``decode``).

    The caller's array is never modified: on big-endian hosts a copy is
    byte-swapped instead of the original.
    """
    if _BIG_ENDIAN:
        # Swap a copy so the array passed in keeps its host-order values.
        a = a[:]
        a.byteswap()
    # array.tostring() was removed in Python 3.9 and array.tobytes() does
    # not exist on Python 2, so use whichever one is available.
    if hasattr(a, 'tobytes'):
        return a.tobytes()
    return a.tostring()
33
34
def cdbiter(fp, eod):
    """Generate every (key, value) record stored between offset 2048 and *eod*.

    fp  -- a seekable binary file object positioned anywhere; each record
           is located with an explicit seek before it is read.
    eod -- offset at which the record area ends; iteration stops once the
           next record offset reaches it.

    Each record is an 8-byte header (two little-endian unsigned 32-bit
    lengths: key length, value length) followed by the key and the value.

    NOTE: *fp* is closed once the generator has been run to exhaustion.
    It is left open if the generator is abandoned early.  Do not pass a
    handle that is still needed after a complete iteration.
    """
    offset = 2048
    while True:
        if offset >= eod:
            break
        fp.seek(offset)
        header = fp.read(8)
        key_len, val_len = unpack('<II', header)
        key = fp.read(key_len)
        value = fp.read(val_len)
        # header + key + value
        offset = offset + 8 + key_len + val_len
        yield key, value
    fp.close()
45
46
class CDBReader:
    """Read-only, dictionary-like access to a cdb file.

    The file is opened once in ``__init__`` and the same handle is used
    for key lookups and for record iteration.  Every read is preceded by
    an explicit seek, so lookups and iterators may be interleaved (from a
    single thread).  Keys and values are byte strings.

    Instances cannot be pickled (``__getstate__``/``__setstate__`` raise
    ``TypeError``).
    """

    def __init__(self, cdbname, docache=1):
        """Open *cdbname* and load its 2048-byte header.

        cdbname -- path of the cdb file.
        docache -- when true, values returned by lookups are remembered
                   in an in-memory dict and served from it afterwards.
        """
        self.name = cdbname
        self._fp = open(cdbname, 'rb')
        try:
            hash0 = decode(self._fp.read(2048))
            # 256 (table offset, slot count) pairs, one per hash bucket.
            self._hash0 = [(hash0[i], hash0[i + 1])
                           for i in range(0, 512, 2)]
        except Exception:
            # Do not leak the handle when the header cannot be parsed.
            self._fp.close()
            raise
        # Lazily loaded per-bucket hash tables.
        self._hash1 = [None] * 256
        # The first hash table starts where the record area ends.
        self._eod = hash0[0]
        self._docache = docache
        self._cache = {}
        self._keyiter = None
        self._eachiter = None

    def __getstate__(self):
        raise TypeError

    def __setstate__(self, dict):
        raise TypeError

    @staticmethod
    def _tobytes(x):
        """Return *x* as a byte string.

        Byte strings pass through unchanged.  Anything else goes through
        ``str()``; on Python 3 the resulting text is encoded as UTF-8.
        """
        if isinstance(x, bytes):
            return x
        if isinstance(x, bytearray):
            return bytes(x)
        s = str(x)
        if isinstance(s, bytes):
            # Python 2: str() already produced a byte string.
            return s
        return s.encode('utf-8')

    def _records(self):
        """Yield every (key, value) record in file order.

        Reads through the reader's own handle and never closes it, so the
        reader stays usable after an iteration has completed.
        """
        fp = self._fp
        pos = 2048
        while pos < self._eod:
            # Re-seek for every record: a lookup may have moved the file
            # position between two steps of this generator.
            fp.seek(pos)
            (klen, vlen) = unpack('<II', fp.read(8))
            k = fp.read(klen)
            v = fp.read(vlen)
            pos += 8 + klen + vlen
            yield (k, v)

    def close(self):
        """Close the underlying file; lookups and iteration fail afterwards."""
        self._fp.close()

    def __getitem__(self, k):
        """Return the value stored under key *k*; raise KeyError if absent."""
        k = self._tobytes(k)
        if k in self._cache:
            return self._cache[k]
        h = cdbhash(k)
        h1 = h & 0xff
        (pos_bucket, ncells) = self._hash0[h1]
        if ncells == 0:
            raise KeyError(k)
        hs = self._hash1[h1]
        if hs is None:
            # First lookup in this bucket: load its table of
            # (hash, record offset) slots and keep it for later.
            self._fp.seek(pos_bucket)
            hs = decode(self._fp.read(ncells * 8))
            self._hash1[h1] = hs
        # Probe linearly from the slot selected by the upper hash bits,
        # wrapping around at the end of the table.
        i = ((h >> 8) % ncells) * 2
        n = ncells * 2
        for _ in range(ncells):
            p1 = hs[i + 1]
            if p1 == 0:
                # An empty slot ends the probe sequence: key not present.
                raise KeyError(k)
            if hs[i] == h:
                self._fp.seek(p1)
                (klen, vlen) = unpack('<II', self._fp.read(8))
                k1 = self._fp.read(klen)
                if k1 == k:
                    v1 = self._fp.read(vlen)
                    if self._docache:
                        self._cache[k] = v1
                    return v1
            i = (i + 2) % n
        raise KeyError(k)

    def get(self, k, failed=None):
        """Return the value for *k*, or *failed* when the key is absent."""
        try:
            return self.__getitem__(k)
        except KeyError:
            return failed

    def has_key(self, k):
        """Return True when *k* is present in the database."""
        try:
            self.__getitem__(k)
            return True
        except KeyError:
            return False

    def __contains__(self, k):
        return self.has_key(k)

    def firstkey(self):
        """Restart key iteration and return the first key (None if empty)."""
        self._keyiter = None
        return self.nextkey()

    def nextkey(self):
        """Return the next key, or None once all keys have been returned."""
        if self._keyiter is None:
            self._keyiter = (k for (k, v) in self._records())
        return next(self._keyiter, None)

    def each(self):
        """Return the next (key, value) pair, or None once exhausted."""
        if self._eachiter is None:
            self._eachiter = self._records()
        return next(self._eachiter, None)

    def iterkeys(self):
        """Return an iterator over all keys in file order."""
        return (k for (k, v) in self._records())

    def itervalues(self):
        """Return an iterator over all values in file order."""
        return (v for (k, v) in self._records())

    def iteritems(self):
        """Return an iterator over all (key, value) pairs in file order."""
        return self._records()
141
142
class CDBMaker:
    """Build a cdb file.

    Records are appended to a temporary file with ``add``; ``finish``
    writes the hash tables and the header, closes the temporary file and
    moves it over the final name.

    Instances cannot be pickled (``__getstate__``/``__setstate__`` raise
    ``TypeError``).
    """

    def __init__(self, cdbname, tmpname):
        """Start a new database.

        cdbname -- final path of the database (created by ``finish``).
        tmpname -- path of the temporary file written in the meantime.
        """
        self.fn = cdbname
        self.fntmp = tmpname
        self.numentries = 0
        self._fp = open(tmpname, 'wb')
        # Records start right after the 2048-byte header.
        self._pos = 2048
        # One array per hash bucket holding hash, offset, hash, offset, ...
        self._bucket = [array('I') for _ in range(256)]

    def __len__(self):
        return self.numentries

    def __getstate__(self):
        raise TypeError

    def __setstate__(self, dict):
        raise TypeError

    @staticmethod
    def _tobytes(x):
        """Return *x* as a byte string.

        Byte strings pass through unchanged.  Anything else goes through
        ``str()``; on Python 3 the resulting text is encoded as UTF-8.
        """
        if isinstance(x, bytes):
            return x
        if isinstance(x, bytearray):
            return bytes(x)
        s = str(x)
        if isinstance(s, bytes):
            # Python 2: str() already produced a byte string.
            return s
        return s.encode('utf-8')

    def add(self, k, v):
        """Append the record (*k*, *v*) and return ``self`` for chaining."""
        (k, v) = (self._tobytes(k), self._tobytes(v))
        (klen, vlen) = (len(k), len(v))
        self._fp.seek(self._pos)
        self._fp.write(pack('<II', klen, vlen))
        self._fp.write(k)
        self._fp.write(v)
        h = cdbhash(k)
        # The low 8 bits of the hash select the bucket.
        b = self._bucket[h % 256]
        b.append(h)
        b.append(self._pos)
        # sizeof(keylen)+sizeof(datalen)+sizeof(key)+sizeof(data)
        self._pos += 8 + klen + vlen
        self.numentries += 1
        return self

    def finish(self):
        """Write hash tables and header, then move the file into place.

        The temporary file is closed even when writing fails; in that
        case the exception propagates and the final file is not touched.
        """
        try:
            self._fp.seek(self._pos)
            pos_hash = self._pos
            # write hashes
            for b1 in self._bucket:
                if not b1:
                    continue
                # b1 holds two ints per record, so len(b1) is twice the
                # record count; that value is used as the slot count.
                blen = len(b1)
                a = array('I', [0] * blen * 2)
                for j in range(0, blen, 2):
                    (h, p) = (b1[j], b1[j + 1])
                    # Start at the slot chosen by the upper hash bits and
                    # probe linearly for a free one (offset still 0).
                    i = ((h >> 8) % blen) * 2
                    while a[i + 1]:
                        i = (i + 2) % len(a)
                    a[i] = h
                    a[i + 1] = p
                self._fp.write(encode(a))
            # write header
            self._fp.seek(0)
            a = array('I')
            for b1 in self._bucket:
                a.append(pos_hash)
                a.append(len(b1))
                # Each slot occupies 8 bytes on disk.
                pos_hash += len(b1) * 8
            self._fp.write(encode(a))
        finally:
            self._fp.close()
        # os.replace (Python 3.3+) also overwrites an existing target on
        # Windows; fall back to os.rename where it is unavailable.
        rename = getattr(os, 'replace', os.rename)
        rename(self.fntmp, self.fn)
203
# Short module-level aliases for the reader and maker classes.
# NOTE(review): these presumably mirror the names of another cdb module's
# API (init / cdbmake) -- confirm against callers.
init = CDBReader
cdbmake = CDBMaker
Attached Files
To refer to attachments on a page, use attachment:filename, as shown below in the list of files. Do NOT use the URL of the [get] link, since this is subject to change and can break easily. You are not allowed to attach a file to this page.