1
2
3
4
5
6
7
8
"""Directory cache implementation.

This module contains the implementation of a cache that uses individual
files stored in a dedicated cache directory to store the cached contents.
The cache class is L{directory_cache_t}, which can be passed to the C{cache}
argument of the L{parse()} function.
"""
16
import cPickle
import gzip
import hashlib
import md5
import os
import os.path

import declarations_cache
20
class index_entry_t:
    """Entry of the index table in the directory cache index.

    Each cached header file (i.e. each *.cache file) has a corresponding
    index_entry_t object. This object is used to determine whether the
    cache file with the declarations is still valid or not.

    This class is a helper class for the directory_cache_t class.
    """
    # NOTE(review): the ``class`` line was missing from the listing under
    # review; the name is taken from the ``index_entry_t(filesigs, configsig)``
    # call in directory_cache_t.update(). Confirm that the class declares no
    # base class before merging (instances are pickled into index.dat).

    def __init__(self, filesigs, configsig):
        """Constructor.

        @param filesigs: List of tuples (fileid, sig), one per dependent file.
        @type filesigs: list of tuple
        @param configsig: Signature of the configuration object.
        """
        self.filesigs = filesigs
        self.configsig = configsig

    def __getstate__(self):
        """Return the pickled state as the tuple (filesigs, configsig)."""
        return (self.filesigs, self.configsig)

    def __setstate__(self, state):
        """Restore the object from a (filesigs, configsig) tuple."""
        filesigs, configsig = state
        self.filesigs = filesigs
        self.configsig = configsig
45
46
class directory_cache_t(declarations_cache.cache_base_t):
    """Cache class that stores its data as multiple files inside a directory.

    The cache stores one index file called "index.dat" which is always
    read by the cache when the cache object is created. Each header file
    will have its corresponding *.cache file that stores the declarations
    found in the header file. The index file is used to determine whether
    a *.cache file is still valid or not (by checking whether any of the
    dependent files, i.e. the header file itself and all included files,
    has been modified since the last run).
    """
    # NOTE(review): the ``class`` line and several ``def`` lines were missing
    # from the listing under review. The base class is taken from the
    # explicit ``declarations_cache.cache_base_t.__init__(self)`` call; method
    # names are taken from their call sites unless a method says otherwise.
    #
    # NOTE(review): ``_create_config_signature()`` is called by update() and
    # cached_value() but its definition is not visible in this listing; it
    # must be provided elsewhere in the class (or by the base class).

    def __init__(self, dir="cache", compression=False, md5_sigs=True):
        """Constructor.

        @param dir: Cache directory (it is created if it does not exist).
        @type dir: str
        @param compression: If True the cache files are compressed using gzip.
        @type compression: bool
        @param md5_sigs: Determines whether file modifications are detected
                         by computing an md5 digest (True) or by checking
                         the modification date (False).
        @type md5_sigs: bool
        @raise ValueError: if dir is the name of an existing regular file.
        """
        declarations_cache.cache_base_t.__init__(self)

        # Absolute path of the cache directory
        self.__dir = os.path.abspath(dir)

        # Whether cache files are read/written through gzip
        self.__compression = compression

        # Signature mode; _load() overrides it with the mode stored in an
        # existing index so that stored signatures stay comparable
        self.__md5_sigs = md5_sigs

        # Repository of the file names referenced by the index entries
        self.__filename_rep = filename_repository_t(self.__md5_sigs)

        # Index table: cache key -> index_entry_t
        self.__index = {}

        # True when the in-memory index differs from index.dat
        self.__modified_flag = False

        if os.path.isfile(self.__dir):
            raise ValueError(
                "Cannot use %s as cache directory. "
                "There is already a file with that name." % self.__dir)

        if os.path.isdir(self.__dir):
            self._load()
        else:
            os.mkdir(self.__dir)

    # NOTE(review): the ``def`` line of this method was missing from the
    # listing; the name ``flush`` is assumed -- confirm against the
    # interface declared by declarations_cache.cache_base_t.
    def flush(self):
        """Save the index table to disk."""
        self._save()

    def update(self, source_file, configuration, declarations, included_files):
        """Replace a cache entry by a new value.

        @param source_file: Header file name.
        @type source_file: str
        @param configuration: Configuration object.
        @type configuration: L{config_t}
        @param declarations: Declarations contained in the header file.
        @type declarations: picklable object
        @param included_files: Dependent files
        @type included_files: list of str
        """
        source_file = os.path.normpath(source_file)
        included_files = [os.path.normpath(p) for p in included_files]

        # Unique dependent files (the header itself plus its includes),
        # kept in first-seen order so that ids are assigned predictably.
        dependent_files = []
        seen = set()
        for name in [source_file] + included_files:
            if name not in seen:
                seen.add(name)
                dependent_files.append(name)

        key = self._create_cache_key(source_file)

        # Drop a previous entry for this header (releases its file names
        # and deletes its *.cache file) before registering the new one.
        self._remove_entry(source_file, key)

        # Acquire every dependent file and remember its current signature.
        filesigs = []
        for filename in dependent_files:
            id_, sig = self.__filename_rep.acquire_filename(filename)
            filesigs.append((id_, sig))

        configsig = self._create_config_signature(configuration)
        self.__index[key] = index_entry_t(filesigs, configsig)
        self.__modified_flag = True

        # Write the declarations into the *.cache file of this header.
        cachefilename = self._create_cache_filename(source_file)
        self._write_file(cachefilename, declarations)

    # NOTE(review): the ``def`` line of this method was missing from the
    # listing; the name ``cached_value`` is assumed (parameter names come
    # from the docstring and the body) -- confirm against
    # declarations_cache.cache_base_t.
    def cached_value(self, source_file, configuration):
        """Return the cached declarations or None.

        @param source_file: Header file name
        @type source_file: str
        @param configuration: Configuration object
        @type configuration: L{config_t}
        @return: Cached declarations or None
        """
        # update() stores entries under the key of the *normalized* path, so
        # the lookup has to normalize as well; otherwise a name such as
        # "./x.h" would never hit the entry stored for "x.h".
        source_file = os.path.normpath(source_file)

        key = self._create_cache_key(source_file)
        entry = self.__index.get(key)
        if entry is None:
            # No entry for this header
            return None

        # A different configuration invalidates the entry.
        configsig = self._create_config_signature(configuration)
        if configsig != entry.configsig:
            return None

        # Any modified dependent file invalidates the entry.
        for id_, sig in entry.filesigs:
            if self.__filename_rep.is_file_modified(id_, sig):
                return None

        # Entry is still valid: load the declarations from its cache file.
        cachefilename = self._create_cache_filename(source_file)
        return self._read_file(cachefilename)

    def _load(self):
        """Load the cache.

        Loads the file index.dat which contains the index table and
        the file name repository.

        This method is called by the constructor.
        """
        indexfilename = os.path.join(self.__dir, "index.dat")
        if os.path.exists(indexfilename):
            data = self._read_file(indexfilename)
            self.__index = data[0]
            self.__filename_rep = data[1]
            if self.__filename_rep._md5_sigs != self.__md5_sigs:
                print("CACHE: Warning: md5_sigs stored in the cache is set to %s."%self.__filename_rep._md5_sigs)
                print(" Please remove the cache to change this setting.")
            # The stored setting wins (a no-op when both already agree).
            self.__md5_sigs = self.__filename_rep._md5_sigs
        else:
            self.__index = {}
            self.__filename_rep = filename_repository_t(self.__md5_sigs)

        self.__modified_flag = False

    def _save(self):
        """Save the cache index if it was modified.

        Saves the index table and the file name repository in the file
        index.dat.
        """
        if not self.__modified_flag:
            return
        self.__filename_rep.update_id_counter()
        indexfilename = os.path.join(self.__dir, "index.dat")
        self._write_file(indexfilename, (self.__index, self.__filename_rep))
        self.__modified_flag = False

    def _read_file(self, filename):
        """Read a Python object from a cache file.

        Reads a pickled object from disk and returns it.

        @param filename: Name of the file that should be read.
        @type filename: str
        @returns: Unpickled file contents
        """
        # NOTE(review): unpickling executes whatever the file describes; the
        # cache directory must only ever contain files written by this class.
        if self.__compression:
            f = gzip.GzipFile(filename, "rb")
        else:
            f = open(filename, "rb")
        try:
            return cPickle.load(f)
        finally:
            # Close the handle even when unpickling fails.
            f.close()

    def _write_file(self, filename, data):
        """Write a data item into a file.

        The data object is written to a file using the pickle mechanism.

        @param filename: Output file name
        @type filename: str
        @param data: A Python object that will be pickled
        @type data: picklable object
        """
        if self.__compression:
            f = gzip.GzipFile(filename, "wb")
        else:
            f = open(filename, "wb")
        try:
            cPickle.dump(data, f, cPickle.HIGHEST_PROTOCOL)
        finally:
            # Close the handle even when pickling fails.
            f.close()

    def _remove_entry(self, source_file, key):
        """Remove an entry from the cache.

        source_file is the name of the header and key is its corresponding
        cache key (obtained by a call to L{_create_cache_key()}).
        The entry is removed from the index table, any referenced file
        name is released and the cache file is deleted.

        If key references a non-existing entry, the method returns
        immediately.

        @param source_file: Header file name
        @type source_file: str
        @param key: Key value for the specified header file
        @type key: hashable object
        """
        entry = self.__index.get(key)
        if entry is None:
            return

        # Release the file names referenced by the entry.
        for id_, _sig in entry.filesigs:
            self.__filename_rep.release_filename(id_)

        # Remove the entry from the index table.
        del self.__index[key]
        self.__modified_flag = True

        # Delete the cache file; failing to do so is reported, not fatal.
        cachefilename = self._create_cache_filename(source_file)
        try:
            os.remove(cachefilename)
        except OSError as e:
            print("Could not remove cache file (%s)"%e)

    def _create_cache_key(self, source_file):
        """Return the cache key for a header file.

        @param source_file: Header file name
        @type source_file: str
        @returns: Key for the given header file
        @rtype: str
        """
        # NOTE(review): the key embeds hash(path). The value of hash() for a
        # string is not guaranteed to be identical across interpreter
        # builds, and is randomized per process when hash randomization is
        # enabled -- keys persisted in index.dat would then never match.
        # Left unchanged to stay compatible with existing cache directories.
        path, name = os.path.split(source_file)
        return name + str(hash(path))

    def _create_cache_filename(self, source_file):
        """Return the cache file name for a header file.

        @param source_file: Header file name
        @type source_file: str
        @returns: Cache file name (*.cache)
        @rtype: str
        """
        res = self._create_cache_key(source_file) + ".cache"
        return os.path.join(self.__dir, res)
330
350
351
352
353
class filename_entry_t:
    """This is a record stored in the filename_repository_t class.

    The class is an internal class used in the implementation of the
    filename_repository_t class and it just serves as a container for
    the file name and the reference count.
    """
    # NOTE(review): the ``class`` line was missing from the listing under
    # review; the name is taken from the ``filename_entry_t(name)`` call in
    # filename_repository_t. Confirm that the class declares no base class
    # before merging (instances are pickled as part of the repository).

    def __init__(self, filename):
        """Constructor.

        The reference count is initially set to 0.

        @param filename: Name of the file this record describes.
        @type filename: str
        """
        # File name
        self.filename = filename
        # Reference count
        self.refcount = 0

        # Cached signature of the file. ``signature`` is only meaningful
        # while ``sig_valid`` is True; both are excluded from the pickled
        # state, so they always start out invalid after unpickling.
        self.sig_valid = False
        self.signature = None

    def __getstate__(self):
        """Return the pickled state as the tuple (filename, refcount)."""
        # The cached signature is deliberately not part of the state.
        return (self.filename, self.refcount)

    def __setstate__(self, state):
        """Restore the record from a (filename, refcount) tuple."""
        self.filename, self.refcount = state
        # No signature has been computed yet for the restored record.
        self.sig_valid = False
        self.signature = None

    def inc_ref_count(self):
        """Increase the reference count by 1."""
        self.refcount += 1

    def dec_ref_count(self):
        """Decrease the reference count by 1 and return the new count."""
        self.refcount -= 1
        return self.refcount
396
397
class filename_repository_t:
    """File name repository.

    This class stores file names and can check whether a file has been
    modified or not since a previous call.
    A file name is stored by calling acquire_filename() which returns
    an ID and a signature of the file. The signature can later be used
    to check if the file was modified by calling is_file_modified().
    If the file name is no longer required release_filename() should be
    called so that the entry can be removed from the repository.
    """
    # NOTE(review): the ``class`` line and every ``def`` line of this class
    # were missing from the listing under review. The class name and the
    # method names/parameters are taken from their call sites and from the
    # names used inside each body, unless a method says otherwise. Confirm
    # that the class declares no base class before merging (instances are
    # pickled into index.dat).

    def __init__(self, md5_sigs):
        """Constructor.

        @param md5_sigs: If true, file signatures are md5 digests of the file
                         contents; otherwise they are modification times.
        @type md5_sigs: bool
        """
        # Signature mode (read by directory_cache_t when loading an index)
        self._md5_sigs = md5_sigs

        # ID lookup table: file name -> id
        self.__id_lut = {}

        # Entries: id -> filename_entry_t
        self.__entries = {}

        # The id that will be handed out to the next new file name
        self.__next_id = 1

    def acquire_filename(self, name):
        """Acquire a file name and return its id and its signature.

        The reference count of the file name is increased by one; a file
        name that is not stored yet gets a new id.

        @param name: File name
        @type name: str
        @returns: Tuple (id, signature)
        """
        id_ = self.__id_lut.get(name)
        if id_ is None:
            # Unknown file name: register it under a fresh id.
            id_ = self.__next_id
            self.__next_id += 1
            self.__id_lut[name] = id_
            entry = filename_entry_t(name)
            self.__entries[id_] = entry
        else:
            # Known file name: reuse its entry.
            entry = self.__entries[id_]

        entry.inc_ref_count()
        return id_, self._get_signature(entry)

    def release_filename(self, id_):
        """Release a file name.

        The reference count is decreased by one and the file name is
        removed from the repository when the count reaches zero.

        @param id_: Id previously returned by acquire_filename()
        @type id_: int
        @raise ValueError: if id_ is not stored in the repository.
        """
        entry = self.__entries.get(id_)
        if entry is None:
            raise ValueError("Invalid filename id (%d)"%id_)

        # Remove the entry once nobody references it any more.
        if entry.dec_ref_count() == 0:
            del self.__entries[id_]
            del self.__id_lut[entry.filename]

    def is_file_modified(self, id_, signature):
        """Check if the file referred to by id_ has been modified.

        @param id_: Id previously returned by acquire_filename()
        @type id_: int
        @param signature: Signature the file had when it was acquired
        @returns: True if the current signature differs from signature
        @rtype: bool
        @raise ValueError: if id_ is not stored in the repository.
        """
        entry = self.__entries.get(id_)
        if entry is None:
            raise ValueError("Invalid filename id_ (%d)"%id_)

        if entry.sig_valid:
            # The signature was already computed for this entry; reuse it.
            filesig = entry.signature
        else:
            # Compute the signature once and remember it on the entry.
            filesig = self._get_signature(entry)
            entry.signature = filesig
            entry.sig_valid = True

        return filesig != signature

    def update_id_counter(self):
        """Update the id_ counter so that it doesn't grow forever.

        The counter is reset to one past the largest id still in use
        (or to 1 when the repository is empty).
        """
        if not self.__entries:
            self.__next_id = 1
        else:
            self.__next_id = max(self.__entries) + 1

    def _get_signature(self, entry):
        """Return the signature of the file stored in entry.

        @param entry: Record whose ``filename`` attribute names the file
        @type entry: L{filename_entry_t}
        @returns: md5 digest of the file contents or the modification time
                  (depending on the md5_sigs setting); None if the file does
                  not exist or cannot be accessed.
        """
        if self._md5_sigs:
            # Signature is the md5 digest of the file contents.
            if not os.path.exists(entry.filename):
                return None
            try:
                # Binary mode: the digest must cover the raw bytes, which a
                # text-mode read does not guarantee on every platform.
                f = open(entry.filename, "rb")
            except IOError as e:
                print("Cannot determine md5 digest: %s" % e)
                return None
            try:
                data = f.read()
            finally:
                # Close the handle even when reading fails.
                f.close()
            return hashlib.md5(data).digest()
        else:
            # Signature is the modification time of the file.
            try:
                return os.path.getmtime(entry.filename)
            except OSError:
                return None

    # NOTE(review): the ``def`` line of this method was missing from the
    # listing and nothing in view calls it; the name ``_dump`` is assumed --
    # confirm before merging.
    def _dump(self):
        """Dump contents for debugging/testing.

        Prints the id lookup table and the table of entries to stdout.
        """
        print(70*"-")
        print("ID lookup table:")
        for name in self.__id_lut:
            id_ = self.__id_lut[name]
            print(" %s -> %d"%(name, id_))

        print(70*"-")
        print("%-4s %-60s %s"%("ID", "Filename", "Refcount"))
        print(70*"-")
        for id_ in self.__entries:
            entry = self.__entries[id_]
            print("%04d %-60s %d"%(id_, entry.filename, entry.refcount))
528