1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 """
39 Implements the standard 'collect' action.
40 @sort: executeCollect
41 @author: Kenneth J. Pronovici <pronovic@ieee.org>
42 """
43
44
45
46
47
48
49
50 import os
51 import logging
52 import pickle
53
54
55 from CedarBackup2.filesystem import BackupFileList, FilesystemList
56 from CedarBackup2.util import isStartOfWeek, changeOwnership, displayBytes, buildNormalizedPath
57 from CedarBackup2.actions.constants import DIGEST_EXTENSION, COLLECT_INDICATOR
58 from CedarBackup2.actions.util import writeIndicatorFile
59
60
61
62
63
64
65 logger = logging.getLogger("CedarBackup2.log.actions.collect")
66
67
68
69
70
71
72
73
74
75
76
def executeCollect(configPath, options, config):
    """
    Runs the 'collect' backup action.

    Each configured collect file and collect directory is examined in turn.
    An item is collected today when a full backup was requested, when its
    collect mode is 'daily' or 'incr', or when its collect mode is 'weekly'
    and today is the configured starting day of the week.  The digest reset
    flag handed to the collectors is set for full backups and on the
    starting day of the week.

    @note: Once collection is finished, a collect indicator is written to the
    collect target directory, so it's obvious that the collect action has
    completed.  The stage process uses this indicator to decide whether a
    peer is ready to be staged.

    @param configPath: Path to configuration file on disk.
    @type configPath: String representing a path on disk.

    @param options: Program command-line options.
    @type options: Options object.

    @param config: Program configuration.
    @type config: Config object.

    @raise ValueError: Under many generic error conditions
    @raise TarError: If there is a problem creating a tar file
    """
    logger.debug("Executing the 'collect' action.")
    if config.options is None or config.collect is None:
        raise ValueError("Collect configuration is not properly filled in.")
    # The directory list is only inspected when there are no collect files.
    if config.collect.collectFiles is None or len(config.collect.collectFiles) < 1:
        if config.collect.collectDirs is None or len(config.collect.collectDirs) < 1:
            raise ValueError("There must be at least one collect file or collect directory.")

    isFull = options.full
    logger.debug("Full backup flag is [%s]", isFull)
    startOfWeek = isStartOfWeek(config.options.startingDay)
    resetDigest = isFull or startOfWeek
    logger.debug("Reset digest flag is [%s]", resetDigest)

    def _dueToday(mode):
        # Same rule for files and directories: full backup, daily/incr mode,
        # or weekly mode on the first day of the week.
        return isFull or (mode in ['daily', 'incr', ]) or (mode == 'weekly' and startOfWeek)

    if config.collect.collectFiles is not None:
        for collectFile in config.collect.collectFiles:
            filePath = collectFile.absolutePath
            logger.debug("Working with collect file [%s]", filePath)
            fileCollectMode = _getCollectMode(config, collectFile)
            fileArchiveMode = _getArchiveMode(config, collectFile)
            fileDigestPath = _getDigestPath(config, filePath)
            fileTarfilePath = _getTarfilePath(config, filePath, fileArchiveMode)
            if _dueToday(fileCollectMode):
                logger.debug("File meets criteria to be backed up today.")
                _collectFile(config, filePath, fileTarfilePath,
                             fileCollectMode, fileArchiveMode, resetDigest, fileDigestPath)
            else:
                logger.debug("File will not be backed up, per collect mode.")
            logger.info("Completed collecting file [%s]", filePath)

    if config.collect.collectDirs is not None:
        for collectDir in config.collect.collectDirs:
            dirPath = collectDir.absolutePath
            logger.debug("Working with collect directory [%s]", dirPath)
            dirCollectMode = _getCollectMode(config, collectDir)
            dirArchiveMode = _getArchiveMode(config, collectDir)
            dirIgnoreFile = _getIgnoreFile(config, collectDir)
            dirLinkDepth = _getLinkDepth(collectDir)
            dirDereference = _getDereference(collectDir)
            dirRecursionLevel = _getRecursionLevel(collectDir)
            (dirExcludePaths, dirExcludePatterns) = _getExclusions(config, collectDir)
            if _dueToday(dirCollectMode):
                logger.debug("Directory meets criteria to be backed up today.")
                _collectDirectory(config, dirPath,
                                  dirCollectMode, dirArchiveMode, dirIgnoreFile, dirLinkDepth, dirDereference,
                                  resetDigest, dirExcludePaths, dirExcludePatterns, dirRecursionLevel)
            else:
                logger.debug("Directory will not be backed up, per collect mode.")
            logger.info("Completed collecting directory [%s]", dirPath)

    writeIndicatorFile(config.collect.targetDir, COLLECT_INDICATOR,
                       config.options.backupUser, config.options.backupGroup)
    logger.info("Executed the 'collect' action successfully.")
144
145
146
147
148
149
150
151
152
153
def _collectFile(config, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
    """
    Collects a single configured collect file.

    A backup list containing only the indicated file is built and handed to
    L{_executeBackup}, which writes it to the indicated tarfile.  When the
    file is collected incrementally, the indicated digest path and the reset
    digest flag are used by L{_executeBackup} (the reset flag causes any
    existing digest to be ignored, but a new digest is always rewritten).

    The caller decides what the collect and archive modes are, since they can
    be set on both the collect configuration and the collect file itself.

    @param config: Config object.
    @param absolutePath: Absolute path of file to collect.
    @param tarfilePath: Path to tarfile that should be created.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param resetDigest: Reset digest flag.
    @param digestPath: Path to digest file on disk, if needed.
    """
    singleFileList = BackupFileList()
    singleFileList.addFile(absolutePath)
    _executeBackup(config=config,
                   backupList=singleFileList,
                   absolutePath=absolutePath,
                   tarfilePath=tarfilePath,
                   collectMode=collectMode,
                   archiveMode=archiveMode,
                   resetDigest=resetDigest,
                   digestPath=digestPath)
178
179
180
181
182
183
def _collectDirectory(config, absolutePath, collectMode, archiveMode,
                      ignoreFile, linkDepth, dereference, resetDigest,
                      excludePaths, excludePatterns, recursionLevel):
    """
    Collects a configured collect directory.

    When C{recursionLevel} is zero, the directory is collected into a single
    tarfile.  The tarfile path and digest path are derived from
    C{absolutePath} via L{_getTarfilePath} and L{_getDigestPath}, and the
    actual backup is carried out by L{_executeBackup}.

    When C{recursionLevel} is non-zero, each immediate subdirectory (files
    and links are left out of the subdirectory listing, and the exclusions
    are honored) is first collected on its own with C{recursionLevel-1}.
    Afterwards the directory itself is collected at level zero, with every
    subdirectory that was already collected added to the excluded paths so
    that nothing is backed up twice.

    For directories that are collected incrementally, the reset digest flag
    causes any existing digest to be ignored, but a new digest is always
    rewritten.

    The caller must decide what the collect and archive modes are, since they
    can be on both the collect configuration and the collect directory itself.

    @note: The C{excludePaths} list passed in by the caller is never modified;
    the already-collected subdirectories are tracked in a private copy.

    @param config: Config object.
    @param absolutePath: Absolute path of directory to collect.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param ignoreFile: Ignore file to use.
    @param linkDepth: Link depth value to use.
    @param dereference: Dereference flag to use.
    @param resetDigest: Reset digest flag.
    @param excludePaths: List of absolute paths to exclude.
    @param excludePatterns: List of patterns to exclude.
    @param recursionLevel: Recursion level (zero for no recursion)
    """
    if recursionLevel == 0:
        # Collect the actual directory because we're at recursion level 0
        logger.info("Collecting directory [%s]", absolutePath)
        tarfilePath = _getTarfilePath(config, absolutePath, archiveMode)
        digestPath = _getDigestPath(config, absolutePath)

        backupList = BackupFileList()
        backupList.ignoreFile = ignoreFile
        backupList.excludePaths = excludePaths
        backupList.excludePatterns = excludePatterns
        backupList.addDirContents(absolutePath, linkDepth=linkDepth, dereference=dereference)

        _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath)
    else:
        # Track exclusions in a private copy so the caller's list is not
        # mutated as a side effect.  A missing (None) list is treated as
        # "no exclusions so far".
        if excludePaths is None:
            pendingExcludes = []
        else:
            pendingExcludes = list(excludePaths)

        # Find all of the immediate subdirectories
        subdirs = FilesystemList()
        subdirs.excludeFiles = True
        subdirs.excludeLinks = True
        subdirs.excludePaths = excludePaths
        subdirs.excludePatterns = excludePatterns
        subdirs.addDirContents(path=absolutePath, recursive=False, addSelf=False)

        # Back up the subdirectories separately
        for subdir in subdirs:
            _collectDirectory(config, subdir, collectMode, archiveMode,
                              ignoreFile, linkDepth, dereference, resetDigest,
                              pendingExcludes, excludePatterns, recursionLevel-1)
            # This subdirectory is already backed up, so exclude it from now on
            pendingExcludes.append(subdir)

        # Back up everything that hasn't previously been backed up
        _collectDirectory(config, absolutePath, collectMode, archiveMode,
                          ignoreFile, linkDepth, dereference, resetDigest,
                          pendingExcludes, excludePatterns, 0)
244
245
246
247
248
249
def _executeBackup(config, backupList, absolutePath, tarfilePath, collectMode, archiveMode, resetDigest, digestPath):
    """
    Executes the backup process for an already-built backup list.

    This function consolidates functionality shared by L{_collectFile} and
    L{_collectDirectory}.  Those functions build the backup list; this
    function writes the tarfile and manages the digest file on disk.

    In the 'incr' collect mode, the previous digest is loaded from the digest
    path (or treated as empty when the reset digest flag is set), unchanged
    entries are removed from the backup list, and the newly-captured digest
    is always written back to disk.  In any other collect mode, no digest is
    read or written.  In either case, a tarfile is only generated (and its
    ownership changed) when the backup list is not empty.

    For collect files, the digest file will always just contain the single
    file that is being backed up.  This might be a little wasteful in terms of
    the number of files that we keep around, but it's consistent and easy to
    understand.

    @param config: Config object.
    @param backupList: List to execute backup for
    @param absolutePath: Absolute path of directory or file to collect.
    @param tarfilePath: Path to tarfile that should be created.
    @param collectMode: Collect mode to use.
    @param archiveMode: Archive mode to use.
    @param resetDigest: Reset digest flag.
    @param digestPath: Path to digest file on disk, if needed.
    """
    incremental = (collectMode == 'incr')
    capturedDigest = None

    if incremental:
        if resetDigest:
            logger.debug("Based on resetDigest flag, digest will be cleared.")
            priorDigest = {}
        else:
            logger.debug("Based on resetDigest flag, digest will loaded from disk.")
            priorDigest = _loadDigest(digestPath)
        (dropped, capturedDigest) = backupList.removeUnchanged(priorDigest, captureDigest=True)
        logger.debug("Removed %d unchanged files based on digest values.", dropped)
    else:
        logger.debug("Collect mode is [%s]; no digest will be used.", collectMode)

    # A list holding only the collected path itself means a single file.
    if len(backupList) == 1 and backupList[0] == absolutePath:
        logger.info("Backing up file [%s] (%s).", absolutePath, displayBytes(backupList.totalSize()))
    else:
        logger.info("Backing up %d files in [%s] (%s).", len(backupList), absolutePath, displayBytes(backupList.totalSize()))

    if len(backupList) > 0:
        backupList.generateTarfile(tarfilePath, archiveMode, True)
        changeOwnership(tarfilePath, config.options.backupUser, config.options.backupGroup)

    # The new digest is written even when nothing needed to be backed up.
    if incremental:
        _writeDigest(config, capturedDigest, digestPath)
299
300
301
302
303
304
def _loadDigest(digestPath):
    """
    Loads the indicated digest path from disk into a dictionary.

    If we can't load the digest successfully (either because it doesn't exist
    or for some other reason), then an empty dictionary will be returned - but
    the condition will be logged.

    @note: The digest is a pickle file.  Unpickling can execute arbitrary
    code, so the digest file must only ever be writable by trusted users.

    @param digestPath: Path to the digest file on disk.

    @return: Dictionary representing contents of digest path.
    """
    if not os.path.isfile(digestPath):
        logger.debug("Digest [%s] does not exist on disk.", digestPath)
        return {}
    try:
        # NOTE(review): the text mode is kept as-is so that it stays
        # consistent with how the digest is written; text mode only suits
        # the ASCII pickle protocol, so change both sides together if the
        # protocol or interpreter version ever changes.
        with open(digestPath, "r") as digestFile:  # closed even if unpickling fails
            digest = pickle.load(digestFile)
        logger.debug("Loaded digest [%s] from disk: %d entries.", digestPath, len(digest))
    except Exception:
        # Deliberately best-effort: any failure means "start from an empty
        # digest".  KeyboardInterrupt and SystemExit are no longer swallowed.
        digest = {}
        logger.error("Failed loading digest [%s] from disk.", digestPath)
    return digest
328
329
330
331
332
333
def _writeDigest(config, digest, digestPath):
    """
    Writes the digest dictionary to the indicated digest path on disk.

    The digest is pickled to the file, and then ownership of the file is
    changed to the configured backup user and group.  If we can't write the
    digest successfully for any reason, we'll log the condition but won't
    throw an exception.

    @param config: Config object.
    @param digest: Digest dictionary to write to disk.
    @param digestPath: Path to the digest file on disk.
    """
    try:
        # NOTE(review): the text mode is kept as-is so that it stays
        # consistent with how the digest is read back; change both sides
        # together if the pickle protocol or interpreter version changes.
        with open(digestPath, "w") as digestFile:
            pickle.dump(digest, digestFile)
        # The file is flushed and closed before its ownership is changed.
        changeOwnership(digestPath, config.options.backupUser, config.options.backupGroup)
        logger.debug("Wrote new digest [%s] to disk: %d entries.", digestPath, len(digest))
    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt and SystemExit are
        # no longer swallowed.
        logger.error("Failed to write digest [%s] to disk.", digestPath)
351
352
353
354
355
356
357
358
359
360
def _getCollectMode(config, item):
    """
    Gets the collect mode that should be used for a collect directory or file.
    The value set on the file or directory wins; when it is C{None}, the value
    from the collect section of the configuration is used instead.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Collect mode to use.
    """
    collectMode = item.collectMode if item.collectMode is not None else config.collect.collectMode
    logger.debug("Collect mode is [%s]", collectMode)
    return collectMode
375
376
377
378
379
380
def _getArchiveMode(config, item):
    """
    Gets the archive mode that should be used for a collect directory or file.
    The value set on the file or directory wins; when it is C{None}, the value
    from the collect section of the configuration is used instead.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Archive mode to use.
    """
    archiveMode = item.archiveMode if item.archiveMode is not None else config.collect.archiveMode
    logger.debug("Archive mode is [%s]", archiveMode)
    return archiveMode
395
396
397
398
399
400
def _getIgnoreFile(config, item):
    """
    Gets the ignore file that should be used for a collect directory or file.
    The value set on the file or directory wins; when it is C{None}, the value
    from the collect section of the configuration is used instead.
    @param config: Config object.
    @param item: C{CollectFile} or C{CollectDir} object
    @return: Ignore file to use.
    """
    ignoreFile = item.ignoreFile if item.ignoreFile is not None else config.collect.ignoreFile
    logger.debug("Ignore file is [%s]", ignoreFile)
    return ignoreFile
415
416
417
418
419
420
def _getLinkDepth(item):
    """
    Gets the link depth that should be used for a collect directory.
    The value set on the directory is used when there is one; when it is
    C{None}, a link depth of 0 (zero) is used.
    @param item: C{CollectDir} object
    @return: Link depth to use.
    """
    linkDepth = 0 if item.linkDepth is None else item.linkDepth
    logger.debug("Link depth is [%d]", linkDepth)
    return linkDepth
434
435
436
437
438
439
def _getDereference(item):
    """
    Gets the dereference flag that should be used for a collect directory.
    The value set on the directory is used when there is one; when it is
    C{None}, the flag is C{False}.
    @param item: C{CollectDir} object
    @return: Dereference flag to use.
    """
    dereference = False if item.dereference is None else item.dereference
    logger.debug("Dereference flag is [%s]", dereference)
    return dereference
453
454
455
456
457
458
472
473
474
475
476
477
def _getDigestPath(config, absolutePath):
    """
    Gets the digest path associated with a collect directory or file.

    The digest file name is the normalized form of the absolute path followed
    by the digest extension, and the file lives in the configured working
    directory.

    @param config: Config object.
    @param absolutePath: Absolute path to generate digest for
    @return: Absolute path to the digest associated with the collect directory or file.
    """
    digestName = "%s.%s" % (buildNormalizedPath(absolutePath), DIGEST_EXTENSION)
    digestPath = os.path.join(config.options.workingDir, digestName)
    logger.debug("Digest path is [%s]", digestPath)
    return digestPath
490
491
492
493
494
495
def _getTarfilePath(config, absolutePath, archiveMode):
    """
    Gets the tarfile path (including correct extension) associated with a
    collect directory or file.

    The tarfile name is the normalized form of the absolute path followed by
    the extension that goes with the archive mode ('tar', 'targz' or
    'tarbz2'), and the file lives in the collect target directory.

    @param config: Config object.
    @param absolutePath: Absolute path to generate tarfile for
    @param archiveMode: Archive mode to use for this tarfile.
    @return: Absolute path to the tarfile associated with the collect directory.
    @raise ValueError: If the archive mode is not one of the supported modes.
    """
    extensions = {'tar': "tar", 'targz': "tar.gz", 'tarbz2': "tar.bz2", }
    try:
        extension = extensions[archiveMode]
    except (KeyError, TypeError):
        # Previously an unknown mode fell through all of the branches and
        # failed later with an unhelpful unbound-variable error.
        raise ValueError("Unsupported archive mode [%s]." % (archiveMode,))
    normalized = buildNormalizedPath(absolutePath)
    filename = "%s.%s" % (normalized, extension)
    tarfilePath = os.path.join(config.collect.targetDir, filename)
    logger.debug("Tarfile path is [%s]", tarfilePath)
    return tarfilePath
515
516
517
518
519
520
555