Package CedarBackup2 :: Package tools :: Module amazons3
[hide private]
[frames | no frames]

Source Code for Module CedarBackup2.tools.amazons3

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2014 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.5) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: amazons3.py 1086 2014-10-07 22:29:07Z pronovic $ 
  31  # Purpose  : Cedar Backup tool to synchronize an Amazon S3 bucket. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Notes 
  37  ######################################################################## 
  38   
  39  """ 
  40  Synchronizes a local directory with an Amazon S3 bucket. 
  41   
  42  No configuration is required; all necessary information is taken from the 
  43  command-line.  The only thing configuration would help with is the path 
  44  resolver interface, and it doesn't seem worth it to require configuration just 
  45  to get that. 
  46   
  47  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  48  """ 
  49   
  50  ######################################################################## 
  51  # Imported modules and constants 
  52  ######################################################################## 
  53   
  54  # System modules 
  55  import sys 
  56  import os 
  57  import logging 
  58  import getopt 
  59  import json 
  60  import chardet 
  61  import warnings 
  62   
  63  # Cedar Backup modules  
  64  from CedarBackup2.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT 
  65  from CedarBackup2.filesystem import FilesystemList 
  66  from CedarBackup2.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE 
  67  from CedarBackup2.util import Diagnostics, splitCommandLine, encodePath 
  68  from CedarBackup2.util import executeCommand 
  69   
  70   
  71  ######################################################################## 
  72  # Module-wide constants and variables 
  73  ######################################################################## 
  74   
  75  logger = logging.getLogger("CedarBackup2.log.tools.amazons3") 
  76   
  77  AWS_COMMAND   = [ "aws" ] 
  78   
  79  SHORT_SWITCHES     = "hVbql:o:m:OdsDvw" 
  80  LONG_SWITCHES      = [ 'help', 'version', 'verbose', 'quiet',  
  81                         'logfile=', 'owner=', 'mode=',  
  82                         'output', 'debug', 'stack', 'diagnostics',  
  83                         'verifyOnly', 'ignoreWarnings', ] 
  84   
  85   
  86  ####################################################################### 
  87  # Options class 
  88  ####################################################################### 
  89   
class Options(object):

   ######################
   # Class documentation
   ######################

   """
   Class representing command-line options for the cback-amazons3-sync script.

   The C{Options} class is a Python object representation of the command-line
   options of the cback-amazons3-sync script.

   The object representation is two-way: a command line string or a list of
   command line arguments can be used to create an C{Options} object, and then
   changes to the object can be propagated back to a list of command-line
   arguments or to a command-line string.  An C{Options} object can even be
   created from scratch programmatically (if you have a need for that).

   There are two levels of validation in the C{Options} class.  The first is
   field-level validation, which happens whenever a field is assigned to or
   updated.  Python's C{property} functionality is used to enforce these
   validations, so expect a C{ValueError} when assigning an invalid value.

   The second level is post-completion validation, encapsulated in the
   L{Options.validate} method.  These checks only make sense once the object
   is fully filled in, so they are not applied on every assignment.  The
   method can be called at any time by a client.  It is called after creating
   an C{Options} object from a command line and before exporting an
   C{Options} object back to a command line, unless the caller passes
   C{validate=False}.

   @sort: __init__, __repr__, __str__, __cmp__
   """

   # (property name, short switch, long switch) for the boolean flags that are
   # emitted before the valued switches when a command line is rebuilt.
   _LEADING_FLAGS = (
      ("help", "-h", "--help"),
      ("version", "-V", "--version"),
      ("verbose", "-b", "--verbose"),
      ("quiet", "-q", "--quiet"),
   )

   # Same layout, for the boolean flags emitted after the valued switches.
   _TRAILING_FLAGS = (
      ("output", "-O", "--output"),
      ("debug", "-d", "--debug"),
      ("stacktrace", "-s", "--stack"),
      ("diagnostics", "-D", "--diagnostics"),
      ("verifyOnly", "-v", "--verifyOnly"),
      ("ignoreWarnings", "-w", "--ignoreWarnings"),
   )

   # Properties compared by __cmp__, in comparison order.
   _COMPARED_FIELDS = (
      "help", "version", "verbose", "quiet", "logfile", "owner", "mode",
      "output", "debug", "stacktrace", "diagnostics", "verifyOnly",
      "ignoreWarnings", "sourceDir", "s3BucketUrl",
   )

   ##############
   # Constructor
   ##############

   def __init__(self, argumentList=None, argumentString=None, validate=True):
      """
      Initializes an options object.

      If you initialize the object without passing either C{argumentList} or
      C{argumentString}, the object will be empty and will be invalid until it
      is filled in properly.  No parsing or validation happens in that case.

      No reference to the original arguments is saved off by this class.

      The argument list is assumed to be a list of arguments, not including the
      name of the command, something like C{sys.argv[1:]}.  If you pass
      C{sys.argv} instead, things are not going to work.

      The argument string will be parsed into an argument list by the
      L{util.splitCommandLine} function.  There is an assumption that the
      resulting list will be equivalent to C{sys.argv[1:]}, just like
      C{argumentList}.

      Unless the C{validate} argument is C{False}, the L{Options.validate}
      method will be called after successfully parsing any passed-in command
      line.  Keep in mind that even if C{validate} is C{False}, it might not be
      possible to parse the passed-in command line, so an exception might
      still be raised.

      @note: The command line format is specified by the L{_usage} function.

      @param argumentList: Command line for a program.
      @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

      @param argumentString: Command line for a program.
      @type argumentString: String of arguments, without the command name.

      @param validate: Validate the command line after parsing it.
      @type validate: Boolean true/false.

      @raise getopt.GetoptError: If the command-line arguments could not be parsed.
      @raise ValueError: If the command-line arguments are invalid, or if both
                         C{argumentList} and C{argumentString} are passed.
      """
      self._help = False
      self._version = False
      self._verbose = False
      self._quiet = False
      self._logfile = None
      self._owner = None
      self._mode = None
      self._output = False
      self._debug = False
      self._stacktrace = False
      self._diagnostics = False
      self._verifyOnly = False
      self._ignoreWarnings = False
      self._sourceDir = None
      self._s3BucketUrl = None
      if argumentList is not None and argumentString is not None:
         raise ValueError("Use either argumentList or argumentString, but not both.")
      if argumentString is not None:
         argumentList = splitCommandLine(argumentString)
      if argumentList is not None:
         self._parseArgumentList(argumentList)
         if validate:
            self.validate()

   #########################
   # String representations
   #########################

   def __repr__(self):
      """
      Official string representation for class instance.
      This is the (unvalidated) command-line string for the options.
      """
      return self.buildArgumentString(validate=False)

   def __str__(self):
      """
      Informal string representation for class instance.
      """
      return self.__repr__()

   #############################
   # Standard comparison method
   #############################

   def __cmp__(self, other):
      """
      Definition of equals operator for this class.

      Fields are compared one at a time, in the order listed in
      C{_COMPARED_FIELDS}; the first field that differs decides the result.

      @note: C{__cmp__} is only consulted by Python 2 interpreters.

      @param other: Other object to compare to.
      @return: -1/0/1 depending on whether self is C{<}, C{=} or C{>} other.
      """
      if other is None:
         return 1
      for field in self._COMPARED_FIELDS:
         mine = getattr(self, field)
         theirs = getattr(other, field)
         if mine != theirs:
            if mine < theirs:
               return -1
            return 1
      return 0

   #############
   # Properties
   #############

   def _setHelp(self, value):
      """
      Property target used to set the help flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._help = bool(value)

   def _getHelp(self):
      """
      Property target used to get the help flag.
      """
      return self._help

   def _setVersion(self, value):
      """
      Property target used to set the version flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._version = bool(value)

   def _getVersion(self):
      """
      Property target used to get the version flag.
      """
      return self._version

   def _setVerbose(self, value):
      """
      Property target used to set the verbose flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verbose = bool(value)

   def _getVerbose(self):
      """
      Property target used to get the verbose flag.
      """
      return self._verbose

   def _setQuiet(self, value):
      """
      Property target used to set the quiet flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._quiet = bool(value)

   def _getQuiet(self):
      """
      Property target used to get the quiet flag.
      """
      return self._quiet

   def _setLogfile(self, value):
      """
      Property target used to set the logfile parameter.
      The stored value is whatever C{encodePath} returns for the input.
      @raise ValueError: If the value is an empty string or cannot be encoded properly.
      """
      if value is not None and len(value) < 1:
         raise ValueError("The logfile parameter must be a non-empty string.")
      # NOTE(review): nesting was reconstructed from a flattened listing; this
      # assumes encodePath() passes None through so the field can be reset.
      self._logfile = encodePath(value)

   def _getLogfile(self):
      """
      Property target used to get the logfile parameter.
      """
      return self._logfile

   def _setOwner(self, value):
      """
      Property target used to set the owner parameter.
      If not C{None}, the owner must be a C{(user,group)} tuple or list.
      Strings (and inherited children of strings) are explicitly disallowed.
      The value will be normalized to a tuple.
      @raise ValueError: If the value is not valid.
      """
      if value is None:
         self._owner = None
         return
      if isinstance(value, str):
         raise ValueError("Must specify user and group tuple for owner parameter.")
      try:
         if len(value) != 2:
            raise ValueError("Must specify user and group tuple for owner parameter.")
         if len(value[0]) < 1 or len(value[1]) < 1:
            raise ValueError("User and group tuple values must be non-empty strings.")
      except TypeError:
         # Unsized value or unsized elements: report through the documented exception type
         raise ValueError("Must specify user and group tuple for owner parameter.")
      self._owner = (value[0], value[1])

   def _getOwner(self):
      """
      Property target used to get the owner parameter.
      The parameter is a tuple of C{(user, group)}.
      """
      return self._owner

   def _setMode(self, value):
      """
      Property target used to set the mode parameter.
      A string is interpreted as octal digits; anything else is passed to C{int()}.
      @raise ValueError: If the value cannot be converted or is negative.
      """
      if value is None:
         self._mode = None
         return
      try:
         if isinstance(value, str):
            value = int(value, 8)
         else:
            value = int(value)
      except (TypeError, ValueError):
         # int() raises ValueError for a malformed string, TypeError for an unsupported type
         raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
      if value < 0:
         raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
      self._mode = value

   def _getMode(self):
      """
      Property target used to get the mode parameter.
      """
      return self._mode

   def _setOutput(self, value):
      """
      Property target used to set the output flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._output = bool(value)

   def _getOutput(self):
      """
      Property target used to get the output flag.
      """
      return self._output

   def _setDebug(self, value):
      """
      Property target used to set the debug flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._debug = bool(value)

   def _getDebug(self):
      """
      Property target used to get the debug flag.
      """
      return self._debug

   def _setStacktrace(self, value):
      """
      Property target used to set the stacktrace flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._stacktrace = bool(value)

   def _getStacktrace(self):
      """
      Property target used to get the stacktrace flag.
      """
      return self._stacktrace

   def _setDiagnostics(self, value):
      """
      Property target used to set the diagnostics flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._diagnostics = bool(value)

   def _getDiagnostics(self):
      """
      Property target used to get the diagnostics flag.
      """
      return self._diagnostics

   def _setVerifyOnly(self, value):
      """
      Property target used to set the verifyOnly flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._verifyOnly = bool(value)

   def _getVerifyOnly(self):
      """
      Property target used to get the verifyOnly flag.
      """
      return self._verifyOnly

   def _setIgnoreWarnings(self, value):
      """
      Property target used to set the ignoreWarnings flag.
      No validations, but we normalize the value to C{True} or C{False}.
      """
      self._ignoreWarnings = bool(value)

   def _getIgnoreWarnings(self):
      """
      Property target used to get the ignoreWarnings flag.
      """
      return self._ignoreWarnings

   def _setSourceDir(self, value):
      """
      Property target used to set the sourceDir parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None and len(value) < 1:
         raise ValueError("The sourceDir parameter must be a non-empty string.")
      self._sourceDir = value

   def _getSourceDir(self):
      """
      Property target used to get the sourceDir parameter.
      """
      return self._sourceDir

   def _setS3BucketUrl(self, value):
      """
      Property target used to set the s3BucketUrl parameter.
      @raise ValueError: If the value is an empty string.
      """
      if value is not None and len(value) < 1:
         raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
      self._s3BucketUrl = value

   def _getS3BucketUrl(self):
      """
      Property target used to get the s3BucketUrl parameter.
      """
      return self._s3BucketUrl

   help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
   version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
   verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
   quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
   logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
   owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
   mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
   output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
   debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
   stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
   diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
   verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
   ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
   sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
   s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")

   ##################
   # Utility methods
   ##################

   def validate(self):
      """
      Validates command-line options represented by the object.

      Unless C{--help}, C{--version} or C{--diagnostics} is set, both the
      source directory and the S3 bucket URL must be filled in.  Other
      validations (as for allowed values for particular options) are taken
      care of at assignment time by the properties functionality.

      @note: The command line format is specified by the L{_usage} function.

      @raise ValueError: If one of the validations fails.
      """
      if self.help or self.version or self.diagnostics:
         return
      if self.sourceDir is None or self.s3BucketUrl is None:
         raise ValueError("Source directory and S3 bucket URL are both required.")

   def _enabledSwitches(self, flagTable):
      """
      Returns the long switches from C{flagTable} whose flag is currently set.
      @param flagTable: Sequence of C{(property, shortSwitch, longSwitch)} tuples.
      @return: List of long switch strings, in table order.
      """
      return [ longSwitch for (field, _, longSwitch) in flagTable if getattr(self, field) ]

   def buildArgumentList(self, validate=True):
      """
      Extracts options into a list of command line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument list.  The argument list is normalized to use the long option
      names (i.e. --version rather than -V).  The resulting list will be
      suitable for passing back to the constructor in the C{argumentList}
      parameter.  Unlike L{buildArgumentString}, string arguments are not
      quoted here.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called against the options before extracting the command
      line.  If the options are not valid, then an argument list will not be
      extracted.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: List representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      argumentList = self._enabledSwitches(self._LEADING_FLAGS)
      if self.logfile is not None:
         argumentList.extend([ "--logfile", self.logfile ])
      if self.owner is not None:
         argumentList.extend([ "--owner", "%s:%s" % (self.owner[0], self.owner[1]) ])
      if self.mode is not None:
         argumentList.extend([ "--mode", "%o" % self.mode ])
      argumentList.extend(self._enabledSwitches(self._TRAILING_FLAGS))
      if self.sourceDir is not None:
         argumentList.append(self.sourceDir)
      if self.s3BucketUrl is not None:
         argumentList.append(self.s3BucketUrl)
      return argumentList

   def buildArgumentString(self, validate=True):
      """
      Extracts options into a string of command-line arguments.

      The original order of the various arguments (if, indeed, the object was
      initialized with a command-line) is not preserved in this generated
      argument string.  The argument string is normalized to use the long
      option names (i.e. --version rather than -V) and to wrap the logfile,
      owner, source directory and bucket URL values in double quotes (C{"}).
      Every emitted item, including the last, is followed by a single space.

      Unless the C{validate} parameter is C{False}, the L{Options.validate}
      method will be called against the options before extracting the command
      line.  If the options are not valid, then an argument string will not be
      extracted.

      @param validate: Validate the options before extracting the command line.
      @type validate: Boolean true/false.

      @return: String representation of command-line arguments.
      @raise ValueError: If options within the object are invalid.
      """
      if validate:
         self.validate()
      pieces = [ "%s " % switch for switch in self._enabledSwitches(self._LEADING_FLAGS) ]
      if self.logfile is not None:
         pieces.append("--logfile \"%s\" " % self.logfile)
      if self.owner is not None:
         pieces.append("--owner \"%s:%s\" " % (self.owner[0], self.owner[1]))
      if self.mode is not None:
         pieces.append("--mode %o " % self.mode)
      pieces.extend([ "%s " % switch for switch in self._enabledSwitches(self._TRAILING_FLAGS) ])
      if self.sourceDir is not None:
         pieces.append("\"%s\" " % self.sourceDir)
      if self.s3BucketUrl is not None:
         pieces.append("\"%s\" " % self.s3BucketUrl)
      return "".join(pieces)

   def _parseArgumentList(self, argumentList):
      """
      Internal method to parse a list of command-line arguments.

      Most of the validation we do here has to do with whether the arguments
      can be parsed and whether any values which exist are valid.  We don't do
      any validation as to whether required elements exist (instead, that's
      the job of the L{validate} method).

      For any of the options which supply parameters, if the option is
      duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
      then the long switch is used.  If the same option is duplicated with the
      same switch (long or short), then the last entry on the command line is
      used.

      The source directory and bucket URL are only taken from the positional
      arguments when exactly two are present; any other count leaves both
      fields untouched so that L{validate} can report the problem.

      @param argumentList: List of arguments to a command.
      @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}

      @raise getopt.GetoptError: If C{getopt} cannot parse the switches.
      @raise ValueError: If a parsed value is rejected by its property.
      """
      opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
      switches = { }
      for switch, argument in opts:  # later duplicates of a switch overwrite earlier ones
         switches[switch] = argument
      for field, shortSwitch, longSwitch in self._LEADING_FLAGS:
         if shortSwitch in switches or longSwitch in switches:
            setattr(self, field, True)
      # Short form is applied first so that the long form wins when both are given
      for switch in ("-l", "--logfile"):
         if switch in switches:
            self.logfile = switches[switch]
      for switch in ("-o", "--owner"):
         if switch in switches:
            self.owner = switches[switch].split(":", 1)
      for switch in ("-m", "--mode"):
         if switch in switches:
            self.mode = switches[switch]
      for field, shortSwitch, longSwitch in self._TRAILING_FLAGS:
         if shortSwitch in switches or longSwitch in switches:
            setattr(self, field, True)
      # An explicit count check (rather than catching ValueError around the
      # assignment) keeps setter validation errors from being silently dropped.
      if len(remaining) == 2:
         self.sourceDir = remaining[0]
         self.s3BucketUrl = remaining[1]
808 809 810 ####################################################################### 811 # Public functions 812 ####################################################################### 813 814 ################# 815 # cli() function 816 ################# 817
def cli():
   """
   Implements the command-line interface for the C{cback-amazons3-sync} script.

   Essentially, this is the "main routine" for the cback-amazons3-sync script.  It does
   all of the argument processing for the script, and then also implements the
   tool functionality.

   This function looks pretty similar to C{CedarBackup2.cli.cli()}.  It's not
   easy to refactor this code to make it reusable and also readable, so I've
   decided to just live with the duplication.

   A different error code is returned for each type of failure:

      - C{1}: The Python interpreter version is < 2.5
      - C{2}: Error processing command-line arguments
      - C{3}: Error configuring logging
      - C{5}: Backup was interrupted with a CTRL-C or similar
      - C{6}: Error executing other parts of the script

   When the C{--stack} option is set, exceptions raised while executing the
   action are not caught here and propagate to the caller.

   @note: This script uses print rather than logging to the INFO level, because
   it is interactive.  Underlying Cedar Backup functionality uses the logging
   mechanism exclusively.

   @return: Error code as described above, or C{0} on success.
   """
   try:
      # Tuple comparison; does not depend on map() returning a list
      if sys.version_info[:2] < (2, 5):
         sys.stderr.write("Python version 2.5 or greater required.\n")
         return 1
   except Exception:
      # sys.version_info isn't available before 2.0
      sys.stderr.write("Python version 2.5 or greater required.\n")
      return 1

   # Exceptions are fetched with sys.exc_info() rather than "except X, e" so the
   # code parses on Python 2.5 as well as on later interpreters.
   try:
      options = Options(argumentList=sys.argv[1:])
   except Exception:
      e = sys.exc_info()[1]
      _usage()
      sys.stderr.write(" *** Error: %s\n" % e)
      return 2

   # Informational switches short-circuit before logging is configured
   if options.help:
      _usage()
      return 0
   if options.version:
      _version()
      return 0
   if options.diagnostics:
      _diagnostics()
      return 0

   try:
      logfile = setupLogging(options)
   except Exception:
      e = sys.exc_info()[1]
      sys.stderr.write("Error setting up logging: %s\n" % e)
      return 3

   logger.info("Cedar Backup Amazon S3 sync run started.")
   logger.info("Options were [%s]" % options)
   logger.info("Logfile is [%s]" % logfile)
   Diagnostics().logDiagnostics(method=logger.info)

   if options.stacktrace:
      # Deliberately unguarded so the Python stack trace reaches the user
      _executeAction(options)
   else:
      try:
         _executeAction(options)
      except KeyboardInterrupt:
         logger.error("Backup interrupted.")
         logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
         return 5
      except Exception:
         e = sys.exc_info()[1]
         logger.error("Error executing backup: %s" % e)
         logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
         return 6

   logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
   return 0
897 898 899 ####################################################################### 900 # Utility functions 901 ####################################################################### 902 903 #################### 904 # _usage() function 905 #################### 906
def _usage(fd=sys.stderr):
   """
   Writes the usage statement for the cback-amazons3-sync script.

   Each line of the statement is written with its own C{fd.write()} call.

   @param fd: File descriptor used to print information.
   @note: The C{fd} is used rather than C{print} to facilitate unit testing.
   """
   # NOTE(review): spacing inside these literals is copied from a listing that
   # appears to have collapsed runs of spaces; confirm column alignment against
   # the pristine source.
   usageLines = (
      "\n",
      " Usage: cback-amazons3-sync [switches] sourceDir s3bucketUrl\n",
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      "\n",
      " This Cedar Backup utility synchronizes a local directory to an Amazon S3\n",
      " bucket. After the sync is complete, a validation step is taken. An\n",
      " error is reported if the contents of the bucket do not match the\n",
      " source directory, or if the indicated size for any file differs.\n",
      " This tool is a wrapper over the AWS CLI command-line tool.\n",
      "\n",
      " The following arguments are required:\n",
      "\n",
      " sourceDir The local source directory on disk (must exist)\n",
      " s3BucketUrl The URL to the target Amazon S3 bucket\n",
      "\n",
      " The following switches are accepted:\n",
      "\n",
      " -h, --help Display this usage/help listing\n",
      " -V, --version Display version information\n",
      " -b, --verbose Print verbose output as well as logging to disk\n",
      " -q, --quiet Run quietly (display no output to the screen)\n",
      " -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE,
      " -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]),
      " -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE,
      " -O, --output Record some sub-command (i.e. aws) output to the log\n",
      " -d, --debug Write debugging information to the log (implies --output)\n",
      " -s, --stack Dump Python stack trace instead of swallowing exceptions\n", # exactly 80 characters in width!
      " -D, --diagnostics Print runtime diagnostics to the screen and exit\n",
      " -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n",
      " -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n",
      "\n",
      " Typical usage would be something like:\n",
      "\n",
      " cback-amazons3-sync /home/myuser s3://example.com-backup/myuser\n",
      "\n",
      " This will sync the contents of /home/myuser into the indicated bucket.\n",
      "\n",
   )
   for usageLine in usageLines:
      fd.write(usageLine)
951 952 953 ###################### 954 # _version() function 955 ###################### 956
def _version(fd=sys.stdout):
   """
   Writes version and copyright information for the cback-amazons3-sync script.
   @param fd: File-like object that receives the text through its C{write} method.
   @note: Output goes through C{fd} rather than C{print} so unit tests can capture it.
   """
   # One entry per output line, in display order.
   versionText = (
      "\n",
      " Cedar Backup Amazon S3 sync tool.\n",
      " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE),
      "\n",
      " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL),
      " See CREDITS for a list of included code and other contributors.\n",
      " This is free software; there is NO warranty. See the\n",
      " GNU General Public License version 2 for copying conditions.\n",
      "\n",
      " Use the --help option for usage information.\n",
      "\n",
   )
   for line in versionText:
      fd.write(line)
974 975 976 ########################## 977 # _diagnostics() function 978 ########################## 979
def _diagnostics(fd=sys.stdout):
   """
   Writes the runtime diagnostics report.

   A short heading is written first, then the C{Diagnostics} object writes
   its own report to the same destination, followed by a closing blank line.

   @param fd: File-like object that receives the text through its C{write} method.
   @note: Output goes through C{fd} rather than C{print} so unit tests can capture it.
   """
   for heading in ("\n", "Diagnostics:\n", "\n"):
      fd.write(heading)
   report = Diagnostics()
   report.printDiagnostics(fd=fd, prefix=" ")
   fd.write("\n")
991 992 993 ############################ 994 # _executeAction() function 995 ############################ 996
def _executeAction(options):
   """
   Runs the cback-amazons3-sync workflow for a set of command-line options.

   The list of source files is always built first.  The filename encoding
   check runs unless C{options.ignoreWarnings} is set, and the upload to the
   bucket runs unless C{options.verifyOnly} is set.  The bucket contents are
   verified against the source file list in every case.

   @param options: Program command-line options.
   @type options: Options object.

   @raise Exception: Under many generic error conditions
   """
   sourceDir = options.sourceDir
   sourceFiles = _buildSourceFiles(sourceDir)

   if not options.ignoreWarnings:
      _checkSourceFiles(sourceDir, sourceFiles)

   # The bucket URL is only needed from here on, so it is read after the checks.
   syncRequested = not options.verifyOnly
   s3BucketUrl = options.s3BucketUrl
   if syncRequested:
      _synchronizeBucket(sourceDir, s3BucketUrl)

   _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl)
1012 1013 1014 ################################ 1015 # _buildSourceFiles() function 1016 ################################ 1017
def _buildSourceFiles(sourceDir):
   """
   Collects the contents of a source directory into a filesystem list.
   @param sourceDir: Local source directory
   @return: FilesystemList with contents of source directory
   @raise ValueError: If the source directory is not an existing directory
   """
   if os.path.isdir(sourceDir):
      contents = FilesystemList()
      contents.addDirContents(sourceDir)
      return contents
   raise ValueError("Source directory does not exist on disk.")
1029 1030 1031 ############################### 1032 # _checkSourceFiles() function 1033 ############################### 1034
def _checkSourceFiles(sourceDir, sourceFiles):
   """
   Check source files, trying to guess which ones will have encoding problems.

   The encoding of each filename is guessed using C{chardet}.  The filename is
   then decoded using the guessed encoding and re-encoded using the encoding
   reported by C{Diagnostics}.  A filename is reported as a problem if its
   encoding cannot be guessed, if either conversion fails, or if the
   re-encoded filename differs from the filename found on disk.

   @note: The C{sourceDir} argument is not used by the check itself.

   @param sourceDir: Local source directory
   @param sourceFiles: Filesystem list containing contents of source directory

   @raises ValueError: If a problem file is found

   @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
   @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
   @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
   """
   with warnings.catch_warnings():
      warnings.simplefilter("ignore") # So we don't print unicode warnings from comparisons

      encoding = Diagnostics().encoding

      failed = False
      for entry in sourceFiles:
         # chardet reports an encoding of None when it cannot make a guess
         detected = chardet.detect(entry)["encoding"]
         try:
            if detected is None:
               consistent = False
            else:
               # Compare the bytes found on disk with the re-encoded bytes.
               # Comparing the decoded (unicode) name against encoded bytes
               # would flag every non-ASCII filename under Python 2, because
               # such a comparison is always treated as unequal.
               consistent = (entry.decode(detected).encode(encoding) == entry)
         except (UnicodeError, LookupError):
            # UnicodeError covers both a bad guess (decode) and a name that
            # cannot be represented in the target encoding (encode);
            # LookupError covers a guessed encoding that Python has no codec for.
            consistent = False
         if not consistent:
            logger.error("Inconsistent encoding for [%s]: got %s, but need %s" % (entry, detected, encoding))
            failed = True

      if not failed:
         logger.info("Completed checking source filename encoding (no problems found).")
      else:
         logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
         logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
         logger.error("Alternately, you can rename the problem files to be valid in the indicated locale.")
         logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
         raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
1071 1072 1073 ################################ 1074 # _synchronizeBucket() function 1075 ################################ 1076
def _synchronizeBucket(sourceDir, s3BucketUrl):
   """
   Pushes the contents of a local directory up to an Amazon S3 bucket.

   The work is delegated to the AWS CLI C{s3 sync} command, invoked with the
   C{--delete} and C{--recursive} switches.

   @param sourceDir: Local source directory
   @param s3BucketUrl: Target S3 bucket URL
   @raise IOError: If the AWS CLI command returns a non-zero status
   """
   logger.info("Synchronizing local source directory up to Amazon S3.")
   syncArgs = [ "s3", "sync", sourceDir, s3BucketUrl, "--delete", "--recursive", ]
   outcome = executeCommand(AWS_COMMAND, syncArgs, returnOutput=False)
   status = outcome[0]  # first element is the command's exit status
   if status == 0:
      return
   raise IOError("Error [%d] calling AWS CLI synchronize bucket." % status)
1088 1089 1090 ################################### 1091 # _verifyBucketContents() function 1092 ################################### 1093
def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
   """
   Verify that a source directory is equivalent to an Amazon S3 bucket.

   The bucket is listed via the AWS CLI C{s3api list-objects} command, and the
   resulting keys and sizes are compared against every regular file in
   C{sourceFiles}.  Files are matched by their path relative to C{sourceDir}
   on the local side and relative to the URL's key prefix on the S3 side.  An
   error is logged for each file that is missing from the bucket or whose size
   differs.  Objects that exist only in the bucket are not reported.

   @param sourceDir: Local source directory
   @param sourceFiles: Filesystem list containing contents of source directory
   @param s3BucketUrl: Target S3 bucket URL

   @raise IOError: If the AWS CLI command returns a non-zero status
   @raise ValueError: If any file is missing from the bucket or differs in size
   """
   # As of this writing, the documentation for the S3 API that we're using
   # below says that up to 1000 elements at a time are returned, and that we
   # have to manually handle pagination by looking for the IsTruncated element.
   # However, in practice, this is not true.  I have been testing with
   # "aws-cli/1.4.4 Python/2.7.3 Linux/3.2.0-4-686-pae", installed through PIP.
   # No matter how many items exist in my bucket and prefix, I get back a
   # single JSON result.  I've tested with buckets containing nearly 6000
   # elements.
   #
   # If I turn on debugging, it's clear that underneath, something in the API
   # is executing multiple list-object requests against AWS, and stitching
   # results together to give me back the final JSON result.  The debug output
   # clearly includes multiple requests, and each XML response (except for the
   # final one) contains <IsTruncated>true</IsTruncated>.
   #
   # This feature is not mentioned in the official changelog for any of the
   # releases going back to 1.0.0.  It appears to happen in the botocore
   # library, but I'll admit I can't actually find the code that implements it.
   # For now, all I can do is rely on this behavior and hope that the
   # documentation is out-of-date.  I'm not going to write code that tries to
   # parse out IsTruncated if I can't actually test that code.

   # partition() tolerates a URL with no key prefix (e.g. "s3://bucket"),
   # where unpacking the result of split("/", 1) would fail.
   location = s3BucketUrl.replace("s3://", "").partition("/")
   bucket = location[0]
   prefix = location[2]

   query = "Contents[].{Key: Key, Size: Size}"
   args = [ "s3api", "list-objects", "--bucket", bucket, ]
   if prefix:
      args += [ "--prefix", prefix, ]
   # Force JSON so that parsing does not depend on the user's configured output format
   args += [ "--query", query, "--output", "json", ]
   (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
   if result != 0:
      raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

   # Nothing under the prefix yields no usable list (empty output or a JSON
   # null); treat that as an empty bucket so the missing files get reported.
   output = "".join(data).strip()
   listing = None
   if output:
      listing = json.loads(output)

   contents = { }
   for entry in listing or [ ]:
      contents[_relativeKey(entry["Key"], prefix)] = int(entry["Size"])

   failed = False
   for entry in sourceFiles:
      if not os.path.isfile(entry):
         continue   # only regular files are compared against the bucket
      key = _relativeKey(entry, sourceDir)
      size = int(os.stat(entry).st_size)
      if key not in contents:
         logger.error("File was apparently not uploaded: [%s]" % entry)
         failed = True
      elif size != contents[key]:
         logger.error("File size differs [%s]: expected %s bytes but got %s bytes" % (entry, size, contents[key]))
         failed = True

   if not failed:
      logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
   else:
      logger.error("There were differences between source directory and target S3 bucket.")
      raise ValueError("There were differences between source directory and target S3 bucket.")


def _relativeKey(path, base):
   """
   Reduce a local path or an S3 key to a form relative to its base.

   Only an occurrence of C{base} at the very start of C{path} is removed, so a
   later repetition of the base inside the path is left alone.  Leading
   slashes are then dropped, so the result is the same whether or not the base
   was given with a trailing slash.

   @param path: Local file path or S3 object key
   @param base: Source directory or S3 key prefix to strip (may be empty)
   @return: Path relative to the base, with no leading slash
   """
   if base and path.startswith(base):
      path = path[len(base):]
   return path.lstrip("/")
########################################################################
# Main routine
########################################################################

# When run as a script, hand the result of cli() to the interpreter as the
# process exit status.
if __name__ == "__main__":
   raise SystemExit(cli())