1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38 """
39 Synchronizes a local directory with an Amazon S3 bucket.
40
41 No configuration is required; all necessary information is taken from the
42 command-line. The only thing configuration would help with is the path
43 resolver interface, and it doesn't seem worth it to require configuration just
44 to get that.
45
46 @author: Kenneth J. Pronovici <pronovic@ieee.org>
47 """
48
49
50
51
52
53
54 import sys
55 import os
56 import logging
57 import getopt
58 import json
59 import warnings
60 from functools import total_ordering
61 from pathlib import Path
62 import chardet
63
64
65 from CedarBackup3.release import AUTHOR, EMAIL, VERSION, DATE, COPYRIGHT
66 from CedarBackup3.filesystem import FilesystemList
67 from CedarBackup3.cli import setupLogging, DEFAULT_LOGFILE, DEFAULT_OWNERSHIP, DEFAULT_MODE
68 from CedarBackup3.util import Diagnostics, splitCommandLine, encodePath
69 from CedarBackup3.util import executeCommand
70
71
72
73
74
75
# Logger shared by every function in this tool.
logger = logging.getLogger("CedarBackup3.log.tools.amazons3")

# Base command line for the AWS CLI; the sub-command arguments are supplied per call.
AWS_COMMAND = ["aws"]

# getopt specifications: a trailing ":" (short form) or "=" (long form) marks a
# switch that takes a value.
SHORT_SWITCHES = "hVbql:o:m:OdsDvw"
LONG_SWITCHES = [
    "help",
    "version",
    "verbose",
    "quiet",
    "logfile=",
    "owner=",
    "mode=",
    "output",
    "debug",
    "stack",
    "diagnostics",
    "verifyOnly",
    "ignoreWarnings",
]
85
86
87
88
89
90
91 @total_ordering
92 -class Options(object):
93
94
95
96
97
98 """
99 Class representing command-line options for the cback3-amazons3-sync script.
100
101 The C{Options} class is a Python object representation of the command-line
102 options of the cback3-amazons3-sync script.
103
104 The object representation is two-way: a command line string or a list of
105 command line arguments can be used to create an C{Options} object, and then
106 changes to the object can be propagated back to a list of command-line
107 arguments or to a command-line string. An C{Options} object can even be
108 created from scratch programmatically (if you have a need for that).
109
110 There are two main levels of validation in the C{Options} class. The first
111 is field-level validation. Field-level validation comes into play when a
112 given field in an object is assigned to or updated. We use Python's
113 C{property} functionality to enforce specific validations on field values,
114 and in some places we even use customized list classes to enforce
115 validations on list members. You should expect to catch a C{ValueError}
116 exception when making assignments to fields if you are programmatically
117 filling an object.
118
119 The second level of validation is post-completion validation. Certain
120 validations don't make sense until an object representation of options is
121 fully "complete". We don't want these validations to apply all of the time,
122 because it would make building up a valid object from scratch a real pain.
123 For instance, we might have to do things in the right order to keep from
124 throwing exceptions, etc.
125
126 All of these post-completion validations are encapsulated in the
127 L{Options.validate} method. This method can be called at any time by a
128 client, and will always be called immediately after creating a C{Options}
129 object from a command line and before exporting a C{Options} object back to
130 a command line. This way, we get acceptable ease-of-use but we also don't
131 accept or emit invalid command lines.
132
133 @note: Lists within this class are "unordered" for equality comparisons.
134
135 @sort: __init__, __repr__, __str__, __cmp__, __eq__, __lt__, __gt__
136 """
137
138
139
140
141
def __init__(self, argumentList=None, argumentString=None, validate=True):
    """
    Builds an options object, optionally from a command line.

    When neither C{argumentList} nor C{argumentString} is given, every field
    is left at its empty default and no parsing or validation takes place.

    The command line may be supplied in exactly one of two forms. An
    C{argumentList} is a list of arguments that excludes the command name
    (like C{sys.argv[1:]}). An C{argumentString} is first split into such a
    list by L{util.splitCommandLine}; see that function for the limitations
    of the splitting. Neither the list nor the string is kept on the object.

    After a command line has been parsed, L{Options.validate} is called
    unless C{validate} is C{False}. Parsing itself can still raise an
    exception even when validation is switched off.

    @param argumentList: Command line for a program.
    @type argumentList: List of arguments, i.e. C{sys.argv[1:]}

    @param argumentString: Command line for a program.
    @type argumentString: String, i.e. "--verbose /home/myuser s3://example.com-backup/myuser"

    @param validate: Validate the command line after parsing it.
    @type validate: Boolean true/false.

    @raise getopt.GetoptError: If the command-line arguments could not be parsed.
    @raise ValueError: If both forms are supplied, or the arguments are invalid.
    """
    # Boolean switches start out unset.
    self._help = self._version = self._verbose = self._quiet = False
    self._output = self._debug = self._stacktrace = self._diagnostics = False
    self._verifyOnly = self._ignoreWarnings = False

    # Valued switches and positional arguments start out absent.
    self._logfile = self._owner = self._mode = None
    self._sourceDir = self._s3BucketUrl = None

    haveString = argumentString is not None
    if haveString and argumentList is not None:
        raise ValueError("Use either argumentList or argumentString, but not both.")
    if haveString:
        argumentList = splitCommandLine(argumentString)

    # No command line at all: leave the object empty and unvalidated.
    if argumentList is None:
        return

    self._parseArgumentList(argumentList)
    if validate:
        self.validate()
213
214
215
216
217
218
224
226 """
227 Informal string representation for class instance.
228 """
229 return self.__repr__()
230
231
232
233
234
235
237 """Equals operator, implemented in terms of original Python 2 compare operator."""
238 return self.__cmp__(other) == 0
239
241 """Less-than operator, implemented in terms of original Python 2 compare operator."""
242 return self.__cmp__(other) < 0
243
245 """Greater-than operator, implemented in terms of original Python 2 compare operator."""
246 return self.__cmp__(other) > 0
247
333
334
335
336
337
338
340 """
341 Property target used to set the help flag.
342 No validations, but we normalize the value to C{True} or C{False}.
343 """
344 if value:
345 self._help = True
346 else:
347 self._help = False
348
350 """
351 Property target used to get the help flag.
352 """
353 return self._help
354
356 """
357 Property target used to set the version flag.
358 No validations, but we normalize the value to C{True} or C{False}.
359 """
360 if value:
361 self._version = True
362 else:
363 self._version = False
364
366 """
367 Property target used to get the version flag.
368 """
369 return self._version
370
372 """
373 Property target used to set the verbose flag.
374 No validations, but we normalize the value to C{True} or C{False}.
375 """
376 if value:
377 self._verbose = True
378 else:
379 self._verbose = False
380
382 """
383 Property target used to get the verbose flag.
384 """
385 return self._verbose
386
388 """
389 Property target used to set the quiet flag.
390 No validations, but we normalize the value to C{True} or C{False}.
391 """
392 if value:
393 self._quiet = True
394 else:
395 self._quiet = False
396
398 """
399 Property target used to get the quiet flag.
400 """
401 return self._quiet
402
404 """
405 Property target used to set the logfile parameter.
406 @raise ValueError: If the value cannot be encoded properly.
407 """
408 if value is not None:
409 if len(value) < 1:
410 raise ValueError("The logfile parameter must be a non-empty string.")
411 self._logfile = encodePath(value)
412
414 """
415 Property target used to get the logfile parameter.
416 """
417 return self._logfile
418
420 """
421 Property target used to set the owner parameter.
422 If not C{None}, the owner must be a C{(user,group)} tuple or list.
423 Strings (and inherited children of strings) are explicitly disallowed.
424 The value will be normalized to a tuple.
425 @raise ValueError: If the value is not valid.
426 """
427 if value is None:
428 self._owner = None
429 else:
430 if isinstance(value, str):
431 raise ValueError("Must specify user and group tuple for owner parameter.")
432 if len(value) != 2:
433 raise ValueError("Must specify user and group tuple for owner parameter.")
434 if len(value[0]) < 1 or len(value[1]) < 1:
435 raise ValueError("User and group tuple values must be non-empty strings.")
436 self._owner = (value[0], value[1])
437
439 """
440 Property target used to get the owner parameter.
441 The parameter is a tuple of C{(user, group)}.
442 """
443 return self._owner
444
446 """
447 Property target used to set the mode parameter.
448 """
449 if value is None:
450 self._mode = None
451 else:
452 try:
453 if isinstance(value, str):
454 value = int(value, 8)
455 else:
456 value = int(value)
457 except TypeError:
458 raise ValueError("Mode must be an octal integer >= 0, i.e. 644.")
459 if value < 0:
460 raise ValueError("Mode must be an octal integer >= 0. i.e. 644.")
461 self._mode = value
462
464 """
465 Property target used to get the mode parameter.
466 """
467 return self._mode
468
470 """
471 Property target used to set the output flag.
472 No validations, but we normalize the value to C{True} or C{False}.
473 """
474 if value:
475 self._output = True
476 else:
477 self._output = False
478
480 """
481 Property target used to get the output flag.
482 """
483 return self._output
484
486 """
487 Property target used to set the debug flag.
488 No validations, but we normalize the value to C{True} or C{False}.
489 """
490 if value:
491 self._debug = True
492 else:
493 self._debug = False
494
496 """
497 Property target used to get the debug flag.
498 """
499 return self._debug
500
502 """
503 Property target used to set the stacktrace flag.
504 No validations, but we normalize the value to C{True} or C{False}.
505 """
506 if value:
507 self._stacktrace = True
508 else:
509 self._stacktrace = False
510
512 """
513 Property target used to get the stacktrace flag.
514 """
515 return self._stacktrace
516
518 """
519 Property target used to set the diagnostics flag.
520 No validations, but we normalize the value to C{True} or C{False}.
521 """
522 if value:
523 self._diagnostics = True
524 else:
525 self._diagnostics = False
526
528 """
529 Property target used to get the diagnostics flag.
530 """
531 return self._diagnostics
532
534 """
535 Property target used to set the verifyOnly flag.
536 No validations, but we normalize the value to C{True} or C{False}.
537 """
538 if value:
539 self._verifyOnly = True
540 else:
541 self._verifyOnly = False
542
544 """
545 Property target used to get the verifyOnly flag.
546 """
547 return self._verifyOnly
548
550 """
551 Property target used to set the ignoreWarnings flag.
552 No validations, but we normalize the value to C{True} or C{False}.
553 """
554 if value:
555 self._ignoreWarnings = True
556 else:
557 self._ignoreWarnings = False
558
560 """
561 Property target used to get the ignoreWarnings flag.
562 """
563 return self._ignoreWarnings
564
566 """
567 Property target used to set the sourceDir parameter.
568 """
569 if value is not None:
570 if len(value) < 1:
571 raise ValueError("The sourceDir parameter must be a non-empty string.")
572 self._sourceDir = value
573
575 """
576 Property target used to get the sourceDir parameter.
577 """
578 return self._sourceDir
579
581 """
582 Property target used to set the s3BucketUrl parameter.
583 """
584 if value is not None:
585 if len(value) < 1:
586 raise ValueError("The s3BucketUrl parameter must be a non-empty string.")
587 self._s3BucketUrl = value
588
590 """
591 Property target used to get the s3BucketUrl parameter.
592 """
593 return self._s3BucketUrl
594
595 help = property(_getHelp, _setHelp, None, "Command-line help (C{-h,--help}) flag.")
596 version = property(_getVersion, _setVersion, None, "Command-line version (C{-V,--version}) flag.")
597 verbose = property(_getVerbose, _setVerbose, None, "Command-line verbose (C{-b,--verbose}) flag.")
598 quiet = property(_getQuiet, _setQuiet, None, "Command-line quiet (C{-q,--quiet}) flag.")
599 logfile = property(_getLogfile, _setLogfile, None, "Command-line logfile (C{-l,--logfile}) parameter.")
600 owner = property(_getOwner, _setOwner, None, "Command-line owner (C{-o,--owner}) parameter, as tuple C{(user,group)}.")
601 mode = property(_getMode, _setMode, None, "Command-line mode (C{-m,--mode}) parameter.")
602 output = property(_getOutput, _setOutput, None, "Command-line output (C{-O,--output}) flag.")
603 debug = property(_getDebug, _setDebug, None, "Command-line debug (C{-d,--debug}) flag.")
604 stacktrace = property(_getStacktrace, _setStacktrace, None, "Command-line stacktrace (C{-s,--stack}) flag.")
605 diagnostics = property(_getDiagnostics, _setDiagnostics, None, "Command-line diagnostics (C{-D,--diagnostics}) flag.")
606 verifyOnly = property(_getVerifyOnly, _setVerifyOnly, None, "Command-line verifyOnly (C{-v,--verifyOnly}) flag.")
607 ignoreWarnings = property(_getIgnoreWarnings, _setIgnoreWarnings, None, "Command-line ignoreWarnings (C{-w,--ignoreWarnings}) flag.")
608 sourceDir = property(_getSourceDir, _setSourceDir, None, "Command-line sourceDir, source of sync.")
609 s3BucketUrl = property(_getS3BucketUrl, _setS3BucketUrl, None, "Command-line s3BucketUrl, target of sync.")
610
611
612
613
614
615
617 """
618 Validates command-line options represented by the object.
619
620 Unless C{--help}, C{--version} or C{--diagnostics} is supplied, both the
621 source directory and the S3 bucket URL must be specified. Other validations
622 (as for allowed values for particular options) will be taken care of at
623 assignment time by the properties functionality.
624
625 @note: The command line format is specified by the L{_usage} function.
626 Call L{_usage} to see a usage statement for the cback3-amazons3-sync script.
627
628 @raise ValueError: If one of the validations fails.
629 """
630 if not self.help and not self.version and not self.diagnostics:
631 if self.sourceDir is None or self.s3BucketUrl is None:
632 raise ValueError("Source directory and S3 bucket URL are both required.")
633
635 """
636 Extracts options into a list of command line arguments.
637
638 The original order of the various arguments (if, indeed, the object was
639 initialized with a command-line) is not preserved in this generated
640 argument list. Besides that, the argument list is normalized to use the
641 long option names (i.e. --version rather than -V). The resulting list
642 will be suitable for passing back to the constructor in the
643 C{argumentList} parameter. Unlike L{buildArgumentString}, string
644 arguments are not quoted here, because there is no need for it.
645
646 Unless the C{validate} parameter is C{False}, the L{Options.validate}
647 method will be called (with its default arguments) against the
648 options before extracting the command line. If the options are not valid,
649 then an argument list will not be extracted.
650
651 @note: It is strongly suggested that the C{validate} option always be set
652 to C{True} (the default) unless there is a specific need to extract an
653 invalid command line.
654
655 @param validate: Validate the options before extracting the command line.
656 @type validate: Boolean true/false.
657
658 @return: List representation of command-line arguments.
659 @raise ValueError: If options within the object are invalid.
660 """
661 if validate:
662 self.validate()
663 argumentList = []
664 if self._help:
665 argumentList.append("--help")
666 if self.version:
667 argumentList.append("--version")
668 if self.verbose:
669 argumentList.append("--verbose")
670 if self.quiet:
671 argumentList.append("--quiet")
672 if self.logfile is not None:
673 argumentList.append("--logfile")
674 argumentList.append(self.logfile)
675 if self.owner is not None:
676 argumentList.append("--owner")
677 argumentList.append("%s:%s" % (self.owner[0], self.owner[1]))
678 if self.mode is not None:
679 argumentList.append("--mode")
680 argumentList.append("%o" % self.mode)
681 if self.output:
682 argumentList.append("--output")
683 if self.debug:
684 argumentList.append("--debug")
685 if self.stacktrace:
686 argumentList.append("--stack")
687 if self.diagnostics:
688 argumentList.append("--diagnostics")
689 if self.verifyOnly:
690 argumentList.append("--verifyOnly")
691 if self.ignoreWarnings:
692 argumentList.append("--ignoreWarnings")
693 if self.sourceDir is not None:
694 argumentList.append(self.sourceDir)
695 if self.s3BucketUrl is not None:
696 argumentList.append(self.s3BucketUrl)
697 return argumentList
698
700 """
701 Extracts options into a string of command-line arguments.
702
703 The original order of the various arguments (if, indeed, the object was
704 initialized with a command-line) is not preserved in this generated
705 argument string. Besides that, the argument string is normalized to use
706 the long option names (i.e. --version rather than -V) and to quote all
707 string arguments with double quotes (C{"}). The resulting string will be
708 suitable for passing back to the constructor in the C{argumentString}
709 parameter.
710
711 Unless the C{validate} parameter is C{False}, the L{Options.validate}
712 method will be called (with its default arguments) against the options
713 before extracting the command line. If the options are not valid, then
714 an argument string will not be extracted.
715
716 @note: It is strongly suggested that the C{validate} option always be set
717 to C{True} (the default) unless there is a specific need to extract an
718 invalid command line.
719
720 @param validate: Validate the options before extracting the command line.
721 @type validate: Boolean true/false.
722
723 @return: String representation of command-line arguments.
724 @raise ValueError: If options within the object are invalid.
725 """
726 if validate:
727 self.validate()
728 argumentString = ""
729 if self._help:
730 argumentString += "--help "
731 if self.version:
732 argumentString += "--version "
733 if self.verbose:
734 argumentString += "--verbose "
735 if self.quiet:
736 argumentString += "--quiet "
737 if self.logfile is not None:
738 argumentString += "--logfile \"%s\" " % self.logfile
739 if self.owner is not None:
740 argumentString += "--owner \"%s:%s\" " % (self.owner[0], self.owner[1])
741 if self.mode is not None:
742 argumentString += "--mode %o " % self.mode
743 if self.output:
744 argumentString += "--output "
745 if self.debug:
746 argumentString += "--debug "
747 if self.stacktrace:
748 argumentString += "--stack "
749 if self.diagnostics:
750 argumentString += "--diagnostics "
751 if self.verifyOnly:
752 argumentString += "--verifyOnly "
753 if self.ignoreWarnings:
754 argumentString += "--ignoreWarnings "
755 if self.sourceDir is not None:
756 argumentString += "\"%s\" " % self.sourceDir
757 if self.s3BucketUrl is not None:
758 argumentString += "\"%s\" " % self.s3BucketUrl
759 return argumentString
760
762 """
763 Internal method to parse a list of command-line arguments.
764
765 Most of the validation we do here has to do with whether the arguments
766 can be parsed and whether any values which exist are valid. We don't do
767 any validation as to whether required elements exist or whether elements
768 exist in the proper combination (instead, that's the job of the
769 L{validate} method).
770
771 For any of the options which supply parameters, if the option is
772 duplicated with long and short switches (i.e. C{-l} and a C{--logfile})
773 then the long switch is used. If the same option is duplicated with the
774 same switch (long or short), then the last entry on the command line is
775 used.
776
777 @param argumentList: List of arguments to a command.
778 @type argumentList: List of arguments to a command, i.e. C{sys.argv[1:]}
779
780 @raise ValueError: If the argument list cannot be successfully parsed.
781 """
782 switches = { }
783 opts, remaining = getopt.getopt(argumentList, SHORT_SWITCHES, LONG_SWITCHES)
784 for o, a in opts:
785 switches[o] = a
786 if "-h" in switches or "--help" in switches:
787 self.help = True
788 if "-V" in switches or "--version" in switches:
789 self.version = True
790 if "-b" in switches or "--verbose" in switches:
791 self.verbose = True
792 if "-q" in switches or "--quiet" in switches:
793 self.quiet = True
794 if "-l" in switches:
795 self.logfile = switches["-l"]
796 if "--logfile" in switches:
797 self.logfile = switches["--logfile"]
798 if "-o" in switches:
799 self.owner = switches["-o"].split(":", 1)
800 if "--owner" in switches:
801 self.owner = switches["--owner"].split(":", 1)
802 if "-m" in switches:
803 self.mode = switches["-m"]
804 if "--mode" in switches:
805 self.mode = switches["--mode"]
806 if "-O" in switches or "--output" in switches:
807 self.output = True
808 if "-d" in switches or "--debug" in switches:
809 self.debug = True
810 if "-s" in switches or "--stack" in switches:
811 self.stacktrace = True
812 if "-D" in switches or "--diagnostics" in switches:
813 self.diagnostics = True
814 if "-v" in switches or "--verifyOnly" in switches:
815 self.verifyOnly = True
816 if "-w" in switches or "--ignoreWarnings" in switches:
817 self.ignoreWarnings = True
818 try:
819 (self.sourceDir, self.s3BucketUrl) = remaining
820 except ValueError:
821 pass
822
823
824
825
826
827
828
829
830
831
def cli():
    """
    Implements the command-line interface for the C{cback3-amazons3-sync} script.

    Essentially, this is the "main routine" for the cback3-amazons3-sync script. It does
    all of the argument processing for the script, and then also implements the
    tool functionality.

    This function looks pretty similar to C{CedarBackup3.cli.cli()}. It's not
    easy to refactor this code to make it reusable and also readable, so I've
    decided to just live with the duplication.

    A different status code is returned for each outcome:

        - C{0}: Success (also returned after C{--help}, C{--version} or C{--diagnostics})
        - C{1}: The Python interpreter version is < 3.4
        - C{2}: Error processing command-line arguments
        - C{3}: Error configuring logging
        - C{5}: Backup was interrupted with a CTRL-C or similar
        - C{6}: Error executing other parts of the script

    When the C{--stack} switch is given, exceptions raised while setting up
    logging or executing the action are allowed to propagate instead of being
    converted into status codes C{3}, C{5} and C{6}.

    @note: This script uses print rather than logging to the INFO level, because
    it is interactive. Underlying Cedar Backup functionality uses the logging
    mechanism exclusively.

    @return: Status code as described above.
    """
    # sys.version_info compares directly against a tuple, so no conversion and
    # no catch-all exception handler is needed around the check.
    if sys.version_info[:2] < (3, 4):
        sys.stderr.write("Python 3 version 3.4 or greater required.\n")
        return 1

    try:
        options = Options(argumentList=sys.argv[1:])
    except Exception as e:
        _usage()
        sys.stderr.write(" *** Error: %s\n" % e)
        return 2

    # Informational switches short-circuit everything else.
    if options.help:
        _usage()
        return 0
    if options.version:
        _version()
        return 0
    if options.diagnostics:
        _diagnostics()
        return 0

    if options.stacktrace:
        # Let any failure surface with its full stack trace.
        logfile = setupLogging(options)
    else:
        try:
            logfile = setupLogging(options)
        except Exception as e:
            sys.stderr.write("Error setting up logging: %s\n" % e)
            return 3

    logger.info("Cedar Backup Amazon S3 sync run started.")
    logger.info("Options were [%s]", options)
    logger.info("Logfile is [%s]", logfile)
    Diagnostics().logDiagnostics(method=logger.info)

    if options.stacktrace:
        # Let any failure surface with its full stack trace.
        _executeAction(options)
    else:
        try:
            _executeAction(options)
        except KeyboardInterrupt:
            logger.error("Backup interrupted.")
            logger.info("Cedar Backup Amazon S3 sync run completed with status 5.")
            return 5
        except Exception as e:
            logger.error("Error executing backup: %s", e)
            logger.info("Cedar Backup Amazon S3 sync run completed with status 6.")
            return 6

    logger.info("Cedar Backup Amazon S3 sync run completed with status 0.")
    return 0
914
915
916
917
918
919
920
921
922
923
def _usage(fd=sys.stderr):
    """
    Writes the usage/help listing for the cback3-amazons3-sync script.
    @param fd: File-like object that receives the listing through its C{write} method.
    @note: Output goes through C{fd} rather than C{print} to facilitate unit testing.
    """
    listing = (
        "\n",
        " Usage: cback3-amazons3-sync [switches] sourceDir s3bucketUrl\n",
        "\n",
        " Cedar Backup Amazon S3 sync tool.\n",
        "\n",
        " This Cedar Backup utility synchronizes a local directory to an Amazon S3\n",
        " bucket. After the sync is complete, a validation step is taken. An\n",
        " error is reported if the contents of the bucket do not match the\n",
        " source directory, or if the indicated size for any file differs.\n",
        " This tool is a wrapper over the AWS CLI command-line tool.\n",
        "\n",
        " The following arguments are required:\n",
        "\n",
        " sourceDir The local source directory on disk (must exist)\n",
        " s3BucketUrl The URL to the target Amazon S3 bucket\n",
        "\n",
        " The following switches are accepted:\n",
        "\n",
        " -h, --help Display this usage/help listing\n",
        " -V, --version Display version information\n",
        " -b, --verbose Print verbose output as well as logging to disk\n",
        " -q, --quiet Run quietly (display no output to the screen)\n",
        " -l, --logfile Path to logfile (default: %s)\n" % DEFAULT_LOGFILE,
        " -o, --owner Logfile ownership, user:group (default: %s:%s)\n" % (DEFAULT_OWNERSHIP[0], DEFAULT_OWNERSHIP[1]),
        " -m, --mode Octal logfile permissions mode (default: %o)\n" % DEFAULT_MODE,
        " -O, --output Record some sub-command (i.e. aws) output to the log\n",
        " -d, --debug Write debugging information to the log (implies --output)\n",
        " -s, --stack Dump Python stack trace instead of swallowing exceptions\n",
        " -D, --diagnostics Print runtime diagnostics to the screen and exit\n",
        " -v, --verifyOnly Only verify the S3 bucket contents, do not make changes\n",
        " -w, --ignoreWarnings Ignore warnings about problematic filename encodings\n",
        "\n",
        " Typical usage would be something like:\n",
        "\n",
        " cback3-amazons3-sync /home/myuser s3://example.com-backup/myuser\n",
        "\n",
        " This will sync the contents of /home/myuser into the indicated bucket.\n",
        "\n",
    )
    # One write per line of the listing.
    for text in listing:
        fd.write(text)
968
969
970
971
972
973
def _version(fd=sys.stdout):
    """
    Writes the version and copyright banner for the cback3-amazons3-sync script.
    @param fd: File-like object that receives the banner through its C{write} method.
    @note: Output goes through C{fd} rather than C{print} to facilitate unit testing.
    """
    banner = (
        "\n",
        " Cedar Backup Amazon S3 sync tool.\n",
        " Included with Cedar Backup version %s, released %s.\n" % (VERSION, DATE),
        "\n",
        " Copyright (c) %s %s <%s>.\n" % (COPYRIGHT, AUTHOR, EMAIL),
        " See CREDITS for a list of included code and other contributors.\n",
        " This is free software; there is NO warranty. See the\n",
        " GNU General Public License version 2 for copying conditions.\n",
        "\n",
        " Use the --help option for usage information.\n",
        "\n",
    )
    # One write per line of the banner.
    for text in banner:
        fd.write(text)
991
998 """
999 Prints runtime diagnostics information.
1000 @param fd: File descriptor used to print information.
1001 @note: The C{fd} is used rather than C{print} to facilitate unit testing.
1002 """
1003 fd.write("\n")
1004 fd.write("Diagnostics:\n")
1005 fd.write("\n")
1006 Diagnostics().printDiagnostics(fd=fd, prefix=" ")
1007 fd.write("\n")
1008
1029
1036 """
1037 Build a list of files in a source directory
1038 @param sourceDir: Local source directory
1039 @return: FilesystemList with contents of source directory
1040 """
1041 if not os.path.isdir(sourceDir):
1042 raise ValueError("Source directory does not exist on disk.")
1043 sourceFiles = FilesystemList()
1044 sourceFiles.addDirContents(sourceDir)
1045 return sourceFiles
1046
1053 """
1054 Check source files, trying to guess which ones will have encoding problems.
1055 @param sourceDir: Local source directory
1056 @param sourceFiles: Filesystem list containing contents of source directory
1057 @raises ValueError: If a problem file is found
1058 @see U{http://opensourcehacker.com/2011/09/16/fix-linux-filename-encodings-with-python/}
1059 @see U{http://serverfault.com/questions/82821/how-to-tell-the-language-encoding-of-a-filename-on-linux}
1060 @see U{http://randysofia.com/2014/06/06/aws-cli-and-your-locale/}
1061 """
1062 with warnings.catch_warnings():
1063 encoding = Diagnostics().encoding
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078 failed = False
1079 for entry in sourceFiles:
1080 path = bytes(Path(entry))
1081 result = chardet.detect(path)
1082 source = path.decode(result["encoding"])
1083 try:
1084 target = path.decode(encoding)
1085 if source != target:
1086 logger.error("Inconsistent encoding for [%s]: got %s, but need %s", source, result["encoding"], encoding)
1087 failed = True
1088 except Exception:
1089 logger.error("Inconsistent encoding for [%s]: got %s, but need %s", source, result["encoding"], encoding)
1090 failed = True
1091
1092 if not failed:
1093 logger.info("Completed checking source filename encoding (no problems found).")
1094 else:
1095 logger.error("Some filenames have inconsistent encodings and will likely cause sync problems.")
1096 logger.error("You may be able to fix this by setting a more sensible locale in your environment.")
1097 logger.error("Aternately, you can rename the problem files to be valid in the indicated locale.")
1098 logger.error("To ignore this warning and proceed anyway, use --ignoreWarnings")
1099 raise ValueError("Some filenames have inconsistent encodings and will likely cause sync problems.")
1100
1107 """
1108 Synchronize a local directory to an Amazon S3 bucket.
1109 @param sourceDir: Local source directory
1110 @param s3BucketUrl: Target S3 bucket URL
1111 """
1112 logger.info("Synchronizing local source directory up to Amazon S3.")
1113 args = [ "s3", "sync", sourceDir, s3BucketUrl, "--delete", "--recursive", ]
1114 result = executeCommand(AWS_COMMAND, args, returnOutput=False)[0]
1115 if result != 0:
1116 raise IOError("Error [%d] calling AWS CLI synchronize bucket." % result)
1117
1118
1119
1120
1121
1122
def _verifyBucketContents(sourceDir, sourceFiles, s3BucketUrl):
    """
    Verify that a source directory is equivalent to an Amazon S3 bucket.

    The bucket is listed with C{aws s3api list-objects}, restricted to the
    key prefix taken from the bucket URL. Every regular file in
    C{sourceFiles} is then checked for presence in the listing and for a
    matching size in bytes. Objects that exist only in the bucket are not
    reported.

    @param sourceDir: Local source directory
    @param sourceFiles: Filesystem list containing contents of source directory
    @param s3BucketUrl: Target S3 bucket URL

    @raise IOError: If the AWS CLI exits with a non-zero status.
    @raise ValueError: If any file is missing from the bucket or differs in size.
    """
    # Split "s3://bucket/some/prefix" into bucket and prefix. A URL that names
    # only a bucket yields an empty prefix rather than failing to unpack.
    location = s3BucketUrl[len("s3://"):] if s3BucketUrl.startswith("s3://") else s3BucketUrl
    (bucket, _, prefix) = location.partition("/")

    query = "Contents[].{Key: Key, Size: Size}"
    args = ["s3api", "list-objects", "--bucket", bucket]
    if prefix:
        args.extend(["--prefix", prefix])
    args.extend(["--query", query])
    (result, data) = executeCommand(AWS_COMMAND, args, returnOutput=True)
    if result != 0:
        raise IOError("Error [%d] calling AWS CLI verify bucket contents." % result)

    # NOTE(review): when nothing matches, the AWS CLI appears to print JSON
    # null (or nothing at all) rather than an empty list -- confirm. Either
    # case is treated as an empty listing here.
    text = "".join(data).strip()
    listing = (json.loads(text) if text else None) or []

    contents = {}
    for entry in listing:
        contents[_relativeKey(entry["Key"], prefix)] = int(entry["Size"])

    failed = False
    for entry in sourceFiles:
        if not os.path.isfile(entry):
            continue  # only regular files are compared
        key = _relativeKey(entry, sourceDir)
        size = int(os.stat(entry).st_size)
        if key not in contents:
            logger.error("File was apparently not uploaded: [%s]", entry)
            failed = True
        elif size != contents[key]:
            logger.error("File size differs [%s]: expected %s bytes but got %s bytes", entry, size, contents[key])
            failed = True

    if not failed:
        logger.info("Completed verifying Amazon S3 bucket contents (no problems found).")
    else:
        logger.error("There were differences between source directory and target S3 bucket.")
        raise ValueError("There were differences between source directory and target S3 bucket.")


def _relativeKey(path, root):
    """
    Reduce a path or S3 key to the part below C{root}.

    Only a leading occurrence of C{root} is removed, so a root that happens
    to reappear later in the path is left alone. Leading C{/} separators are
    then dropped so local paths and S3 keys compare equal whether or not the
    root ended with a separator.

    @param path: Full local path or S3 object key
    @param root: Leading directory or key prefix to strip (may be empty)
    @return: Path relative to C{root}, without a leading C{/}
    """
    if root and path.startswith(root):
        path = path[len(root):]
    return path.lstrip("/")
1184
1185
1186
1187
1188
1189
if __name__ == "__main__":
    # Hand the status code returned by cli() to the interpreter as the exit status.
    raise SystemExit(cli())
1192