ODFPY  1.2.0
opendocument.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 # Copyright (C) 2006-2010 Søren Roug, European Environment Agency
3 #
4 # This library is free software; you can redistribute it and/or
5 # modify it under the terms of the GNU Lesser General Public
6 # License as published by the Free Software Foundation; either
7 # version 2.1 of the License, or (at your option) any later version.
8 #
9 # This library is distributed in the hope that it will be useful,
10 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 # Lesser General Public License for more details.
13 #
14 # You should have received a copy of the GNU Lesser General Public
15 # License along with this library; if not, write to the Free Software
16 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
17 #
18 # Contributor(s):
19 #
20 # Copyright (C) 2014 Georges Khaznadar <georgesk@debian.org>
21 # migration to Python3, JavaDOC comments and automatic
22 # build of documentation
23 #
24 
25 __doc__="""Use OpenDocument to generate your documents."""
26 
27 import zipfile, time, sys, mimetypes, copy, os.path
28 
29 # to allow Python3 to access modules in the same path
30 sys.path.append(os.path.dirname(__file__))
31 
32 # using BytesIO provides a cleaner interface than StringIO
33 # with both Python2 and Python3: the programmer must care to
34 # convert strings or unicode to bytes, which is valid for Python 2 and 3.
35 from io import StringIO, BytesIO
36 
37 from namespaces import *
38 import manifest
39 import meta
40 from office import *
41 import element
42 from attrconverters import make_NCName
43 from xml.sax.xmlreader import InputSource
44 from odfmanifest import manifestlist
45 
46 if sys.version_info[0] == 3:
47  unicode=str # unicode function does not exist
48 
49 __version__= TOOLSVERSION
50 
51 _XMLPROLOGUE = u"<?xml version='1.0' encoding='UTF-8'?>\n"
52 
53 #####
54 # file permission as an integer value.
55 # The following syntax would be invalid for Python3:
56 # UNIXPERMS = 0100644 << 16L # -rw-r--r--
57 #
58 # So it has been precomputed:
59 # 2175008768 is the same value as 0100644 << 16L == -rw-r--r--
60 ####
# Pre-shifted ZIP "external attributes" value for a regular file with
# mode -rw-r--r-- (0o100644 << 16).
UNIXPERMS = 2175008768

# Tags stored as the first item of each record in OpenDocument.Pictures.
IS_FILENAME = 0  # the record carries the path of a file on disk
IS_IMAGE = 1     # the record carries the picture bytes themselves
# We need at least Python 2.2.  (major, minor) is compared as a tuple:
# testing the two fields independently would wrongly reject 3.0 and 3.1.
assert sys.version_info[:2] >= (2, 2)
67 
68 #sys.setrecursionlimit(100)
69 #The recursion limit is set conservative so mistakes like
70 # s=content() s.addElement(s) won't eat up too much processor time.
71 
72 ###############
73 # mime-types => file extensions
74 ###############
75 odmimetypes = {
76  u'application/vnd.oasis.opendocument.text': u'.odt',
77  u'application/vnd.oasis.opendocument.text-template': u'.ott',
78  u'application/vnd.oasis.opendocument.graphics': u'.odg',
79  u'application/vnd.oasis.opendocument.graphics-template': u'.otg',
80  u'application/vnd.oasis.opendocument.presentation': u'.odp',
81  u'application/vnd.oasis.opendocument.presentation-template': u'.otp',
82  u'application/vnd.oasis.opendocument.spreadsheet': u'.ods',
83  u'application/vnd.oasis.opendocument.spreadsheet-template': u'.ots',
84  u'application/vnd.oasis.opendocument.chart': u'.odc',
85  u'application/vnd.oasis.opendocument.chart-template': u'.otc',
86  u'application/vnd.oasis.opendocument.image': u'.odi',
87  u'application/vnd.oasis.opendocument.image-template': u'.oti',
88  u'application/vnd.oasis.opendocument.formula': u'.odf',
89  u'application/vnd.oasis.opendocument.formula-template': u'.otf',
90  u'application/vnd.oasis.opendocument.text-master': u'.odm',
91  u'application/vnd.oasis.opendocument.text-web': u'.oth',
92 }
93 
94 
99 
106  def __init__(self, filename, mediatype, content=None):
107  assert(type(filename)==type(u""))
108  assert(type(mediatype)==type(u""))
109  assert(type(content)==type(b"") or content == None)
110 
111  self.mediatype = mediatype
112  self.filename = filename
113  self.content = content
114 
115 
123  thumbnail = None
124 
125 
131  def __init__(self, mimetype, add_generator=True):
132  assert(type(mimetype)==type(u""))
133  assert(isinstance(add_generator,True.__class__))
134 
135  self.mimetype = mimetype
136  self.childobjects = []
137  self._extra = []
138  self.folder = u"" # Always empty for toplevel documents
139  self.topnode = Document(mimetype=self.mimetype)
140  self.topnode.ownerDocument = self
141 
142  self.clear_caches()
143 
144  self.Pictures = {}
145  self.meta = Meta()
146  self.topnode.addElement(self.meta)
147  if add_generator:
148  self.meta.addElement(meta.Generator(text=TOOLSVERSION))
149  self.scripts = Scripts()
150  self.topnode.addElement(self.scripts)
152  self.topnode.addElement(self.fontfacedecls)
154  self.topnode.addElement(self.settings)
155  self.styles = Styles()
156  self.topnode.addElement(self.styles)
158  self.topnode.addElement(self.automaticstyles)
160  self.topnode.addElement(self.masterstyles)
161  self.body = Body()
162  self.topnode.addElement(self.body)
163 
164  def rebuild_caches(self, node=None):
165  if node is None: node = self.topnode
166  self.build_caches(node)
167  for e in node.childNodes:
168  if e.nodeType == element.Node.ELEMENT_NODE:
169  self.rebuild_caches(e)
170 
171 
175  def clear_caches(self):
176  self.element_dict = {}
177  self._styles_dict = {}
178  self._styles_ooo_fix = {}
179 
180 
    def build_caches(self, elt):
        """Register one element in the internal caches.

        The element is appended to self.element_dict under its qname.  A
        style:style element is additionally handed to __register_stylename.
        If the element's text:style-name attribute is a key of
        self._styles_ooo_fix, the attribute is rewritten to the mapped name.

        @param elt the element to register
        """
        # A plain isinstance(elt, element.Element) check is not sufficient:
        # with Python3 the type of elt showed up as odf.element.Element in
        # one test, so both spellings of the class are accepted.
        import odf.element
        assert(isinstance(elt, element.Element) or isinstance(elt, odf.element.Element) )

        if elt.qname not in self.element_dict:
            self.element_dict[elt.qname] = []
        self.element_dict[elt.qname].append(elt)
        if elt.qname == (STYLENS, u'style'):
            self.__register_stylename(elt) # Add to style dictionary
        styleref = elt.getAttrNS(TEXTNS,u'style-name')
        if styleref is not None and styleref in self._styles_ooo_fix:
            elt.setAttrNS(TEXTNS,u'style-name', self._styles_ooo_fix[styleref])
201 
202 
    def __register_stylename(self, elt):
        """Record a style element in self._styles_dict under its style:name.

        Elements without a style:name are ignored.  When the name is already
        registered, the new element is renamed by prefixing u'M' and the
        old-name -> new-name mapping is stored in self._styles_ooo_fix.

        @param elt an element.Element instance
        """
        assert(isinstance(elt, element.Element))

        name = elt.getAttrNS(STYLENS, u'name')
        if name is None:
            return
        # Only children of office:styles / office:automatic-styles are
        # registered.
        # NOTE(review): the nesting of the final assignment is reconstructed
        # from a listing that lost its indentation - confirm against the
        # original file.
        if elt.parentNode.qname in ((OFFICENS,u'styles'), (OFFICENS,u'automatic-styles')):
            if name in self._styles_dict:
                newname = u'M'+name # Rename style
                self._styles_ooo_fix[name] = newname
                # From here on all references to the old name will refer to the new one
                name = newname
                elt.setAttrNS(STYLENS, u'name', name)
            self._styles_dict[name] = elt
223 
224 
234  def toXml(self, filename=u''):
235  assert(type(filename)==type(u""))
236 
237  result=None
238  xml=StringIO()
239  if sys.version_info[0]==2:
240  xml.write(_XMLPROLOGUE)
241  else:
242  xml.write(_XMLPROLOGUE)
243  self.body.toXml(0, xml)
244  if not filename:
245  result=xml.getvalue()
246  else:
247  f=codecs.open(filename,'w', encoding='utf-8')
248  f.write(xml.getvalue())
249  f.close()
250  return result
251 
252 
257  def xml(self):
258  self.__replaceGenerator()
259  xml=StringIO()
260  if sys.version_info[0]==2:
261  xml.write(_XMLPROLOGUE)
262  else:
263  xml.write(_XMLPROLOGUE)
264  self.topnode.toXml(0, xml)
265  return xml.getvalue().encode("utf-8")
266 
267 
268 
273  def contentxml(self):
274  xml=StringIO()
275  xml.write(_XMLPROLOGUE)
276  x = DocumentContent()
277  x.write_open_tag(0, xml)
278  if self.scripts.hasChildNodes():
279  self.scripts.toXml(1, xml)
280  if self.fontfacedecls.hasChildNodes():
281  self.fontfacedecls.toXml(1, xml)
282  a = AutomaticStyles()
283  stylelist = self._used_auto_styles([self.styles, self.automaticstyles, self.body])
284  if len(stylelist) > 0:
285  a.write_open_tag(1, xml)
286  for s in stylelist:
287  s.toXml(2, xml)
288  a.write_close_tag(1, xml)
289  else:
290  a.toXml(1, xml)
291  self.body.toXml(1, xml)
292  x.write_close_tag(0, xml)
293  return xml.getvalue().encode("utf-8")
294 
295 
301  def __manifestxml(self):
302  xml=StringIO()
303  xml.write(_XMLPROLOGUE)
304  self.manifest.toXml(0,xml)
305  result=xml.getvalue()
306  assert(type(result)==type(u""))
307  return result
308 
309 
314  def metaxml(self):
315  self.__replaceGenerator()
316  x = DocumentMeta()
317  x.addElement(self.meta)
318  xml=StringIO()
319  xml.write(_XMLPROLOGUE)
320  x.toXml(0,xml)
321  result=xml.getvalue()
322  assert(type(result)==type(u""))
323  return result
324 
325 
330  def settingsxml(self):
331  x = DocumentSettings()
332  x.addElement(self.settings)
333  xml=StringIO()
334  if sys.version_info[0]==2:
335  xml.write(_XMLPROLOGUE)
336  else:
337  xml.write(_XMLPROLOGUE)
338  x.toXml(0,xml)
339  result=xml.getvalue()
340  assert(type(result)==type(u""))
341  return result
342 
343 
350  def _parseoneelement(self, top, stylenamelist):
351  for e in top.childNodes:
352  if e.nodeType == element.Node.ELEMENT_NODE:
353  for styleref in (
354  (CHARTNS,u'style-name'),
355  (DRAWNS,u'style-name'),
356  (DRAWNS,u'text-style-name'),
357  (PRESENTATIONNS,u'style-name'),
358  (STYLENS,u'data-style-name'),
359  (STYLENS,u'list-style-name'),
360  (STYLENS,u'page-layout-name'),
361  (STYLENS,u'style-name'),
362  (TABLENS,u'default-cell-style-name'),
363  (TABLENS,u'style-name'),
364  (TEXTNS,u'style-name') ):
365  if e.getAttrNS(styleref[0],styleref[1]):
366  stylename = e.getAttrNS(styleref[0],styleref[1])
367  if stylename not in stylenamelist:
368  # due to the polymorphism of e.getAttrNS(),
369  # a unicode type is enforced for elements
370  stylenamelist.append(unicode(stylename))
371  stylenamelist = self._parseoneelement(e, stylenamelist)
372  return stylenamelist
373 
374 
381  def _used_auto_styles(self, segments):
382  stylenamelist = []
383  for top in segments:
384  stylenamelist = self._parseoneelement(top, stylenamelist)
385  stylelist = []
386  for e in self.automaticstyles.childNodes:
387  if e.getAttrNS(STYLENS,u'name') in stylenamelist:
388  stylelist.append(e)
389 
390  # check the type of the returned data
391  ok=True
392  for e in stylelist: ok = ok and isinstance(e, element.Element)
393  assert(ok)
394 
395  return stylelist
396 
397 
402  def stylesxml(self):
403  xml=StringIO()
404  xml.write(_XMLPROLOGUE)
405  x = DocumentStyles()
406  x.write_open_tag(0, xml)
407  if self.fontfacedecls.hasChildNodes():
408  self.fontfacedecls.toXml(1, xml)
409  self.styles.toXml(1, xml)
410  a = AutomaticStyles()
411  a.write_open_tag(1, xml)
412  for s in self._used_auto_styles([self.masterstyles]):
413  s.toXml(2, xml)
414  a.write_close_tag(1, xml)
415  if self.masterstyles.hasChildNodes():
416  self.masterstyles.toXml(1, xml)
417  x.write_close_tag(0, xml)
418  result = xml.getvalue()
419 
420  assert(type(result)==type(u""))
421 
422  return result
423 
424 
436  def addPicture(self, filename, mediatype=None, content=None):
437  if content is None:
438  if mediatype is None:
439  mediatype, encoding = mimetypes.guess_type(filename)
440  if mediatype is None:
441  mediatype = u''
442  try: ext = filename[filename.rindex(u'.'):]
443  except: ext=u''
444  else:
445  ext = mimetypes.guess_extension(mediatype)
446  manifestfn = u"Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
447  self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
448  content=b"" # this value is only use by the assert further
449  filename=u"" # this value is only use by the assert further
450  else:
451  manifestfn = filename
452  self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
453 
454  assert(type(filename)==type(u""))
455  assert(type(content) == type(b""))
456 
457  return manifestfn
458 
459 
470  def addPictureFromFile(self, filename, mediatype=None):
471  if mediatype is None:
472  mediatype, encoding = mimetypes.guess_type(filename)
473  if mediatype is None:
474  mediatype = u''
475  try: ext = filename[filename.rindex(u'.'):]
476  except ValueError: ext=u''
477  else:
478  ext = mimetypes.guess_extension(mediatype)
479  manifestfn = u"Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
480  self.Pictures[manifestfn] = (IS_FILENAME, filename, mediatype)
481 
482  assert(type(filename)==type(u""))
483  assert(type(mediatype)==type(u""))
484 
485  return manifestfn
486 
487 
498  def addPictureFromString(self, content, mediatype):
499  assert(type(content)==type(b""))
500  assert(type(mediatype)==type(u""))
501 
502  ext = mimetypes.guess_extension(mediatype)
503  manifestfn = u"Pictures/%0.0f%s" % ((time.time()*10000000000), ext)
504  self.Pictures[manifestfn] = (IS_IMAGE, content, mediatype)
505  return manifestfn
506 
507 
513  def addThumbnail(self, filecontent=None):
514  assert(type(filecontent)==type(b""))
515 
516  if filecontent is None:
517  import thumbnail
518  self.thumbnail = thumbnail.thumbnail()
519  else:
520  self.thumbnail = filecontent
521 
522 
530  def addObject(self, document, objectname=None):
531  assert(isinstance(document, OpenDocument))
532  assert(type(objectname)==type(u"") or objectname == None)
533 
534  self.childobjects.append(document)
535  if objectname is None:
536  document.folder = u"%s/Object %d" % (self.folder, len(self.childobjects))
537  else:
538  document.folder = objectname
539  return u".%s" % document.folder
540 
541 
547  def _savePictures(self, anObject, folder):
548  assert(isinstance(anObject, OpenDocument))
549  assert(type(folder)==type(u""))
550 
551  hasPictures = False
552  for arcname, picturerec in anObject.Pictures.items():
553  what_it_is, fileobj, mediatype = picturerec
554  self.manifest.addElement(manifest.FileEntry(fullpath=u"%s%s" % ( folder ,arcname), mediatype=mediatype))
555  hasPictures = True
556  if what_it_is == IS_FILENAME:
557  self._z.write(fileobj, arcname, zipfile.ZIP_STORED)
558  else:
559  zi = zipfile.ZipInfo(str(arcname), self._now)
560  zi.compress_type = zipfile.ZIP_STORED
561  zi.external_attr = UNIXPERMS
562  self._z.writestr(zi, fileobj)
563  # According to section 17.7.3 in ODF 1.1, the pictures folder should not have a manifest entry
564 # if hasPictures:
565 # self.manifest.addElement(manifest.FileEntry(fullpath="%sPictures/" % folder, mediatype=""))
566  # Look in subobjects
567  subobjectnum = 1
568  for subobject in anObject.childobjects:
569  self._savePictures(subobject, u'%sObject %d/' % (folder, subobjectnum))
570  subobjectnum += 1
571 
572 
579  def __replaceGenerator(self):
580  for m in self.meta.childNodes[:]:
581  if m.qname == (METANS, u'generator'):
582  self.meta.removeChild(m)
583  self.meta.addElement(meta.Generator(text=TOOLSVERSION))
584 
585 
594  def save(self, outputfile, addsuffix=False):
595  assert(type(outputfile)==type(u"") or 'wb' in repr(outputfile) or 'BufferedWriter' in repr(outputfile) or 'BytesIO' in repr(outputfile))
596  assert(type(addsuffix)==type(True))
597 
598  if outputfile == u'-':
599  outputfp = zipfile.ZipFile(sys.stdout,"w")
600  else:
601  if addsuffix:
602  outputfile = outputfile + odmimetypes.get(self.mimetype,u'.xxx')
603  outputfp = zipfile.ZipFile(outputfile, "w")
604  self.__zipwrite(outputfp)
605  outputfp.close()
606 
607 
613  def write(self, outputfp):
614  assert('wb' in repr(outputfp) or 'BufferedWriter' in repr(outputfp) or 'BytesIO' in repr(outputfp))
615 
616  zipoutputfp = zipfile.ZipFile(outputfp,"w")
617  self.__zipwrite(zipoutputfp)
618 
619 
625  def __zipwrite(self, outputfp):
626  assert(isinstance(outputfp, zipfile.ZipFile))
627 
628  self._z = outputfp
629  self._now = time.localtime()[:6]
630  self.manifest = manifest.Manifest()
631 
632  # Write mimetype
633  zi = zipfile.ZipInfo('mimetype', self._now)
634  zi.compress_type = zipfile.ZIP_STORED
635  zi.external_attr = UNIXPERMS
636  self._z.writestr(zi, self.mimetype.encode("utf-8"))
637 
638  self._saveXmlObjects(self,u"")
639 
640  # Write pictures
641  self._savePictures(self,u"")
642 
643  # Write the thumbnail
644  if self.thumbnail is not None:
645  self.manifest.addElement(manifest.FileEntry(fullpath=u"Thumbnails/", mediatype=u''))
646  self.manifest.addElement(manifest.FileEntry(fullpath=u"Thumbnails/thumbnail.png", mediatype=u''))
647  zi = zipfile.ZipInfo(u"Thumbnails/thumbnail.png", self._now)
648  zi.compress_type = zipfile.ZIP_DEFLATED
649  zi.external_attr = UNIXPERMS
650  self._z.writestr(zi, self.thumbnail)
651 
652  # Write any extra files
653  for op in self._extra:
654  if op.filename == u"META-INF/documentsignatures.xml": continue # Don't save signatures
655  self.manifest.addElement(manifest.FileEntry(fullpath=op.filename, mediatype=op.mediatype))
656  if sys.version_info[0]==3:
657  zi = zipfile.ZipInfo(op.filename, self._now)
658  else:
659  zi = zipfile.ZipInfo(op.filename.encode('utf-8'), self._now)
660  zi.compress_type = zipfile.ZIP_DEFLATED
661  zi.external_attr = UNIXPERMS
662  if op.content is not None:
663  self._z.writestr(zi, op.content)
664  # Write manifest
665  zi = zipfile.ZipInfo(u"META-INF/manifest.xml", self._now)
666  zi.compress_type = zipfile.ZIP_DEFLATED
667  zi.external_attr = UNIXPERMS
668  self._z.writestr(zi, self.__manifestxml() )
669  del self._z
670  del self._now
671  del self.manifest
672 
673 
674 
680  def _saveXmlObjects(self, anObject, folder):
681  assert(isinstance(anObject, OpenDocument))
682  assert(type(folder)==type(u""))
683 
684  if self == anObject:
685  self.manifest.addElement(manifest.FileEntry(fullpath=u"/", mediatype=anObject.mimetype))
686  else:
687  self.manifest.addElement(manifest.FileEntry(fullpath=folder, mediatype=anObject.mimetype))
688  # Write styles
689  self.manifest.addElement(manifest.FileEntry(fullpath=u"%sstyles.xml" % folder, mediatype=u"text/xml"))
690  zi = zipfile.ZipInfo(u"%sstyles.xml" % folder, self._now)
691  zi.compress_type = zipfile.ZIP_DEFLATED
692  zi.external_attr = UNIXPERMS
693  self._z.writestr(zi, anObject.stylesxml().encode("utf-8") )
694 
695  # Write content
696  self.manifest.addElement(manifest.FileEntry(fullpath=u"%scontent.xml" % folder, mediatype=u"text/xml"))
697  zi = zipfile.ZipInfo(u"%scontent.xml" % folder, self._now)
698  zi.compress_type = zipfile.ZIP_DEFLATED
699  zi.external_attr = UNIXPERMS
700  self._z.writestr(zi, anObject.contentxml() )
701 
702  # Write settings
703  if anObject.settings.hasChildNodes():
704  self.manifest.addElement(manifest.FileEntry(fullpath=u"%ssettings.xml" % folder, mediatype=u"text/xml"))
705  zi = zipfile.ZipInfo(u"%ssettings.xml" % folder, self._now)
706  zi.compress_type = zipfile.ZIP_DEFLATED
707  zi.external_attr = UNIXPERMS
708  self._z.writestr(zi, anObject.settingsxml() )
709 
710  # Write meta
711  if self == anObject:
712  self.manifest.addElement(manifest.FileEntry(fullpath=u"meta.xml", mediatype=u"text/xml"))
713  zi = zipfile.ZipInfo(u"meta.xml", self._now)
714  zi.compress_type = zipfile.ZIP_DEFLATED
715  zi.external_attr = UNIXPERMS
716  self._z.writestr(zi, anObject.metaxml() )
717 
718  # Write subobjects
719  subobjectnum = 1
720  for subobject in anObject.childobjects:
721  self._saveXmlObjects(subobject, u'%sObject %d/' % (folder, subobjectnum))
722  subobjectnum += 1
723 
724 # Document's DOM methods
725 
732  def createElement(self, elt):
733  assert(isinstance(elt, element.Element))
734 
735  # this old code is ambiguous: is 'element' the module or is it the
736  # local variable? To disambiguate this, the local variable has been
737  # renamed to 'elt'
738  #return element(check_grammar=False)
739  return elt(check_grammar=False)
740 
741 
747  def createTextNode(self, data):
748  assert(type(data)==type(u""))
749 
750  return element.Text(data)
751 
752 
758  def createCDATASection(self, data):
759  assert(type(data)==type(u""))
760 
761  return element.CDATASection(cdata)
762 
763 
768  def getMediaType(self):
769  assert (type(self.mimetype)==type(u""))
770 
771  return self.mimetype
772 
773 
779  def getStyleByName(self, name):
780  assert(type(name)==type(u""))
781 
782  ncname = make_NCName(name)
783  if self._styles_dict == {}:
784  self.rebuild_caches()
785  result=self._styles_dict.get(ncname, None)
786 
787  assert(isinstance(result, element.Element))
788  return result
789 
790 
797  def getElementsByType(self, elt):
798  import types
799  assert(isinstance (elt, types.FunctionType))
800 
801  obj = elt(check_grammar=False)
802  assert (isinstance(obj, element.Element))
803 
804  if self.element_dict == {}:
805  self.rebuild_caches()
806 
807  # This previous code was ambiguous
808  # was "element" the module name or the local variable?
809  # the local variable is renamed to "elt" to disambiguate the code
810  #return self.element_dict.get(obj.qname, [])
811 
812  result=self.element_dict.get(obj.qname, [])
813 
814  ok=True
815  for e in result: ok = ok and isinstance(e, element.Element)
816  assert(ok)
817 
818  return result
819 
820 # Convenience functions
821 
827  doc = OpenDocument(u'application/vnd.oasis.opendocument.chart')
828  doc.chart = Chart()
829  doc.body.addElement(doc.chart)
830  return doc
831 
832 
838  doc = OpenDocument(u'application/vnd.oasis.opendocument.graphics')
839  doc.drawing = Drawing()
840  doc.body.addElement(doc.drawing)
841  return doc
842 
843 
849  doc = OpenDocument(u'application/vnd.oasis.opendocument.image')
850  doc.image = Image()
851  doc.body.addElement(doc.image)
852  return doc
853 
854 
860  doc = OpenDocument(u'application/vnd.oasis.opendocument.presentation')
861  doc.presentation = Presentation()
862  doc.body.addElement(doc.presentation)
863  return doc
864 
865 
871  doc = OpenDocument(u'application/vnd.oasis.opendocument.spreadsheet')
872  doc.spreadsheet = Spreadsheet()
873  doc.body.addElement(doc.spreadsheet)
874  return doc
875 
876 
882  doc = OpenDocument(u'application/vnd.oasis.opendocument.text')
883  doc.text = Text()
884  doc.body.addElement(doc.text)
885  return doc
886 
887 
893  doc = OpenDocument(u'application/vnd.oasis.opendocument.text-master')
894  doc.text = Text()
895  doc.body.addElement(doc.text)
896  return doc
897 
898 
def __loadxmlparts(z, manifest, doc, objectpath):
    """Parse the XML parts of one (sub)document into doc.

    settings.xml, meta.xml, content.xml and styles.xml below objectpath are
    parsed, in that order, when they are listed in the manifest.  While a
    part is parsed its name is available as doc._parsing.  A part that is
    missing from the archive is skipped; a part that SAX cannot parse is
    printed and skipped.

    @param z an open zipfile.ZipFile
    @param manifest dict as returned by manifestlist
    @param doc an OpenDocument instance
    @param objectpath unicode string, path prefix inside the package
    """
    assert(isinstance(z, zipfile.ZipFile))
    assert(type(manifest)==type(dict()))
    assert(isinstance(doc, OpenDocument))
    assert(type(objectpath)==type(u""))

    from load import LoadParser
    from xml.sax import make_parser, handler, SAXParseException

    for xmlfile in (objectpath+u'settings.xml', objectpath+u'meta.xml', objectpath+u'content.xml', objectpath+u'styles.xml'):
        if xmlfile not in manifest:
            continue
        try:
            xmlpart = z.read(xmlfile).decode("utf-8")
        except KeyError:
            # listed in the manifest but absent from the archive
            continue

        # There may be a SAXParseException triggered because of
        # a missing xmlns prefix like meta, config, etc.
        # So such declarations are added when needed (GK, 2014/10/21).
        xmlpart = __fixXmlPart(xmlpart)

        doc._parsing = xmlfile
        try:
            parser = make_parser()
            parser.setFeature(handler.feature_namespaces, 1)
            parser.setContentHandler(LoadParser(doc))
            parser.setErrorHandler(handler.ErrorHandler())

            inpsrc = InputSource()
            inpsrc.setByteStream(BytesIO(xmlpart.encode("utf-8")))
            parser.parse(inpsrc)
        except KeyError:
            pass
        except SAXParseException:
            print (u"====== SAX FAILED TO PARSE ==========\n", xmlpart)
        finally:
            # never leave the marker behind, whatever happened
            del doc._parsing
946 
947 
def __fixXmlPart(xmlpart):
    """Add namespace declarations for prefixes that are not declared.

    For each of the prefixes meta, config, dc, style, svg, fo, draw, table
    and form that has no xmlns declaration in xmlpart, a declaration is
    inserted in front of the first u" xmlns:" of the text.  A text without
    any u" xmlns:" is returned unchanged.

    @param xmlpart unicode string, the XML source
    @return unicode string, the possibly extended XML source
    """
    import re

    result=xmlpart
    requestedPrefixes = (u'meta', u'config', u'dc', u'style',
                         u'svg', u'fo',u'draw', u'table',u'form')
    for prefix in requestedPrefixes:
        # Match the complete prefix up to the '=': a plain substring test
        # would take xmlns:form for a declaration of xmlns:fo.
        declared = re.search(u'\\sxmlns:' + re.escape(prefix) + u'\\s*=', xmlpart)
        if declared:
            continue
        pos = result.find(u" xmlns:")
        if pos < 0:
            # No declaration to anchor on (seen with math elements,
            # 2016-02-19 G.K.): nothing can be inserted for any prefix.
            break
        # NOTE(review): this URI pattern is not the real ODF namespace for
        # dc, svg, fo and draw - confirm whether that is intended.
        toInsert=u' xmlns:{prefix}="urn:oasis:names:tc:opendocument:xmlns:{prefix}:1.0"'.format(prefix=prefix)
        result=result[:pos]+toInsert+result[pos:]
    return result
973 
974 
975 
def __detectmimetype(zipfd, odffile):
    """Determine the mime type of an ODF package.

    Three mechanisms are tried in turn: the 'mimetype' member of the
    archive, the media type of the u"/" entry of the manifest, and finally
    the text document type as a default.

    @param zipfd an open zipfile.ZipFile
    @param odffile unicode file name or binary file object the archive was
    opened from (only type-checked here)
    @return the mime type as a unicode string
    """
    assert(isinstance(zipfd, zipfile.ZipFile))
    assert(type(odffile)==type(u"") or 'rb' in repr(odffile) \
           or 'BufferedReader' in repr(odffile) or 'BytesIO' in repr(odffile))

    try:
        return zipfd.read('mimetype').decode("utf-8")
    except (KeyError, UnicodeDecodeError, zipfile.BadZipfile):
        # missing, undecodable or corrupt member:
        # fall through to the next mechanism
        pass

    manifestpart = zipfd.read('META-INF/manifest.xml')
    manifest = manifestlist(manifestpart)
    for mentry,mvalue in manifest.items():
        if mentry == "/":
            assert(type(mvalue['media-type'])==type(u""))
            return mvalue['media-type']

    # Fall-through to last mechanism
    return u'application/vnd.oasis.opendocument.text'
1002 
1003 
def load(odffile):
    """Load an ODF file into memory.

    The XML parts are parsed into a new OpenDocument; pictures, the
    thumbnail, "Object N/" subdocuments and any other manifest entries are
    attached to it.  Depending on the mime type, the first child of the
    body is also made available as doc.text, doc.graphics (and
    doc.drawing), doc.presentation, doc.spreadsheet, doc.chart, doc.image
    or doc.formula.

    @param odffile unicode file name or binary file object open for reading
    @return the loaded OpenDocument instance
    """
    assert(type(odffile)==type(u"") or 'rb' in repr(odffile) \
           or 'BufferedReader' in repr(odffile) or 'BytesIO' in repr(odffile))

    z = zipfile.ZipFile(odffile)
    try:
        mimetype = __detectmimetype(z, odffile)
        doc = OpenDocument(mimetype, add_generator=False)

        # Look in the manifest file to see which of the four files there are
        manifestpart = z.read('META-INF/manifest.xml')
        manifest = manifestlist(manifestpart)
        __loadxmlparts(z, manifest, doc, u'')
        for mentry,mvalue in manifest.items():
            if mentry[:9] == u"Pictures/" and len(mentry) > 9:
                doc.addPicture(mvalue['full-path'], mvalue['media-type'], z.read(mentry))
            elif mentry == u"Thumbnails/thumbnail.png":
                doc.addThumbnail(z.read(mentry))
            elif mentry in (u'settings.xml', u'meta.xml', u'content.xml', u'styles.xml'):
                pass
            # Load subobjects into structure
            elif mentry[:7] == u"Object " and len(mentry) < 11 and mentry[-1] == u"/":
                subdoc = OpenDocument(mvalue['media-type'], add_generator=False)
                doc.addObject(subdoc, u"/" + mentry[:-1])
                __loadxmlparts(z, manifest, subdoc, mentry)
            elif mentry[:7] == u"Object ":
                pass # Don't load subobjects as opaque objects
            else:
                if mvalue['full-path'][-1] == u'/':
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], None))
                else:
                    doc._extra.append(OpaqueObject(mvalue['full-path'], mvalue['media-type'], z.read(mentry)))
                # Add the SUN junk here to the struct somewhere
                # It is cached data, so it can be out-of-date
    finally:
        # release the archive even when reading or parsing fails
        z.close()

    b = doc.getElementsByType(Body)
    # Prefix tests, so that e.g. the -template variants are covered too.
    if mimetype.startswith(u'application/vnd.oasis.opendocument.text'):
        doc.text = b[0].firstChild
    elif mimetype.startswith(u'application/vnd.oasis.opendocument.graphics'):
        doc.graphics = b[0].firstChild
        # same attribute name as the one set by OpenDocumentDrawing()
        doc.drawing = doc.graphics
    elif mimetype.startswith(u'application/vnd.oasis.opendocument.presentation'):
        doc.presentation = b[0].firstChild
    elif mimetype.startswith(u'application/vnd.oasis.opendocument.spreadsheet'):
        doc.spreadsheet = b[0].firstChild
    elif mimetype.startswith(u'application/vnd.oasis.opendocument.chart'):
        doc.chart = b[0].firstChild
    elif mimetype.startswith(u'application/vnd.oasis.opendocument.image'):
        doc.image = b[0].firstChild
    elif mimetype.startswith(u'application/vnd.oasis.opendocument.formula'):
        doc.formula = b[0].firstChild

    return doc
1061 
1062 # vim: set expandtab sw=4 :
def addObject(self, document, objectname=None)
Adds an object (subdocument).
def FontFaceDecls(args)
Definition: office.py:71
just a record to bear a filename, a mediatype and a bytes content
Definition: opendocument.py:98
def DocumentSettings(version="1.2", args)
Definition: office.py:59
def OpenDocumentDrawing()
Creates a drawing document.
def createTextNode(self, data)
Method to create a text node.
def Image(args)
Definition: draw.py:125
def Document(version="1.2", args)
Definition: office.py:50
A class to hold the content of an OpenDocument document Use the xml method to write the XML source to...
def OpenDocumentSpreadsheet()
Creates a spreadsheet document.
def addPicture(self, filename, mediatype=None, content=None)
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
def Presentation(args)
Definition: office.py:86
def toXml(self, filename=u'')
converts the document to a valid Xml format.
def OpenDocumentPresentation()
Creates a presentation document.
def MasterStyles(args)
Definition: office.py:80
def metaxml(self)
Generates the meta.xml file.
def contentxml(self)
Generates the content.xml file.
def _saveXmlObjects(self, anObject, folder)
save xml objects of an opendocument to some folder
def addPictureFromString(self, content, mediatype)
Add a picture from contents given as a Byte string.
def __zipwrite(self, outputfp)
Write the document to an open file pointer This is where the real work is done.
def getStyleByName(self, name)
Finds a style object based on the name.
def __register_stylename(self, elt)
Register a style.
def rebuild_caches(self, node=None)
def AutomaticStyles(args)
Definition: office.py:32
def stylesxml(self)
Generates the styles.xml file.
def OpenDocumentImage()
Creates an image document.
def addThumbnail(self, filecontent=None)
Add a fixed thumbnail The thumbnail in the library is big, so this is pretty useless.
def save(self, outputfile, addsuffix=False)
Save the document under the filename.
def load(odffile)
Load an ODF file into memory.
def clear_caches(self)
Clears internal caches.
def DocumentStyles(version="1.2", args)
Definition: office.py:62
def DocumentMeta(version="1.2", args)
Definition: office.py:56
def Chart(args)
Definition: chart.py:31
def __manifestxml(self)
Generates the manifest.xml file; self.manifest isn't available unless the document is being saved...
def write(self, outputfp)
User API to write the ODF file to an open file descriptor Writes the ZIP format.
def addPictureFromFile(self, filename, mediatype=None)
Add a picture It uses the same convention as OOo, in that it saves the picture in the zipfile in the ...
def build_caches(self, elt)
Builds internal caches; called from element.py.
def OpenDocumentText()
Creates a text document.
def OpenDocumentTextMaster()
Creates a text master document.
def getMediaType(self)
Returns the media type.
def _savePictures(self, anObject, folder)
saves pictures contained in an object
def OpenDocumentChart()
Creates a chart document.
def __replaceGenerator(self)
Removes a previous 'generator' instance and declares TOOLSVERSION as the new generator.
def Settings(args)
Definition: office.py:95
def manifestlist(manifestxml)
Definition: odfmanifest.py:95
def Drawing(args)
Definition: office.py:65
def Styles(args)
Definition: office.py:101
def __init__(self, filename, mediatype, content=None)
the constructor
def _used_auto_styles(self, segments)
Loop through the masterstyles elements, and find the automatic styles that are used.
def _parseoneelement(self, top, stylenamelist)
Finds references to style objects in master-styles and add the style name to the style list if not al...
def createCDATASection(self, data)
Method to create a CDATA section.
def xml(self)
Generates the full document as an XML "file".
Creates an arbitrary element and is intended to be subclassed, not used on its own. ...
Definition: element.py:299
def Spreadsheet(args)
Definition: office.py:98
def Text(args)
Definition: form.py:104
def createElement(self, elt)
Inconvenient interface to create an element, but follows XML-DOM.
def __init__(self, mimetype, add_generator=True)
the constructor
def Scripts(args)
Definition: office.py:92
def Body(args)
Definition: office.py:38
def getElementsByType(self, elt)
Gets elements based on the type, which is function from text.py, draw.py etc.
def Meta(args)
Definition: office.py:83
def settingsxml(self)
Generates the settings.xml file.
def DocumentContent(version="1.2", args)
Definition: office.py:53