LibreOffice Module xmerge (master) 1
OfficeDocument.java
Go to the documentation of this file.
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
18
19package org.openoffice.xmerge.converter.xml;
20
21import java.io.InputStream;
22import java.io.OutputStream;
23import java.io.Reader;
24import java.io.BufferedReader;
25import java.io.StringReader;
26import java.io.StringWriter;
27import java.io.InputStreamReader;
28import java.io.ByteArrayOutputStream;
29import java.io.ByteArrayInputStream;
30import java.io.IOException;
31import java.util.Iterator;
32import java.util.Map;
33import java.util.HashMap;
34
35import javax.xml.parsers.DocumentBuilderFactory;
36import javax.xml.parsers.DocumentBuilder;
37import javax.xml.parsers.ParserConfigurationException;
38
39import org.w3c.dom.Node;
40import org.w3c.dom.Element;
41import org.w3c.dom.Document;
42import org.w3c.dom.DOMImplementation;
43import org.w3c.dom.DocumentType;
44import org.w3c.dom.NodeList;
45import org.xml.sax.InputSource;
46import org.w3c.dom.NamedNodeMap;
47import org.xml.sax.SAXException;
48
49import javax.xml.transform.*;
50import javax.xml.transform.dom.*;
51import javax.xml.transform.stream.*;
52
54
58public abstract class OfficeDocument
59 implements org.openoffice.xmerge.Document, OfficeConstants {
60
62 private static DocumentBuilderFactory factory =
63 DocumentBuilderFactory.newInstance();
64
66 private Document contentDoc = null;
67
69 private Document metaDoc = null;
70
72 private Document settingsDoc = null;
73
75 private Document styleDoc = null;
76
78 private Document manifestDoc = null;
79
80 private String documentName = null;
81 private String fileName = null;
82
90 private OfficeZip zip = null;
91
93 private Map<String, EmbeddedObject> embeddedObjects = null;
94
/**
 * Creates a document called {@code name}, configuring the shared parser
 * factory as namespace-aware and non-validating.
 *
 * @param name the document name, with or without the file extension.
 */
public OfficeDocument(String name) {
    this(name, true, false);
}
103
/**
 * Creates a document called {@code name} and applies the given parser
 * settings to the shared (static) {@code DocumentBuilderFactory}.
 *
 * <p>The extension reported by {@code getFileExtension()} is stripped
 * from {@code name} to form the document name and appended again to form
 * the file name.</p>
 *
 * @param name           the document name, with or without the extension.
 * @param namespaceAware value passed to {@code setNamespaceAware}.
 * @param validating     value passed to {@code setValidating}.
 */
public OfficeDocument(String name, boolean namespaceAware, boolean validating) {
    // NOTE(review): these calls mutate the static factory, so they also
    // affect builders created later on behalf of other instances.
    factory.setValidating(validating);
    factory.setNamespaceAware(namespaceAware);

    documentName = trimDocumentName(name);
    fileName = documentName + getFileExtension();
}
119
127 private String trimDocumentName(String name) {
128 String temp = name.toLowerCase();
129 String ext = getFileExtension();
130
131 if (temp.endsWith(ext)) {
132 // strip the extension
133 int nlen = name.length();
134 int endIndex = nlen - ext.length();
135 name = name.substring(0,endIndex);
136 }
137
138 return name;
139 }
140
150 public Document getContentDOM() {
151
152 return contentDoc;
153 }
154
164 public Document getMetaDOM() {
165
166 return metaDoc;
167 }
168
178 public Document getSettingsDOM() {
179
180 return settingsDoc;
181 }
182
188 public void setContentDOM( Node newDom) {
189 contentDoc = (Document)newDom;
190 }
191
197 public void setMetaDOM (Node newDom) {
198 metaDoc = (Document)newDom;
199 }
200
206 public void setSettingsDOM (Node newDom) {
207 settingsDoc = (Document)newDom;
208 }
209
215 public void setStyleDOM (Node newDom) {
216 styleDoc = (Document)newDom;
217 }
218
230 public Document getStyleDOM() {
231
232 return styleDoc;
233 }
234
240 public String getName() {
241
242 return documentName;
243 }
244
251 public String getFileName() {
252
253 return fileName;
254 }
255
261 protected abstract String getFileExtension();
262
269 private Iterator<EmbeddedObject> getEmbeddedObjects() {
270
271 if (embeddedObjects == null && manifestDoc != null) {
272 embeddedObjects = new HashMap<String, EmbeddedObject>();
273
274 // Need to read the manifest file and construct a list of objects
275 NodeList nl = manifestDoc.getElementsByTagName(TAG_MANIFEST_FILE);
276
277 // Don't create the HashMap if there are no embedded objects
278 int len = nl.getLength();
279 for (int i = 0; i < len; i++) {
280 Node n = nl.item(i);
281
282 NamedNodeMap attrs = n.getAttributes();
283
284 String type = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_TYPE).getNodeValue();
285 String path = attrs.getNamedItem(ATTRIBUTE_MANIFEST_FILE_PATH).getNodeValue();
286
287 /*
288 * According to OpenOffice.org XML File Format document (ver. 1)
289 * there are only two types of embedded object:
290 *
291 * Objects with an XML representation.
292 * Objects without an XML representation.
293 *
294 * The former are represented by one or more XML files.
295 * The latter are in binary form.
296 */
297 if (type.startsWith("application/vnd.sun.xml"))
298 {
299 if (path.equals("/")) {
300 // Exclude the main document entries
301 continue;
302 }
303 // Take off the trailing '/'
304 String name = path.substring(0, path.length() - 1);
305 embeddedObjects.put(name, new EmbeddedXMLObject(name, type, zip));
306 }
307 else if (type.equals("text/xml")) {
308 // XML entries are either embedded StarOffice doc entries or main
309 // document entries
310 continue;
311 }
312 else { // FIX (HJ): allows empty MIME type
313 embeddedObjects.put(path, new EmbeddedBinaryObject(path, type, zip));
314 }
315 }
316 }
317
318 if (embeddedObjects == null) {
319 return null;
320 }
321
322 return embeddedObjects.values().iterator();
323 }
324
332 public void read(InputStream is) throws IOException {
333
334 Debug.log(Debug.INFO, "reading Office file");
335 DocumentBuilder builder = null;
336
337 try {
338 builder = factory.newDocumentBuilder();
339 } catch (ParserConfigurationException ex) {
340 throw new OfficeDocumentException(ex);
341 }
342
343 // read in Office zip file format
344 zip = new OfficeZip();
345 zip.read(is);
346
347 // grab the content.xml and
348 // parse it into contentDoc.
349 byte contentBytes[] = zip.getContentXMLBytes();
350 if (contentBytes == null) {
351 throw new OfficeDocumentException("Entry content.xml not found in file");
352 }
353 try {
354 contentDoc = parse(builder, contentBytes);
355 } catch (SAXException ex) {
356 throw new OfficeDocumentException(ex);
357 }
358
359 // if style.xml exists, grab the style.xml
360 // parse it into styleDoc.
361 byte styleBytes[] = zip.getStyleXMLBytes();
362 if (styleBytes != null) {
363 try {
364 styleDoc = parse(builder, styleBytes);
365 } catch (SAXException ex) {
366 throw new OfficeDocumentException(ex);
367 }
368 }
369
370 byte metaBytes[] = zip.getMetaXMLBytes();
371 if (metaBytes != null) {
372 try {
373 metaDoc = parse(builder, metaBytes);
374 } catch (SAXException ex) {
375 throw new OfficeDocumentException(ex);
376 }
377 }
378
379 byte settingsBytes[] = zip.getSettingsXMLBytes();
380 if (settingsBytes != null) {
381 try {
382 settingsDoc = parse(builder, settingsBytes);
383
384 } catch (SAXException ex) {
385 throw new OfficeDocumentException(ex);
386 }
387 }
388
389 // Read in the META-INF/manifest.xml file
390 byte manifestBytes[] = zip.getManifestXMLBytes();
391 if (manifestBytes != null) {
392 try {
393 manifestDoc = parse(builder, manifestBytes);
394 } catch (SAXException ex) {
395 throw new OfficeDocumentException(ex);
396 }
397 }
398 }
399
408 public void read(InputStream is, boolean isZip) throws IOException {
409
410 Debug.log(Debug.INFO, "reading Office file");
411
412 DocumentBuilder builder = null;
413
414 try {
415 builder = factory.newDocumentBuilder();
416 } catch (ParserConfigurationException ex) {
417 throw new OfficeDocumentException(ex);
418 }
419
420 if (isZip) {
421 read(is);
422 } else {
423 try {
424 Reader r = secondHack(is);
425 InputSource ins = new InputSource(r);
426 org.w3c.dom.Document newDoc = builder.parse(ins);
427 Element rootElement = newDoc.getDocumentElement();
428
429 NodeList nodeList;
430 Node tmpNode;
431 Node rootNode = rootElement;
432
433 /* content */
434 contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
435 rootElement = contentDoc.getDocumentElement();
436 rootNode = rootElement;
437
438 // FIX (HJ): Include office:font-decls in content DOM
439 nodeList = newDoc
440 .getElementsByTagName(TAG_OFFICE_FONT_DECLS);
441 if (nodeList.getLength() > 0) {
442 tmpNode = contentDoc.importNode(nodeList.item(0), true);
443 rootNode.appendChild(tmpNode);
444 }
445
446 nodeList = newDoc
447 .getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
448 if (nodeList.getLength() > 0) {
449 tmpNode = contentDoc.importNode(nodeList.item(0), true);
450 rootNode.appendChild(tmpNode);
451 }
452
453 nodeList = newDoc.getElementsByTagName(TAG_OFFICE_BODY);
454 if (nodeList.getLength() > 0) {
455 tmpNode = contentDoc.importNode(nodeList.item(0), true);
456 rootNode.appendChild(tmpNode);
457 }
458
459 /* Styles */
460 styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
461 rootElement = styleDoc.getDocumentElement();
462 rootNode = rootElement;
463
464 // FIX (HJ): Include office:font-decls in styles DOM
465 nodeList = newDoc
466 .getElementsByTagName(TAG_OFFICE_FONT_DECLS);
467 if (nodeList.getLength() > 0) {
468 tmpNode = styleDoc.importNode(nodeList.item(0), true);
469 rootNode.appendChild(tmpNode);
470 }
471
472 nodeList = newDoc.getElementsByTagName(TAG_OFFICE_STYLES);
473 if (nodeList.getLength() > 0) {
474 tmpNode = styleDoc.importNode(nodeList.item(0), true);
475 rootNode.appendChild(tmpNode);
476 }
477
478 // FIX (HJ): Include office:automatic-styles in styles DOM
479 nodeList = newDoc
480 .getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
481 if (nodeList.getLength() > 0) {
482 tmpNode = styleDoc.importNode(nodeList.item(0), true);
483 rootNode.appendChild(tmpNode);
484 }
485
486 // FIX (HJ): Include office:master-styles in styles DOM
487 nodeList = newDoc
488 .getElementsByTagName(TAG_OFFICE_MASTER_STYLES);
489 if (nodeList.getLength() > 0) {
490 tmpNode = styleDoc.importNode(nodeList.item(0), true);
491 rootNode.appendChild(tmpNode);
492 }
493
494 /* Settings */
495 settingsDoc = createDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
496 rootElement = settingsDoc.getDocumentElement();
497 rootNode = rootElement;
498 nodeList = newDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
499 if (nodeList.getLength() > 0) {
500 tmpNode = settingsDoc
501 .importNode(nodeList.item(0), true);
502 rootNode.appendChild(tmpNode);
503 }
504 /* Meta */
505 metaDoc = createDOM(TAG_OFFICE_DOCUMENT_META);
506 rootElement = metaDoc.getDocumentElement();
507 rootNode = rootElement;
508 nodeList = newDoc.getElementsByTagName(TAG_OFFICE_META);
509 if (nodeList.getLength() > 0) {
510 tmpNode = metaDoc.importNode(nodeList.item(0), true);
511 rootNode.appendChild(tmpNode);
512 }
513 } catch (SAXException ex) {
514 throw new OfficeDocumentException(ex);
515 }
516 }
517
518 }
519
531 static Document parse(DocumentBuilder builder, byte bytes[])
532 throws SAXException, IOException {
533
534 Document doc = null;
535
536 ByteArrayInputStream is = new ByteArrayInputStream(bytes);
537
538 // TODO: replace hack with a more appropriate fix.
539
540 Reader r = hack(is);
541 InputSource ins = new InputSource(r);
542 doc = builder.parse(ins);
543
544 return doc;
545 }
546
552 protected abstract String getDocumentMimeType();
553
561 public void write(OutputStream os) throws IOException {
562 if (zip == null) {
563 zip = new OfficeZip();
564 }
565
566 initManifestDOM();
567
568 Element domEntry;
569 Element manifestRoot = manifestDoc.getDocumentElement();
570
571 // The EmbeddedObjects come first.
572 Iterator<EmbeddedObject> embObjs = getEmbeddedObjects();
573 if (embObjs != null) {
574 while (embObjs.hasNext()) {
575 EmbeddedObject obj = embObjs.next();
576 obj.writeManifestData(manifestDoc);
577
578 obj.write(zip);
579 }
580 }
581
582 // Add in the entry for the Pictures directory. Always present.
583 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
584 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "Pictures/");
585 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "");
586 manifestRoot.appendChild(domEntry);
587
588 // Write content to the Zip file and then write any of the optional
589 // data, if it exists.
590 zip.setContentXMLBytes(docToBytes(contentDoc));
591
592 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
593 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "content.xml");
594 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
595
596 manifestRoot.appendChild(domEntry);
597
598 if (styleDoc != null) {
599 zip.setStyleXMLBytes(docToBytes(styleDoc));
600
601 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
602 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "styles.xml");
603 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
604 manifestRoot.appendChild(domEntry);
605 }
606
607 if (metaDoc != null) {
608 zip.setMetaXMLBytes(docToBytes(metaDoc));
609
610 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
611 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "meta.xml");
612 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
613 manifestRoot.appendChild(domEntry);
614 }
615
616 if (settingsDoc != null) {
617 zip.setSettingsXMLBytes(docToBytes(settingsDoc));
618
619 domEntry = manifestDoc.createElement(TAG_MANIFEST_FILE);
620 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "settings.xml");
621 domEntry.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, "text/xml");
622 manifestRoot.appendChild(domEntry);
623 }
624
625 zip.setManifestXMLBytes(docToBytes(manifestDoc));
626
627 zip.write(os);
628 }
629
638 public void write(OutputStream os, boolean isZip) throws IOException {
639
640 // Create an OfficeZip object if one does not exist.
641 if (isZip){
642 write(os);
643 } else {
644 try {
645 DocumentBuilderFactory builderFactory = DocumentBuilderFactory.newInstance();
646 DocumentBuilder builder= builderFactory.newDocumentBuilder();
647 DOMImplementation domImpl = builder.getDOMImplementation();
648 domImpl.createDocumentType("office:document","-//OpenOffice.org//DTD OfficeDocument 1.0//EN",null);
649 org.w3c.dom.Document newDoc = domImpl.createDocument("http://openoffice.org/2000/office","office:document",null);
650
651 Element rootElement=newDoc.getDocumentElement();
652 rootElement.setAttribute("xmlns:office","http://openoffice.org/2000/office");
653 rootElement.setAttribute("xmlns:style","http://openoffice.org/2000/style" );
654 rootElement.setAttribute("xmlns:text","http://openoffice.org/2000/text");
655 rootElement.setAttribute("xmlns:table","http://openoffice.org/2000/table");
656
657 rootElement.setAttribute("xmlns:draw","http://openoffice.org/2000/drawing");
658 rootElement.setAttribute("xmlns:fo","http://www.w3.org/1999/XSL/Format" );
659 rootElement.setAttribute("xmlns:xlink","http://www.w3.org/1999/xlink" );
660 rootElement.setAttribute("xmlns:dc","http://purl.org/dc/elements/1.1/" );
661 rootElement.setAttribute("xmlns:meta","http://openoffice.org/2000/meta" );
662 rootElement.setAttribute("xmlns:number","http://openoffice.org/2000/datastyle" );
663 rootElement.setAttribute("xmlns:svg","http://www.w3.org/2000/svg" );
664 rootElement.setAttribute("xmlns:chart","http://openoffice.org/2000/chart" );
665 rootElement.setAttribute("xmlns:dr3d","http://openoffice.org/2000/dr3d" );
666 rootElement.setAttribute("xmlns:math","http://www.w3.org/1998/Math/MathML" );
667 rootElement.setAttribute("xmlns:form","http://openoffice.org/2000/form" );
668 rootElement.setAttribute("xmlns:script","http://openoffice.org/2000/script" );
669 rootElement.setAttribute("xmlns:config","http://openoffice.org/2001/config" );
670 // #i41033# OASIS format needs the "office:class" set.
671 if(getDocumentMimeType().equals(SXC_MIME_TYPE))
672 rootElement.setAttribute("office:class","spreadsheet" );
673 else if(getDocumentMimeType().equals(SXW_MIME_TYPE))
674 rootElement.setAttribute("office:class","text" );
675 rootElement.setAttribute("office:version","1.0");
676
677 NodeList nodeList;
678 Node tmpNode;
679 Node rootNode = rootElement;
680 if (metaDoc !=null) {
681 nodeList= metaDoc.getElementsByTagName(TAG_OFFICE_META);
682 if (nodeList.getLength()>0) {
683 tmpNode = newDoc.importNode(nodeList.item(0),true);
684 rootNode.appendChild(tmpNode);
685 }
686 } if (styleDoc !=null) {
687 nodeList= styleDoc.getElementsByTagName(TAG_OFFICE_STYLES);
688 if (nodeList.getLength()>0){
689 tmpNode = newDoc.importNode(nodeList.item(0),true);
690 rootNode.appendChild(tmpNode);
691 }
692 } if (settingsDoc !=null) {
693 nodeList= settingsDoc.getElementsByTagName(TAG_OFFICE_SETTINGS);
694 if (nodeList.getLength()>0){
695 tmpNode = newDoc.importNode(nodeList.item(0),true);
696 rootNode.appendChild(tmpNode);
697 }
698 } if (contentDoc !=null) {
699 nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_AUTOMATIC_STYLES);
700 if (nodeList.getLength()>0){
701 tmpNode = newDoc.importNode(nodeList.item(0),true);
702 rootNode.appendChild(tmpNode);
703 }
704 nodeList= contentDoc.getElementsByTagName(TAG_OFFICE_BODY);
705 if (nodeList.getLength()>0){
706 tmpNode = newDoc.importNode(nodeList.item(0),true);
707 rootNode.appendChild(tmpNode);
708 }
709 }
710
711 byte contentBytes[] = docToBytes(newDoc);
712 os.write(contentBytes);
713 } catch(Exception exc){
714 System.out.println("\nException in OfficeDocument.write():" +exc);
715 }
716 }
717 }
718
719
733 static byte[] docToBytes(Document doc)
734 throws IOException {
735
736 ByteArrayOutputStream baos = new ByteArrayOutputStream();
737
738 java.lang.reflect.Constructor<?> con;
739 java.lang.reflect.Method meth;
740
741 String domImpl = doc.getClass().getName();
742
743 /*
744 * We may have multiple XML parsers in the Classpath.
745 * Depending on which one is first, the actual type of
746 * doc may vary. Need a way to find out which API is being
747 * used and use an appropriate serialization method.
748 */
749
750 try {
751 // First of all try for JAXP 1.0
752 if (domImpl.equals("com.sun.xml.tree.XmlDocument")) {
753
754 Debug.log(Debug.INFO, "Using JAXP");
755
756 Class<?> jaxpDoc = Class.forName("com.sun.xml.tree.XmlDocument");
757
758 // The method is in the XMLDocument class itself, not a helper
759 meth = jaxpDoc.getMethod("write",
760 new Class[] { Class.forName("java.io.OutputStream") } );
761
762 meth.invoke(doc, new Object [] { baos } );
763 } else if (domImpl.equals("org.apache.crimson.tree.XmlDocument")) {
764 Debug.log(Debug.INFO, "Using Crimson");
765
766 Class<?> crimsonDoc = Class.forName("org.apache.crimson.tree.XmlDocument");
767 // The method is in the XMLDocument class itself, not a helper
768 meth = crimsonDoc.getMethod("write",
769 new Class[] { Class.forName("java.io.OutputStream") } );
770
771 meth.invoke(doc, new Object [] { baos } );
772 } else if (domImpl.equals("org.apache.xerces.dom.DocumentImpl")
773 || domImpl.equals("org.apache.xerces.dom.DeferredDocumentImpl")) {
774
775 Debug.log(Debug.INFO, "Using Xerces");
776
777 // Try for Xerces
778 Class<?> xercesSer =
779 Class.forName("org.apache.xml.serialize.XMLSerializer");
780
781 // Get the OutputStream constructor
782 // May want to use the OutputFormat parameter at some stage too
783 con = xercesSer.getConstructor(new Class []
784 { Class.forName("java.io.OutputStream"),
785 Class.forName("org.apache.xml.serialize.OutputFormat") } );
786
787 // Get the serialize method
788 meth = xercesSer.getMethod("serialize",
789 new Class [] { Class.forName("org.w3c.dom.Document") } );
790
791 // Get an instance
792 Object serializer = con.newInstance(new Object [] { baos, null } );
793
794 // Now call serialize to write the document
795 meth.invoke(serializer, new Object [] { doc } );
796 } else if (domImpl.equals("gnu.xml.dom.DomDocument")) {
797 Debug.log(Debug.INFO, "Using GNU");
798
799 Class<?> gnuSer = Class.forName("gnu.xml.dom.ls.DomLSSerializer");
800
801 // Get the serialize method
802 meth = gnuSer.getMethod("serialize",
803 new Class [] { Class.forName("org.w3c.dom.Node"),
804 Class.forName("java.io.OutputStream") } );
805
806 // Get an instance
807 Object serializer = gnuSer.newInstance();
808
809 // Now call serialize to write the document
810 meth.invoke(serializer, new Object [] { doc, baos } );
811 } else {
812 try {
813 DOMSource domSource = new DOMSource(doc);
814 StringWriter writer = new StringWriter();
815 StreamResult result = new StreamResult(writer);
816 TransformerFactory tf = TransformerFactory.newInstance();
817 Transformer transformer = tf.newTransformer();
818 transformer.transform(domSource, result);
819 return writer.toString().getBytes();
820 } catch (Exception e) {
821 // We don't have another parser
822 IOException newEx = new IOException("No appropriate API (JAXP/Xerces) to serialize XML document: " + domImpl);
823 newEx.initCause(e);
824 throw newEx;
825 }
826 }
827 }
828 catch (Exception e) {
829 // We may get some other errors, but the bottom line is that
830 // the steps being executed no longer work
831 IOException newEx = new IOException(e.getMessage());
832 newEx.initCause(e);
833 throw newEx;
834 }
835
836 byte bytes[] = baos.toByteArray();
837
838 return bytes;
839 }
840
847 public final void initContentDOM() throws IOException {
848
849 contentDoc = createDOM(TAG_OFFICE_DOCUMENT_CONTENT);
850
851 // this is a work-around for a bug in Office6.0 - not really
852 // needed but StarCalc 6.0 will crash without this tag.
853 Element root = contentDoc.getDocumentElement();
854
855 Element child = contentDoc.createElement(TAG_OFFICE_FONT_DECLS);
856 root.appendChild(child);
857
858 child = contentDoc.createElement(TAG_OFFICE_AUTOMATIC_STYLES);
859 root.appendChild(child);
860
861 child = contentDoc.createElement(TAG_OFFICE_BODY);
862 root.appendChild(child);
863 }
864
871 public final void initSettingsDOM() throws IOException {
872
873 settingsDoc = createSettingsDOM(TAG_OFFICE_DOCUMENT_SETTINGS);
874
875 // this is a work-around for a bug in Office6.0 - not really
876 // needed but StarCalc 6.0 will crash without this tag.
877 Element root = settingsDoc.getDocumentElement();
878
879 Element child = settingsDoc.createElement(TAG_OFFICE_SETTINGS);
880 root.appendChild(child);
881 }
882
889 public final void initStyleDOM() throws IOException {
890
891 styleDoc = createDOM(TAG_OFFICE_DOCUMENT_STYLES);
892 }
893
904 private final Document createSettingsDOM(String rootName) throws IOException {
905
906 Document doc = null;
907
908 try {
909 DocumentBuilder builder = factory.newDocumentBuilder();
910 doc = builder.newDocument();
911 } catch (ParserConfigurationException ex) {
912 throw new OfficeDocumentException(ex);
913 }
914
915 Element root = doc.createElement(rootName);
916 doc.appendChild(root);
917
918 root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
919 root.setAttribute("xmlns:xlink", "http://openoffice.org/1999/xlink");
920 root.setAttribute("xmlns:config", "http://openoffice.org/2001/config");
921 root.setAttribute("office:version", "1.0");
922
923 return doc;
924 }
925
936 private final Document createDOM(String rootName) throws IOException {
937
938 Document doc = null;
939
940 try {
941 DocumentBuilder builder = factory.newDocumentBuilder();
942 doc = builder.newDocument();
943 } catch (ParserConfigurationException ex) {
944 throw new OfficeDocumentException(ex);
945 }
946
947 Element root = doc.createElement(rootName);
948 doc.appendChild(root);
949
950 root.setAttribute("xmlns:office", "http://openoffice.org/2000/office");
951 root.setAttribute("xmlns:style", "http://openoffice.org/2000/style");
952 root.setAttribute("xmlns:text", "http://openoffice.org/2000/text");
953 root.setAttribute("xmlns:table", "http://openoffice.org/2000/table");
954 root.setAttribute("xmlns:draw", "http://openoffice.org/2000/drawing");
955 root.setAttribute("xmlns:fo", "http://www.w3.org/1999/XSL/Format");
956 root.setAttribute("xmlns:xlink", "http://www.w3.org/1999/xlink");
957 root.setAttribute("xmlns:number", "http://openoffice.org/2000/datastyle");
958 root.setAttribute("xmlns:svg", "http://www.w3.org/2000/svg");
959 root.setAttribute("xmlns:chart", "http://openoffice.org/2000/chart");
960 root.setAttribute("xmlns:dr3d", "http://openoffice.org/2000/dr3d");
961 root.setAttribute("xmlns:math", "http://www.w3.org/1998/Math/MathML");
962 root.setAttribute("xmlns:form", "http://openoffice.org/2000/form");
963 root.setAttribute("xmlns:script", "http://openoffice.org/2000/script");
964 root.setAttribute("office:class", getOfficeClassAttribute());
965 root.setAttribute("office:version", "1.0");
966
967 return doc;
968 }
969
975 protected abstract String getOfficeClassAttribute();
976
1001 private static Reader hack(InputStream is) throws IOException {
1002
1003 BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
1004 StringBuffer buffer = new StringBuffer();
1005
1006 String str;
1007 while ((str = br.readLine()) != null) {
1008
1009 int sIndex = str.indexOf("<!DOCTYPE");
1010 if (sIndex > -1) {
1011
1012 buffer.append(str.substring(0, sIndex));
1013
1014 int eIndex = str.indexOf('>', sIndex + 8 );
1015 if (eIndex > -1) {
1016
1017 buffer.append(str.substring(eIndex + 1, str.length()));
1018 // FIX (HJ): Preserve the newline
1019 buffer.append("\n");
1020
1021 } else {
1022
1023 // FIX (HJ): More than one line. Search for '>' in following lines
1024 boolean bOK = false;
1025 while ((str = br.readLine())!=null) {
1026 eIndex = str.indexOf('>');
1027 if (eIndex>-1) {
1028 buffer.append(str.substring(eIndex+1));
1029 // FIX (HJ): Preserve the newline
1030 buffer.append("\n");
1031 bOK = true;
1032 break;
1033 }
1034 }
1035
1036 if (!bOK) { throw new IOException("Invalid XML"); }
1037 }
1038
1039 } else {
1040
1041 buffer.append(str);
1042 // FIX (HJ): Preserve the newline
1043 buffer.append("\n");
1044 }
1045 }
1046
1047 StringReader r = new StringReader(buffer.toString());
1048 return r;
1049 }
1050
1065 private static Reader secondHack(InputStream is) throws IOException {
1066
1067 BufferedReader br = new BufferedReader(new InputStreamReader(is, "UTF-8"));
1068 char[] charArray = new char[4096];
1069 StringBuffer sBuf = new StringBuffer();
1070 int n;
1071 while ((n=br.read(charArray, 0, charArray.length)) > 0) {
1072 sBuf.append(charArray, 0, n);
1073 }
1074
1075 // ensure there is no trailing garbage after the end of the stream.
1076 int sIndex = sBuf.lastIndexOf("</office:document>");
1077 sBuf.delete(sIndex, sBuf.length());
1078 sBuf.append("</office:document>");
1079 StringReader r = new StringReader(sBuf.toString());
1080 return r;
1081 }
1082
1087 private void initManifestDOM() throws IOException {
1088
1089 try {
1090 DocumentBuilder builder = factory.newDocumentBuilder();
1091 DOMImplementation domImpl = builder.getDOMImplementation();
1092
1093 DocumentType docType = domImpl.createDocumentType(TAG_MANIFEST_ROOT,
1094 "-//OpenOffice.org//DTD Manifest 1.0//EN",
1095 "Manifest.dtd");
1096 manifestDoc = domImpl.createDocument("manifest", TAG_MANIFEST_ROOT, docType);
1097 } catch (ParserConfigurationException ex) {
1098 throw new OfficeDocumentException(ex);
1099 }
1100
1101 // Add the <manifest:manifest> entry
1102 Element manifestRoot = manifestDoc.getDocumentElement();
1103
1104 manifestRoot.setAttribute("xmlns:manifest", "http://openoffice.org/2001/manifest");
1105
1106 Element docRoot = manifestDoc.createElement(TAG_MANIFEST_FILE);
1107
1108 docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_PATH, "/");
1109 docRoot.setAttribute(ATTRIBUTE_MANIFEST_FILE_TYPE, getDocumentMimeType());
1110
1111 manifestRoot.appendChild(docRoot);
1112 }
1113}
This class is used for logging debug messages.
Definition: Debug.java:39
const char * name
sal_Int64 n
int i
con
Provides general purpose utilities.
Provides interfaces for converting between two Document formats, and supports a "merge" interface for...
Definition: Convert.java:19
bool parse(OUString const &uri, SourceProviderScannerData *data)
std::vector< sal_uInt8 > bytes
DocumentType
Any result
ResultType type