LibreOffice Module sw (master) 1
swdetect.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
#include "swdetect.hxx"

#include <cppuhelper/supportsservice.hxx>
#include <com/sun/star/io/XInputStream.hpp>
#include <com/sun/star/uno/XComponentContext.hpp>
#include <sfx2/docfile.hxx>
#include <sot/storage.hxx>
#include <tools/urlobj.hxx>
#include <unotools/mediadescriptor.hxx>
29
30using namespace ::com::sun::star;
31using namespace ::com::sun::star::uno;
32using namespace ::com::sun::star::io;
33using namespace ::com::sun::star::task;
34using namespace ::com::sun::star::beans;
35using namespace ::com::sun::star::lang;
37
39{
40}
41
43{
44}
45
46OUString SAL_CALL SwFilterDetect::detect( Sequence< PropertyValue >& lDescriptor )
47{
48 MediaDescriptor aMediaDesc( lDescriptor );
49 OUString aTypeName = aMediaDesc.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME, OUString() );
50 uno::Reference< io::XInputStream > xInStream ( aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM], uno::UNO_QUERY );
51 if ( !xInStream.is() )
52 return OUString();
53
54 SfxMedium aMedium;
55 aMedium.UseInteractionHandler( false );
56 aMedium.setStreamToLoadFrom( xInStream, true );
57
58 SvStream *pInStrm = aMedium.GetInStream();
59 if ( !pInStrm || pInStrm->GetError() )
60 return OUString();
61
62 bool bIsDetected = false;
63
64 if ( aTypeName == "writer_Rich_Text_Format" )
65 {
66 pInStrm->Seek( STREAM_SEEK_TO_BEGIN );
67 bIsDetected = ( read_uInt8s_ToOString( *pInStrm, 5 ) == "{\\rtf" );
68 }
69 else if ( aTypeName == "writer_MS_WinWord_5" )
70 {
71 pInStrm->Seek( STREAM_SEEK_TO_BEGIN );
72 const sal_uInt8 nBufSize = 3;
73 sal_uInt8 nBuffer[ nBufSize ];
74 if (pInStrm->ReadBytes(nBuffer, nBufSize) < nBufSize)
75 return OUString();
76
77 bIsDetected = (nBuffer[0] == 0x9B && nBuffer[1] == 0xA5 && nBuffer[2] == 0x21) // WinWord 1
78 || (nBuffer[0] == 0x9C && nBuffer[1] == 0xA5 && nBuffer[2] == 0x21) // PMWord 1
79 || (nBuffer[0] == 0xDB && nBuffer[1] == 0xA5 && nBuffer[2] == 0x2D) // WinWord 2
80 || (nBuffer[0] == 0xDC && nBuffer[1] == 0xA5 && nBuffer[2] == 0x65); // WinWord 6.0/95, as a single stream file
81 }
82 else
83 {
84 // Do not attempt to create an SotStorage on a
85 // 0-length stream as that would create the compound
86 // document header on the stream and effectively write to
87 // disk!
88 pInStrm->Seek( STREAM_SEEK_TO_BEGIN );
89 if ( pInStrm->remainingSize() == 0 )
90 return OUString();
91
92 try
93 {
94 tools::SvRef<SotStorage> aStorage = new SotStorage ( pInStrm, false );
95 if ( !aStorage->GetError() )
96 {
97 bIsDetected = aStorage->IsContained( "WordDocument" );
98 if ( bIsDetected && aTypeName.startsWith( "writer_MS_Word_97" ) )
99 {
100 bIsDetected = ( aStorage->IsContained("0Table") || aStorage->IsContained("1Table") );
101
102 // If we are checking the template type, and the document is not a .dot, don't
103 // mis-detect it.
104 if ( bIsDetected && aTypeName == "writer_MS_Word_97_Vorlage" )
105 {
106 // It is common practice to rename a .doc to .dot to make it a template.
107 // Since we have detected a.doc-ish format, always accept .dot-named-files
108 // as valid templates to avoid flagging this as an invalid .dot format..
109 INetURLObject aParser(aMediaDesc.getUnpackedValueOrDefault(
110 utl::MediaDescriptor::PROP_URL, OUString()));
111
112 // Super ugly hack, but we don't want to use the whole WW8Fib thing here in
113 // the swd library, apparently. We know (do we?) that the "aBits1" byte, as
114 // the variable is called in WW8Fib::WW8Fib(SvStream&,sal_uInt8,sal_uInt32),
115 // is at offset 10 in the WordDocument stream. The fDot bit is bit 0x01 of
116 // that byte.
117 if (aParser.getExtension().toAsciiLowerCase() != "dot")
118 {
120 = aStorage->OpenSotStream("WordDocument", StreamMode::STD_READ);
121 xWordDocument->Seek(10);
122 if (xWordDocument->Tell() == 10)
123 {
124 sal_uInt8 aBits1;
125 xWordDocument->ReadUChar(aBits1);
126 // Check fDot bit
127 bIsDetected = ((aBits1 & 0x01) == 0x01);
128 }
129 }
130 }
131 }
132 }
133 }
134 catch (...)
135 {
136 bIsDetected = false;
137 }
138 }
139
140 if ( bIsDetected )
141 return aTypeName;
142
143 return OUString();
144}
145
146/* XServiceInfo */
148{
149 return "com.sun.star.comp.writer.FormatDetector";
150}
151
152/* XServiceInfo */
154{
156}
157
158/* XServiceInfo */
159Sequence< OUString > SAL_CALL SwFilterDetect::getSupportedServiceNames()
160{
161 return { "com.sun.star.frame.ExtendedTypeDetection", "com.sun.star.text.FormatDetector", "com.sun.star.text.W4WFormatDetector" };
162}
163
164extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
166 css::uno::Sequence<css::uno::Any> const &)
167{
168 return cppu::acquire(new SwFilterDetect());
169}
170
171/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
constexpr OUStringLiteral sServiceName
Definition: accdoc.cxx:54
OUString getExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
void setStreamToLoadFrom(const css::uno::Reference< css::io::XInputStream > &xInputStream, bool bIsReadOnly)
void UseInteractionHandler(bool)
SvStream * GetInStream()
sal_uInt64 Seek(sal_uInt64 nPos)
std::size_t ReadBytes(void *pData, std::size_t nSize)
ErrCode GetError() const
sal_uInt64 remainingSize()
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
Definition: swdetect.cxx:159
virtual OUString SAL_CALL getImplementationName() override
Definition: swdetect.cxx:147
virtual OUString SAL_CALL detect(css::uno::Sequence< css::beans::PropertyValue > &lDescriptor) override
Definition: swdetect.cxx:46
virtual ~SwFilterDetect() override
Definition: swdetect.cxx:42
virtual sal_Bool SAL_CALL supportsService(const OUString &sServiceName) override
Definition: swdetect.cxx:153
static constexpr OUStringLiteral PROP_URL
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
TOOLS_DLLPUBLIC OString read_uInt8s_ToOString(SvStream &rStrm, std::size_t nUnits)
#define STREAM_SEEK_TO_BEGIN
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * com_sun_star_comp_writer_FormatDetector_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
Definition: swdetect.cxx:165
unsigned char sal_uInt8
unsigned char sal_Bool