LibreOffice Module sc (master) 1
scdetect.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include "scdetect.hxx"
21
22#include <sal/macros.h>
23
24#include <com/sun/star/beans/PropertyValue.hpp>
25#include <com/sun/star/uno/XComponentContext.hpp>
27#include <com/sun/star/io/XInputStream.hpp>
#include <cppuhelper/supportsservice.hxx>
29#include <sfx2/docfile.hxx>
30#include <sfx2/docfilt.hxx>
31#include <sfx2/fcontnr.hxx>
32
33using namespace ::com::sun::star;
35
36namespace {
37
38// table with search pattern
39// meaning of the sequences
40// 0x00??: the exact byte 0x?? must be at that place
41// 0x0100: read over a byte (don't care)
42// 0x02nn: the byte must match one of the 0xnn alternative bytes that follow
43// 0x8000: recognition finished
44
// Marker values used in the signature tables below. All of them are
// >= 0x0100, so they cannot collide with literal byte entries (0x0000-0x00ff).
// M_DC: "don't care" - any byte is accepted at this position.
45#define M_DC 0x0100
// M_ALT(CNT): the byte must equal one of the CNT table entries that follow.
46#define M_ALT(CNT) (0x0200+(CNT))
// M_END: terminates a table; reaching it means the signature was recognised.
47#define M_END 0x8000
48
// Signature: bytes 00 00 02 00, then either 04 or 06, then 04.
49const sal_uInt16 pLotus[] = // Lotus 1/1A/2
50 { 0x0000, 0x0000, 0x0002, 0x0000,
51 M_ALT(2), 0x0004, 0x0006,
52 0x0004, M_END };
53
// Signature: bytes 00 00, one arbitrary byte, 00, then one of 03/04/05,
// then 10 04 00 00.
54const sal_uInt16 pLotusNew[] = // Lotus >= 9.7
55 { 0x0000, 0x0000, M_DC, 0x0000, // Rec# + Len (0x1a)
56 M_ALT(3), 0x0003, 0x0004, 0x0005, // File Revision Code 97->ME
57 0x0010, 0x0004, 0x0000, 0x0000,
58 M_END };
59
// Signature: bytes 00 00 1A 00, then either 00 or 02, then 10 04 00.
60const sal_uInt16 pLotus2[] = // Lotus >3
61 { 0x0000, 0x0000, 0x001A, 0x0000, // Rec# + Len (26)
62 M_ALT(2), 0x0000, 0x0002, // File Revision Code
63 0x0010,
64 0x0004, 0x0000, // File Revision Subcode
65 M_END };
66
// Quattro Pro signature: bytes 00 00 02 00, then one of 01/02/06/07, then 10.
67const sal_uInt16 pQPro[] =
68 { 0x0000, 0x0000, 0x0002, 0x0000,
69 M_ALT(4), 0x0001, 0x0002, // WB1, WB2
70 0x0006, 0x0007, // QPro 6/7 (?)
71 0x0010,
72 M_END };
73
// Signature: "TABLE", two arbitrary bytes, "0,1", two arbitrary bytes, then a
// double quote. The arbitrary byte pairs presumably stand for the CR-LF line
// breaks named in the comment below.
74const sal_uInt16 pDIF1[] = // DIF with CR-LF
75 {
76 'T', 'A', 'B', 'L', 'E',
77 M_DC, M_DC,
78 '0', ',', '1',
79 M_DC, M_DC,
80 '\"',
81 M_END };
82
// Signature: "TABLE", one arbitrary byte, "0,1", one arbitrary byte, then a
// double quote. The arbitrary bytes presumably stand for the single-character
// line breaks named in the comment below.
83const sal_uInt16 pDIF2[] = // DIF with CR or LF
84 {
85 'T', 'A', 'B', 'L', 'E',
86 M_DC,
87 '0', ',', '1',
88 M_DC,
89 '\"',
90 M_END };
91
// Signature: "ID;" followed by one of 'P', 'N' or 'E'.
92const sal_uInt16 pSylk[] = // Sylk
93 {
94 'I', 'D', ';',
95 M_ALT(3), 'P', 'N', 'E', // 'P' plus undocumented Excel extensions 'N' and 'E'
96 M_END };
97
98bool detectThisFormat(SvStream& rStr, const sal_uInt16* pSearch)
99{
100 sal_uInt8 nByte;
101 rStr.Seek( 0 ); // in the beginning everything was bad...
102 rStr.ReadUChar( nByte );
103 bool bSync = true;
104 while( !rStr.eof() && bSync )
105 {
106 sal_uInt16 nMuster = *pSearch;
107
108 if( nMuster < 0x0100 )
109 { // compare bytes
110 if( static_cast<sal_uInt8>(nMuster) != nByte )
111 bSync = false;
112 }
113 else if( nMuster & M_DC )
114 { // don't care
115 }
116 else if( nMuster & M_ALT(0) )
117 { // alternative Bytes
118 sal_uInt8 nCntAlt = static_cast<sal_uInt8>(nMuster);
119 bSync = false; // first unsynchron
120 while( nCntAlt > 0 )
121 {
122 pSearch++;
123 if( static_cast<sal_uInt8>(*pSearch) == nByte )
124 bSync = true; // only now synchronization
125 nCntAlt--;
126 }
127 }
128 else if( nMuster & M_END )
129 { // Format detected
130 return true;
131 }
132
133 pSearch++;
134 rStr.ReadUChar( nByte );
135 }
136
137 return false;
138}
139
140}
141
143{
144}
145
147{
148}
149
150#if 0
151// This method is no longer used, but I do want to keep this for now to see
152// if we could transfer this check to the now centralized ascii detection
153// code in the filter module.
154static sal_Bool lcl_MayBeAscii( SvStream& rStream )
155{
156 // ASCII/CSV is considered possible if there are no null bytes, or a Byte
157 // Order Mark is present, or if, for Unicode UCS2/UTF-16, all null bytes
158 // are on either even or uneven byte positions.
159
160 rStream.Seek(STREAM_SEEK_TO_BEGIN);
161
162 const size_t nBufSize = 2048;
163 sal_uInt16 aBuffer[ nBufSize ];
164 sal_uInt8* pByte = reinterpret_cast<sal_uInt8*>(aBuffer);
165 sal_uLong nBytesRead = rStream.Read( pByte, nBufSize*2);
166
167 if ( nBytesRead >= 2 && (aBuffer[0] == 0xfffe || aBuffer[0] == 0xfeff) )
168 {
169 // Unicode BOM file may contain null bytes.
170 return sal_True;
171 }
172
173 const sal_uInt16* p = aBuffer;
174 sal_uInt16 nMask = 0xffff;
175 nBytesRead /= 2;
176 while( nBytesRead-- && nMask )
177 {
178 sal_uInt16 nVal = *p++ & nMask;
179 if (!(nVal & 0x00ff))
180 nMask &= 0xff00;
181 if (!(nVal & 0xff00))
182 nMask &= 0x00ff;
183 }
184
185 return nMask != 0;
186}
187#endif
188
189static bool lcl_MayBeDBase( SvStream& rStream )
190{
191 // Look for dbf marker, see connectivity/source/inc/dbase/DTable.hxx
192 // DBFType for values.
193 const sal_uInt8 nValidMarks[] = {
194 0x03, 0x04, 0x05, 0x30, 0x31, 0x43, 0xB3, 0x83, 0x8b, 0x8e, 0xf5 };
195 sal_uInt8 nMark;
196 rStream.Seek(STREAM_SEEK_TO_BEGIN);
197 rStream.ReadUChar( nMark );
198 bool bValidMark = false;
199 for (size_t i=0; i < SAL_N_ELEMENTS(nValidMarks) && !bValidMark; ++i)
200 {
201 if (nValidMarks[i] == nMark)
202 bValidMark = true;
203 }
204 if ( !bValidMark )
205 return false;
206
207 const size_t nHeaderBlockSize = 32;
208 // Empty dbf is >= 32*2+1 bytes in size.
209 const size_t nEmptyDbf = nHeaderBlockSize * 2 + 1;
210
211 sal_uInt64 nSize = rStream.TellEnd();
212 if ( nSize < nEmptyDbf )
213 return false;
214
215 // count of records at 4
216 rStream.Seek(4);
217 sal_uInt32 nRecords(0);
218 rStream.ReadUInt32(nRecords);
219
220 // length of header starts at 8
221 rStream.Seek(8);
222 sal_uInt16 nHeaderLen;
223 rStream.ReadUInt16( nHeaderLen );
224
225 // size of record at 10
226 sal_uInt16 nRecordSize(0);
227 rStream.ReadUInt16(nRecordSize);
228
229 if ( nHeaderLen < nEmptyDbf || nSize < nHeaderLen )
230 return false;
231
232 // see DTable.cxx ODbaseTable::readHeader()
233 if (0 == nRecordSize)
234 return false;
235
236 // see DTable.cxx ODbaseTable::construct() line 546
237 if (0 == nRecords)
238 {
239 nRecords = (nSize - nHeaderLen) / nRecordSize;
240 }
241
242 // tdf#84834 sanity check of size
243 // tdf#106423: a dbf file can have 0 record, so no need to check nRecords
244 if (nSize < nHeaderLen + nRecords * sal_uInt64(nRecordSize))
245 return false;
246
247 // Last byte of header must be 0x0d, this is how it's specified.
248 // #i9581#,#i26407# but some applications don't follow the specification
249 // and pad the header with one byte 0x00 to reach an
250 // even boundary. Some (#i88577# ) even pad more or pad using a 0x1a ^Z
251 // control character (#i8857#). This results in:
252 // Last byte of header must be 0x0d on 32 bytes boundary.
253 sal_uInt16 nBlocks = (nHeaderLen - 1) / nHeaderBlockSize;
254 sal_uInt8 nEndFlag = 0;
255 while ( nBlocks > 1 && nEndFlag != 0x0d ) {
256 rStream.Seek( nBlocks-- * nHeaderBlockSize );
257 rStream.ReadUChar( nEndFlag );
258 }
259
260 return ( 0x0d == nEndFlag );
261}
262
263OUString SAL_CALL ScFilterDetect::detect( uno::Sequence<beans::PropertyValue>& lDescriptor )
264{
265 MediaDescriptor aMediaDesc( lDescriptor );
266 OUString aTypeName = aMediaDesc.getUnpackedValueOrDefault( MediaDescriptor::PROP_TYPENAME, OUString() );
267 uno::Reference< io::XInputStream > xStream ( aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM], uno::UNO_QUERY );
268 if ( !xStream.is() )
269 return OUString();
270
271 SfxMedium aMedium;
272 aMedium.UseInteractionHandler( false );
273 aMedium.setStreamToLoadFrom( xStream, true );
274
275 SvStream* pStream = aMedium.GetInStream();
276 if ( !pStream || pStream->GetError() )
277 // No stream, no detection.
278 return OUString();
279
280 const char* pSearchFilterName = nullptr;
281 if (aTypeName == "calc_Lotus")
282 {
283 if (!detectThisFormat(*pStream, pLotus) && !detectThisFormat(*pStream, pLotusNew) && !detectThisFormat(*pStream, pLotus2))
284 return OUString();
285
286 pSearchFilterName = "Lotus";
287 }
288 else if (aTypeName == "calc_QPro")
289 {
290 if (!detectThisFormat(*pStream, pQPro))
291 return OUString();
292
293 pSearchFilterName = "Quattro Pro 6.0";
294 }
295 else if (aTypeName == "calc_SYLK")
296 {
297 if (!detectThisFormat(*pStream, pSylk))
298 return OUString();
299
300 pSearchFilterName = "SYLK";
301 }
302 else if (aTypeName == "calc_DIF")
303 {
304 if (!detectThisFormat(*pStream, pDIF1) && !detectThisFormat(*pStream, pDIF2))
305 return OUString();
306
307 pSearchFilterName = "DIF";
308 }
309 else if (aTypeName == "calc_dBase")
310 {
311 if (!lcl_MayBeDBase(*pStream))
312 return OUString();
313
314 pSearchFilterName = "dBase";
315 }
316 else
317 return OUString();
318
319 SfxFilterMatcher aMatcher("scalc");
320 std::shared_ptr<const SfxFilter> pFilter = aMatcher.GetFilter4FilterName(OUString::createFromAscii(pSearchFilterName));
321
322 if (!pFilter)
323 return OUString();
324
325 aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= pFilter->GetName();
326 aMediaDesc >> lDescriptor;
327 return aTypeName;
328}
329
331{
332 return "com.sun.star.comp.calc.FormatDetector";
333}
334
335sal_Bool ScFilterDetect::supportsService( const OUString& sServiceName )
336{
338}
339
340css::uno::Sequence<OUString> ScFilterDetect::getSupportedServiceNames()
341{
342 return { "com.sun.star.frame.ExtendedTypeDetection" };
343}
344
345extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
346com_sun_star_comp_calc_FormatDetector_get_implementation(css::uno::XComponentContext* /*context*/,
347 css::uno::Sequence<css::uno::Any> const &)
348{
349 return cppu::acquire(new ScFilterDetect);
350}
351
352
353/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Reference< XInputStream > xStream
constexpr OUStringLiteral sServiceName
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
Definition: scdetect.cxx:340
virtual OUString SAL_CALL detect(css::uno::Sequence< css::beans::PropertyValue > &lDescriptor) override
Definition: scdetect.cxx:263
virtual ~ScFilterDetect() override
Definition: scdetect.cxx:146
virtual sal_Bool SAL_CALL supportsService(const OUString &sServiceName) override
Definition: scdetect.cxx:335
virtual OUString SAL_CALL getImplementationName() override
Definition: scdetect.cxx:330
std::shared_ptr< const SfxFilter > GetFilter4FilterName(const OUString &rName, SfxFilterFlags nMust=SfxFilterFlags::NONE, SfxFilterFlags nDont=SFX_FILTER_NOTINSTALLED) const
void setStreamToLoadFrom(const css::uno::Reference< css::io::XInputStream > &xInputStream, bool bIsReadOnly)
void UseInteractionHandler(bool)
SvStream * GetInStream()
virtual sal_uInt64 TellEnd()
bool eof() const
SvStream & ReadUInt32(sal_uInt32 &rUInt32)
sal_uInt64 Seek(sal_uInt64 nPos)
ErrCode GetError() const
SvStream & ReadUInt16(sal_uInt16 &rUInt16)
SvStream & ReadUChar(unsigned char &rChar)
void * p
#define SAL_N_ELEMENTS(arr)
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
int i
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * com_sun_star_comp_calc_FormatDetector_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
Definition: scdetect.cxx:346
#define M_ALT(CNT)
Definition: scdetect.cxx:46
#define M_DC
Definition: scdetect.cxx:45
static bool lcl_MayBeDBase(SvStream &rStream)
Definition: scdetect.cxx:189
#define M_END
Definition: scdetect.cxx:47
sal_uIntPtr sal_uLong
#define STREAM_SEEK_TO_BEGIN
#define sal_True
unsigned char sal_uInt8
unsigned char sal_Bool
std::unique_ptr< char[]> aBuffer