LibreOffice Module sw (master)  1
iodetect.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <iodetect.hxx>
21 #include <memory>
22 #include <osl/endian.h>
23 #include <sot/storage.hxx>
24 #include <tools/urlobj.hxx>
26 #include <sfx2/docfilt.hxx>
27 #include <sfx2/fcontnr.hxx>
28 #include <sfx2/docfile.hxx>
29 #include <com/sun/star/ucb/ContentCreationException.hpp>
30 #include <com/sun/star/embed/XStorage.hpp>
31 #include <unicode/ucsdet.h>
32 #include <unicode/uclean.h>
33 
34 using namespace ::com::sun::star;
35 
36 static bool IsDocShellRegistered()
37 {
38  return SvtModuleOptions().IsWriter();
39 }
40 
42 {
45  SwIoDetect( sWW6 ),
47  SwIoDetect( sRtfWH ),
48  SwIoDetect( sHTML ),
49  SwIoDetect( sWW5 ),
54 };
55 
56 OUString SwIoSystem::GetSubStorageName( const SfxFilter& rFltr )
57 {
58  // for StorageFilters also set the SubStorageName
59  const OUString& rUserData = rFltr.GetUserData();
60  if (rUserData == FILTER_XML ||
61  rUserData == FILTER_XMLV ||
62  rUserData == FILTER_XMLVW)
63  return "content.xml";
64  if (rUserData == sWW6 || rUserData == FILTER_WW8)
65  return "WordDocument";
66  return OUString();
67 }
68 
69 std::shared_ptr<const SfxFilter> SwIoSystem::GetFilterOfFormat(const OUString& rFormatNm,
70  const SfxFilterContainer* pCnt)
71 {
72  SfxFilterContainer aCntSw( sSWRITER );
73  SfxFilterContainer aCntSwWeb( sSWRITERWEB );
74  const SfxFilterContainer* pFltCnt = pCnt ? pCnt : ( IsDocShellRegistered() ? &aCntSw : &aCntSwWeb );
75 
76  do {
77  if( pFltCnt )
78  {
79  SfxFilterMatcher aMatcher( pFltCnt->GetName() );
80  SfxFilterMatcherIter aIter( aMatcher );
81  std::shared_ptr<const SfxFilter> pFilter = aIter.First();
82  while ( pFilter )
83  {
84  if( pFilter->GetUserData() == rFormatNm )
85  return pFilter;
86  pFilter = aIter.Next();
87  }
88  }
89  if( pCnt || pFltCnt == &aCntSwWeb )
90  break;
91  pFltCnt = &aCntSwWeb;
92  } while( true );
93  return nullptr;
94 }
95 
96 bool SwIoSystem::IsValidStgFilter( const css::uno::Reference < css::embed::XStorage >& rStg, const SfxFilter& rFilter)
97 {
98  bool bRet = false;
99  try
100  {
101  SotClipboardFormatId nStgFormatId = SotStorage::GetFormatID( rStg );
102  bRet = rStg->isStreamElement( "content.xml" );
103  if ( bRet )
104  bRet = ( nStgFormatId != SotClipboardFormatId::NONE && ( rFilter.GetFormat() == nStgFormatId ) );
105  }
106  catch (const css::uno::Exception& )
107  {
108  }
109 
110  return bRet;
111 }
112 
114 {
115  SotClipboardFormatId nStgFormatId = rStg.GetFormat();
116  /*#i8409# We cannot trust the clipboard id anymore :-(*/
117  if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
118  nStgFormatId = SotClipboardFormatId::NONE;
119 
120  bool bRet = ERRCODE_NONE == rStg.GetError() &&
121  ( nStgFormatId == SotClipboardFormatId::NONE || rFilter.GetFormat() == nStgFormatId ) &&
122  ( rStg.IsContained( SwIoSystem::GetSubStorageName( rFilter )) );
123  if( bRet )
124  {
125  /* Bug 53445 - there are Excel Docs w/o ClipBoardId! */
126  /* Bug 62703 - and also WinWord Docs w/o ClipBoardId! */
127  if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
128  {
129  bRet = (rStg.IsContained("0Table")
130  || rStg.IsContained("1Table"))
131  == (rFilter.GetUserData() == FILTER_WW8);
132  if (bRet && !rFilter.IsAllowedAsTemplate())
133  {
135  rStg.OpenSotStream("WordDocument",
136  StreamMode::STD_READ );
137  xRef->Seek(10);
138  sal_uInt8 nByte;
139  xRef->ReadUChar( nByte );
140  bRet = !(nByte & 1);
141  }
142  }
143  }
144  return bRet;
145 }
146 
147 // Check the type of the stream (file) by searching for corresponding set of bytes.
148 // If no known type is found, return ASCII for now!
149 // Returns the internal FilterName.
150 std::shared_ptr<const SfxFilter> SwIoSystem::GetFileFilter(const OUString& rFileName)
151 {
152  SfxFilterContainer aCntSw( sSWRITER );
153  SfxFilterContainer aCntSwWeb( sSWRITERWEB );
154  const SfxFilterContainer* pFCntnr = IsDocShellRegistered() ? &aCntSw : &aCntSwWeb;
155 
156  SfxFilterMatcher aMatcher( pFCntnr->GetName() );
157  SfxFilterMatcherIter aIter( aMatcher );
158  std::shared_ptr<const SfxFilter> pFilter = aIter.First();
159  if ( !pFilter )
160  return nullptr;
161 
162  if (SotStorage::IsStorageFile(rFileName))
163  {
164  // package storage or OLEStorage based format
166  INetURLObject aObj;
167  aObj.SetSmartProtocol( INetProtocol::File );
168  aObj.SetSmartURL( rFileName );
169  SfxMedium aMedium(aObj.GetMainURL(INetURLObject::DecodeMechanism::NONE), StreamMode::STD_READ);
170 
171  // templates should not get precedence over "normal" filters (#i35508, #i33168)
172  std::shared_ptr<const SfxFilter> pTemplateFilter;
173  if (aMedium.IsStorage())
174  {
175  uno::Reference<embed::XStorage> const xStor = aMedium.GetStorage();
176  if ( xStor.is() )
177  {
178  while ( pFilter )
179  {
180  if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(xStor, *pFilter ))
181  {
182  if (pFilter->IsOwnTemplateFormat())
183  {
184  // found template filter; maybe there's a "normal" one also
185  pTemplateFilter = pFilter;
186  }
187  else
188  return pFilter;
189  }
190 
191  pFilter = aIter.Next();
192  }
193 
194  // there's only a template filter that could be found
195  if ( pTemplateFilter )
196  pFilter = pTemplateFilter;
197  }
198  }
199  else
200  {
201  try
202  {
203  SvStream *const pStream = aMedium.GetInStream();
204  if ( pStream && SotStorage::IsStorageFile(pStream) )
205  xStg = new SotStorage( pStream, false );
206  }
207  catch (const css::ucb::ContentCreationException &)
208  {
209  }
210 
211  if( xStg.is() && ( xStg->GetError() == ERRCODE_NONE ) )
212  {
213  while ( pFilter )
214  {
215  if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(*xStg, *pFilter))
216  {
217  if (pFilter->IsOwnTemplateFormat())
218  {
219  // found template filter; maybe there's a "normal" one also
220  pTemplateFilter = pFilter;
221  }
222  else
223  return pFilter;
224  }
225 
226  pFilter = aIter.Next();
227  }
228 
229  // there's only a template filter that could be found
230  if ( pTemplateFilter )
231  pFilter = pTemplateFilter;
232 
233  }
234  }
235 
236  return pFilter;
237  }
238 
240 }
241 
242 bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
243  rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
244 {
245  bool bSwap = false;
246  rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
247  bool bLE = true;
248  /*See if it's a known unicode type*/
249  if (rLen >= 2)
250  {
251  sal_uLong nHead=0;
252  if (rLen > 2 && sal_uInt8(pBuf[0]) == 0xEF && sal_uInt8(pBuf[1]) == 0xBB &&
253  sal_uInt8(pBuf[2]) == 0xBF)
254  {
255  eCharSet = RTL_TEXTENCODING_UTF8;
256  nHead = 3;
257  }
258  else if (sal_uInt8(pBuf[0]) == 0xFE && sal_uInt8(pBuf[1]) == 0xFF)
259  {
260  eCharSet = RTL_TEXTENCODING_UCS2;
261  bLE = false;
262  nHead = 2;
263  }
264  else if (sal_uInt8(pBuf[1]) == 0xFE && sal_uInt8(pBuf[0]) == 0xFF)
265  {
266  eCharSet = RTL_TEXTENCODING_UCS2;
267  nHead = 2;
268  }
269  pBuf+=nHead;
270  rLen-=nHead;
271  }
272  /*See unicode type again without BOM*/
273  if (rLen >= 1 && eCharSet == RTL_TEXTENCODING_DONTKNOW)
274  {
275  UErrorCode uerr = U_ZERO_ERROR;
276  UCharsetDetector* ucd = ucsdet_open(&uerr);
277  ucsdet_setText(ucd, pBuf, rLen, &uerr);
278  if (const UCharsetMatch* match = ucsdet_detect(ucd, &uerr))
279  {
280  const char* pEncodingName = ucsdet_getName(match, &uerr);
281 
282  if (U_SUCCESS(uerr) && !strcmp("UTF-8", pEncodingName))
283  {
284  eCharSet = RTL_TEXTENCODING_UTF8; // UTF-8
285  }
286  else if (U_SUCCESS(uerr) && !strcmp("UTF-16BE", pEncodingName))
287  {
288  eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16BE
289  bLE = false;
290  }
291  else if (U_SUCCESS(uerr) && !strcmp("UTF-16LE", pEncodingName))
292  {
293  eCharSet = RTL_TEXTENCODING_UCS2; // UTF-16LE
294  }
295  }
296 
297  ucsdet_close(ucd);
298  }
299 
300  bool bCR = false, bLF = false, bIsBareUnicode = false;
301 
302  if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
303  {
304  std::unique_ptr<sal_Unicode[]> aWork(new sal_Unicode[rLen+1]);
305  sal_Unicode *pNewBuf = aWork.get();
306  std::size_t nNewLen;
307  if (eCharSet != RTL_TEXTENCODING_UCS2)
308  {
309  nNewLen = rLen;
310  rtl_TextToUnicodeConverter hConverter =
311  rtl_createTextToUnicodeConverter(eCharSet);
312  rtl_TextToUnicodeContext hContext =
313  rtl_createTextToUnicodeContext(hConverter);
314 
315  sal_Size nCntBytes;
316  sal_uInt32 nInfo;
317  nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
318  rLen, pNewBuf, nNewLen,
319  (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
320  RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
321  RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes);
322 
323  rtl_destroyTextToUnicodeContext(hConverter, hContext);
324  rtl_destroyTextToUnicodeConverter(hConverter);
325  }
326  else
327  {
328  nNewLen = rLen/2;
329  memcpy(pNewBuf, pBuf, rLen);
330 #ifdef OSL_LITENDIAN
331  bool const bNativeLE = true;
332 #else
333  bool const bNativeLE = false;
334 #endif
335  if (bLE != bNativeLE)
336  {
337  bSwap = true;
338  char* pF = reinterpret_cast<char*>(pNewBuf);
339  char* pN = pF+1;
340  for(sal_uLong n = 0; n < nNewLen; ++n, pF+=2, pN+=2 )
341  {
342  char c = *pF;
343  *pF = *pN;
344  *pN = c;
345  }
346  }
347  }
348 
349  for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
350  {
351  switch (*pNewBuf)
352  {
353  case 0xA:
354  bLF = true;
355  break;
356  case 0xD:
357  bCR = true;
358  break;
359  default:
360  break;
361  }
362  }
363  }
364  else
365  {
366  for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
367  {
368  switch (*pBuf)
369  {
370  case 0x0:
371  if( nCnt + 1 < rLen && !*(pBuf+1) )
372  return false;
373  bIsBareUnicode = true;
374  break;
375  case 0xA:
376  bLF = true;
377  break;
378  case 0xD:
379  bCR = true;
380  break;
381  case 0xC:
382  case 0x1A:
383  case 0x9:
384  break;
385  default:
386  break;
387  }
388  }
389  }
390 
391  LineEnd eSysLE = GetSystemLineEnd();
392  LineEnd eLineEnd;
393  if (!bCR && !bLF)
394  eLineEnd = eSysLE;
395  else
396  eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
397 
398  if (pCharSet)
399  *pCharSet = eCharSet;
400  if (pSwap)
401  *pSwap = bSwap;
402  if (pLineEnd)
403  *pLineEnd = eLineEnd;
404 
405  return !bIsBareUnicode;
406 }
407 
408 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool is() const
ErrCode GetError() const
#define FILTER_RTF
RTF filter.
Definition: iodetect.hxx:29
sal_uIntPtr sal_uLong
SotClipboardFormatId GetFormat()
sal_Int64 n
const OUString & GetUserData() const
SotClipboardFormatId GetFormat() const
#define sWW6
Definition: iodetect.hxx:41
bool IsContained(const OUString &rEleName) const
#define FILTER_BAS
StarBasic (identical to ANSI)
Definition: iodetect.hxx:32
LINEEND_CR
static bool IsStorageFile(OUString const &rFileName)
#define sWW5
Definition: iodetect.hxx:40
bool match(const sal_Unicode *pWild, const sal_Unicode *pStr, const sal_Unicode cEscape)
sal_uInt16 sal_Unicode
static bool IsValidStgFilter(SotStorage &, const SfxFilter &)
Definition: iodetect.cxx:113
#define sSWRITER
Definition: iodetect.hxx:43
bool IsWriter() const
LineEnd GetSystemLineEnd()
#define FILTER_XMLVW
XML filter.
Definition: iodetect.hxx:37
SotClipboardFormatId
static SotClipboardFormatId GetFormatID(css::uno::Reference< css::embed::XStorage > const &xStorage)
#define sSWRITERWEB
Definition: iodetect.hxx:44
static SW_DLLPUBLIC std::shared_ptr< const SfxFilter > GetFilterOfFormat(const OUString &rFormat, const SfxFilterContainer *pCnt=nullptr)
find for an internal format name the corresponding filter entry
Definition: iodetect.cxx:69
LINEEND_LF
SotStorageStream * OpenSotStream(const OUString &rEleName, StreamMode=StreamMode::STD_READWRITE)
void SetSmartProtocol(INetProtocol eTheSmartScheme)
bool IsAllowedAsTemplate() const
static bool IsDetectableText(const char *pBuf, sal_uLong &rLen, rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
Definition: iodetect.cxx:242
#define FILTER_DOCX
Definition: iodetect.hxx:38
#define sHTML
Definition: iodetect.hxx:39
SwIoDetect aFilterDetect[]
Definition: iodetect.cxx:41
static bool IsDocShellRegistered()
Definition: iodetect.cxx:36
#define FILTER_TEXT_DLG
text filter with encoding dialog
Definition: iodetect.hxx:34
#define FILTER_WW8
WinWord 97 filter.
Definition: iodetect.hxx:33
LineEnd
OUString GetMainURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
css::uno::Reference< css::embed::XStorage > GetStorage(bool bCreateTempFile=true)
#define ERRCODE_NONE
unsigned char sal_uInt8
#define sRtfWH
Definition: iodetect.hxx:30
#define FILTER_XML
XML filter.
Definition: iodetect.hxx:35
LINEEND_CRLF
static std::shared_ptr< const SfxFilter > GetFileFilter(const OUString &rFileName)
Detect for the given file which filter should be used.
Definition: iodetect.cxx:150
static OUString GetSubStorageName(const SfxFilter &rFltr)
Definition: iodetect.cxx:56
#define FILTER_XMLV
XML filter.
Definition: iodetect.hxx:36
#define FILTER_TEXT
text filter with default codeset
Definition: iodetect.hxx:31
OUString const & GetName() const
bool SetSmartURL(OUString const &rTheAbsURIRef, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8, FSysStyle eStyle=FSysStyle::Detect)