LibreOffice Module sw (master)  1
iodetect.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <iodetect.hxx>
21 #include <memory>
22 #include <osl/endian.h>
23 #include <sot/storage.hxx>
24 #include <tools/urlobj.hxx>
26 #include <sfx2/docfilt.hxx>
27 #include <sfx2/fcontnr.hxx>
28 #include <sfx2/docfile.hxx>
29 #include <com/sun/star/ucb/ContentCreationException.hpp>
30 #include <com/sun/star/embed/XStorage.hpp>
31 
32 using namespace ::com::sun::star;
33 
34 static bool IsDocShellRegistered()
35 {
36  return SvtModuleOptions().IsWriter();
37 }
38 
// NOTE(review): the declarator that opens this initializer list (listing
// line 39) is missing from this extract; the cross-reference index at the end
// of the page names it as `SwIoDetect aFilterDetect[]`.
40 {
// NOTE(review): listing lines 41, 42, 44 and 48-51 are also absent from this
// extract, so the entries below are not the complete table.
43  SwIoDetect( sWW6 ),
45  SwIoDetect( sRtfWH ),
46  SwIoDetect( sHTML ),
47  SwIoDetect( sWW5 ),
52 };
53 
54 OUString SwIoSystem::GetSubStorageName( const SfxFilter& rFltr )
55 {
56  // for StorageFilters also set the SubStorageName
57  const OUString& rUserData = rFltr.GetUserData();
58  if (rUserData == FILTER_XML ||
59  rUserData == FILTER_XMLV ||
60  rUserData == FILTER_XMLVW)
61  return "content.xml";
62  if (rUserData == sWW6 || rUserData == FILTER_WW8)
63  return "WordDocument";
64  return OUString();
65 }
66 
67 std::shared_ptr<const SfxFilter> SwIoSystem::GetFilterOfFormat(const OUString& rFormatNm,
68  const SfxFilterContainer* pCnt)
69 {
70  SfxFilterContainer aCntSw( sSWRITER );
71  SfxFilterContainer aCntSwWeb( sSWRITERWEB );
72  const SfxFilterContainer* pFltCnt = pCnt ? pCnt : ( IsDocShellRegistered() ? &aCntSw : &aCntSwWeb );
73 
74  do {
75  if( pFltCnt )
76  {
77  SfxFilterMatcher aMatcher( pFltCnt->GetName() );
78  SfxFilterMatcherIter aIter( aMatcher );
79  std::shared_ptr<const SfxFilter> pFilter = aIter.First();
80  while ( pFilter )
81  {
82  if( pFilter->GetUserData() == rFormatNm )
83  return pFilter;
84  pFilter = aIter.Next();
85  }
86  }
87  if( pCnt || pFltCnt == &aCntSwWeb )
88  break;
89  pFltCnt = &aCntSwWeb;
90  } while( true );
91  return nullptr;
92 }
93 
94 bool SwIoSystem::IsValidStgFilter( const css::uno::Reference < css::embed::XStorage >& rStg, const SfxFilter& rFilter)
95 {
96  bool bRet = false;
97  try
98  {
99  SotClipboardFormatId nStgFormatId = SotStorage::GetFormatID( rStg );
100  bRet = rStg->isStreamElement( "content.xml" );
101  if ( bRet )
102  bRet = ( nStgFormatId != SotClipboardFormatId::NONE && ( rFilter.GetFormat() == nStgFormatId ) );
103  }
104  catch (const css::uno::Exception& )
105  {
106  }
107 
108  return bRet;
109 }
110 
// Checks whether the SotStorage rStg fits the filter rFilter.
// NOTE(review): the function header (listing line 111) is missing from this
// extract; the cross-reference index at the end of the page gives the
// declaration as `static bool IsValidStgFilter(SotStorage &, const SfxFilter &)`.
// The body refers to the two parameters as rStg and rFilter.
112 {
113  SotClipboardFormatId nStgFormatId = rStg.GetFormat();
114  /*#i8409# We cannot trust the clipboard id anymore :-(*/
// For the two WinWord filter ids the storage's format id is disregarded.
115  if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
116  nStgFormatId = SotClipboardFormatId::NONE;
117 
// Basic acceptance: the storage reports no error, its format id is either
// unset or equal to the filter's, and it contains the element named by
// GetSubStorageName() for this filter.
118  bool bRet = ERRCODE_NONE == rStg.GetError() &&
119  ( nStgFormatId == SotClipboardFormatId::NONE || rFilter.GetFormat() == nStgFormatId ) &&
120  ( rStg.IsContained( SwIoSystem::GetSubStorageName( rFilter )) );
121  if( bRet )
122  {
123  /* Bug 53445 - there are Excel Docs w/o ClipBoardId! */
124  /* Bug 62703 - and also WinWord Docs w/o ClipBoardId! */
125  if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
126  {
// A "0Table"/"1Table" element must be present exactly when the filter is
// FILTER_WW8, and absent for sWW6.
127  bRet = (rStg.IsContained("0Table")
128  || rStg.IsContained("1Table"))
129  == (rFilter.GetUserData() == FILTER_WW8);
130  if (bRet && !rFilter.IsAllowedAsTemplate())
131  {
// NOTE(review): listing line 132, which declares xRef and receives the result
// of OpenSotStream(), is missing from this extract.
133  rStg.OpenSotStream("WordDocument",
134  StreamMode::STD_READ );
// Read the byte at offset 10 of the "WordDocument" stream; the storage is
// rejected when its lowest bit is set.
// NOTE(review): presumably that bit marks the document as a template, given
// the IsAllowedAsTemplate() guard - confirm against the Word file format.
135  xRef->Seek(10);
// NOTE(review): nByte is not initialised before the read; whether ReadUChar
// assigns it on a failed read is not visible here.
136  sal_uInt8 nByte;
137  xRef->ReadUChar( nByte );
138  bRet = !(nByte & 1);
139  }
140  }
141  }
142  return bRet;
143 }
144 
145 // Check the type of the stream (file) by searching for corresponding set of bytes.
146 // If no known type is found, return ASCII for now!
147 // Returns the internal FilterName.
// Determines the filter to use for the file named rFileName.
// Filters are enumerated from the Writer container when IsDocShellRegistered()
// is true, otherwise from the Writer/Web container. Returns nullptr when that
// enumeration yields no filter at all.
// NOTE(review): listing lines 163 and 237 are missing from this extract, so
// the declaration of xStg and the statement executed for non-storage files
// are not visible below.
148 std::shared_ptr<const SfxFilter> SwIoSystem::GetFileFilter(const OUString& rFileName)
149 {
150  SfxFilterContainer aCntSw( sSWRITER );
151  SfxFilterContainer aCntSwWeb( sSWRITERWEB );
152  const SfxFilterContainer* pFCntnr = IsDocShellRegistered() ? &aCntSw : &aCntSwWeb;
153 
154  SfxFilterMatcher aMatcher( pFCntnr->GetName() );
155  SfxFilterMatcherIter aIter( aMatcher );
156  std::shared_ptr<const SfxFilter> pFilter = aIter.First();
157  if ( !pFilter )
158  return nullptr;
159 
160  if (SotStorage::IsStorageFile(rFileName))
161  {
162  // package storage or OLEStorage based format
// NOTE(review): listing line 163 (declaration of xStg, used further down) is
// missing from this extract.
// Turn the file name into a URL and open it read-only as a medium.
164  INetURLObject aObj;
165  aObj.SetSmartProtocol( INetProtocol::File );
166  aObj.SetSmartURL( rFileName );
167  SfxMedium aMedium(aObj.GetMainURL(INetURLObject::DecodeMechanism::NONE), StreamMode::STD_READ);
168 
169  // templates should not get precedence over "normal" filters (#i35508, #i33168)
170  std::shared_ptr<const SfxFilter> pTemplateFilter;
171  if (aMedium.IsStorage())
172  {
173  uno::Reference<embed::XStorage> const xStor = aMedium.GetStorage();
174  if ( xStor.is() )
175  {
// Only filters whose user data starts with "C" are tested against the storage.
// The first matching non-template filter is returned immediately; a matching
// template filter is only remembered.
176  while ( pFilter )
177  {
178  if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(xStor, *pFilter ))
179  {
180  if (pFilter->IsOwnTemplateFormat())
181  {
182  // found template filter; maybe there's a "normal" one also
183  pTemplateFilter = pFilter;
184  }
185  else
186  return pFilter;
187  }
188 
189  pFilter = aIter.Next();
190  }
191 
// The loop ended with pFilter null, so from here on the result is either the
// remembered template filter or null.
192  // there's only a template filter that could be found
193  if ( pTemplateFilter )
194  pFilter = pTemplateFilter;
195  }
196  }
197  else
198  {
// Not a package storage: try to wrap the medium's input stream in a
// SotStorage. A ContentCreationException while doing so is ignored.
199  try
200  {
201  SvStream *const pStream = aMedium.GetInStream();
202  if ( pStream && SotStorage::IsStorageFile(pStream) )
203  xStg = new SotStorage( pStream, false );
204  }
205  catch (const css::ucb::ContentCreationException &)
206  {
207  }
208 
209  if( xStg.is() && ( xStg->GetError() == ERRCODE_NONE ) )
210  {
// Same selection rule as in the package-storage branch, using the SotStorage
// overload of IsValidStgFilter.
211  while ( pFilter )
212  {
213  if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(*xStg, *pFilter))
214  {
215  if (pFilter->IsOwnTemplateFormat())
216  {
217  // found template filter; maybe there's a "normal" one also
218  pTemplateFilter = pFilter;
219  }
220  else
221  return pFilter;
222  }
223 
224  pFilter = aIter.Next();
225  }
226 
227  // there's only a template filter that could be found
228  if ( pTemplateFilter )
229  pFilter = pTemplateFilter;
230 
231  }
232  }
233 
// NOTE(review): when neither storage could be obtained (xStor not set, or xStg
// unset/in error) neither loop runs, so pFilter still holds the first filter
// from the matcher and that is what gets returned - confirm this is intended.
234  return pFilter;
235  }
236 
// NOTE(review): listing line 237, the statement reached for files that are not
// storage files, is missing from this extract.
238 }
239 
240 bool SwIoSystem::IsDetectableText(const char* pBuf, sal_uLong &rLen,
241  rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
242 {
243  bool bSwap = false;
244  rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
245  bool bLE = true;
246  /*See if it's a known unicode type*/
247  if (rLen >= 2)
248  {
249  sal_uLong nHead=0;
250  if (rLen > 2 && sal_uInt8(pBuf[0]) == 0xEF && sal_uInt8(pBuf[1]) == 0xBB &&
251  sal_uInt8(pBuf[2]) == 0xBF)
252  {
253  eCharSet = RTL_TEXTENCODING_UTF8;
254  nHead = 3;
255  }
256  else if (sal_uInt8(pBuf[0]) == 0xFE && sal_uInt8(pBuf[1]) == 0xFF)
257  {
258  eCharSet = RTL_TEXTENCODING_UCS2;
259  bLE = false;
260  nHead = 2;
261  }
262  else if (sal_uInt8(pBuf[1]) == 0xFE && sal_uInt8(pBuf[0]) == 0xFF)
263  {
264  eCharSet = RTL_TEXTENCODING_UCS2;
265  nHead = 2;
266  }
267  pBuf+=nHead;
268  rLen-=nHead;
269  }
270 
271  bool bCR = false, bLF = false, bIsBareUnicode = false;
272 
273  if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
274  {
275  std::unique_ptr<sal_Unicode[]> aWork(new sal_Unicode[rLen+1]);
276  sal_Unicode *pNewBuf = aWork.get();
277  std::size_t nNewLen;
278  if (eCharSet != RTL_TEXTENCODING_UCS2)
279  {
280  nNewLen = rLen;
281  rtl_TextToUnicodeConverter hConverter =
282  rtl_createTextToUnicodeConverter(eCharSet);
283  rtl_TextToUnicodeContext hContext =
284  rtl_createTextToUnicodeContext(hConverter);
285 
286  sal_Size nCntBytes;
287  sal_uInt32 nInfo;
288  nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
289  rLen, pNewBuf, nNewLen,
290  (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
291  RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
292  RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes);
293 
294  rtl_destroyTextToUnicodeContext(hConverter, hContext);
295  rtl_destroyTextToUnicodeConverter(hConverter);
296  }
297  else
298  {
299  nNewLen = rLen/2;
300  memcpy(pNewBuf, pBuf, rLen);
301 #ifdef OSL_LITENDIAN
302  bool const bNativeLE = true;
303 #else
304  bool const bNativeLE = false;
305 #endif
306  if (bLE != bNativeLE)
307  {
308  bSwap = true;
309  char* pF = reinterpret_cast<char*>(pNewBuf);
310  char* pN = pF+1;
311  for(sal_uLong n = 0; n < nNewLen; ++n, pF+=2, pN+=2 )
312  {
313  char c = *pF;
314  *pF = *pN;
315  *pN = c;
316  }
317  }
318  }
319 
320  for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
321  {
322  switch (*pNewBuf)
323  {
324  case 0xA:
325  bLF = true;
326  break;
327  case 0xD:
328  bCR = true;
329  break;
330  default:
331  break;
332  }
333  }
334  }
335  else
336  {
337  for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
338  {
339  switch (*pBuf)
340  {
341  case 0x0:
342  if( nCnt + 1 < rLen && !*(pBuf+1) )
343  return false;
344  bIsBareUnicode = true;
345  break;
346  case 0xA:
347  bLF = true;
348  break;
349  case 0xD:
350  bCR = true;
351  break;
352  case 0xC:
353  case 0x1A:
354  case 0x9:
355  break;
356  default:
357  break;
358  }
359  }
360  }
361 
362  LineEnd eSysLE = GetSystemLineEnd();
363  LineEnd eLineEnd;
364  if (!bCR && !bLF)
365  eLineEnd = eSysLE;
366  else
367  eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
368 
369  if (pCharSet)
370  *pCharSet = eCharSet;
371  if (pSwap)
372  *pSwap = bSwap;
373  if (pLineEnd)
374  *pLineEnd = eLineEnd;
375 
376  return !bIsBareUnicode;
377 }
378 
379 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool is() const
ErrCode GetError() const
#define FILTER_RTF
RTF filter.
Definition: iodetect.hxx:29
sal_uIntPtr sal_uLong
SotClipboardFormatId GetFormat()
sal_Int64 n
const OUString & GetUserData() const
SotClipboardFormatId GetFormat() const
#define sWW6
Definition: iodetect.hxx:41
bool IsContained(const OUString &rEleName) const
#define FILTER_BAS
StarBasic (identical to ANSI)
Definition: iodetect.hxx:32
LINEEND_CR
static bool IsStorageFile(OUString const &rFileName)
#define sWW5
Definition: iodetect.hxx:40
sal_uInt16 sal_Unicode
static bool IsValidStgFilter(SotStorage &, const SfxFilter &)
Definition: iodetect.cxx:111
#define sSWRITER
Definition: iodetect.hxx:43
bool IsWriter() const
LineEnd GetSystemLineEnd()
#define FILTER_XMLVW
XML filter.
Definition: iodetect.hxx:37
SotClipboardFormatId
static SotClipboardFormatId GetFormatID(css::uno::Reference< css::embed::XStorage > const &xStorage)
#define sSWRITERWEB
Definition: iodetect.hxx:44
static SW_DLLPUBLIC std::shared_ptr< const SfxFilter > GetFilterOfFormat(const OUString &rFormat, const SfxFilterContainer *pCnt=nullptr)
find for an internal format name the corresponding filter entry
Definition: iodetect.cxx:67
LINEEND_LF
SotStorageStream * OpenSotStream(const OUString &rEleName, StreamMode=StreamMode::STD_READWRITE)
void SetSmartProtocol(INetProtocol eTheSmartScheme)
bool IsAllowedAsTemplate() const
static bool IsDetectableText(const char *pBuf, sal_uLong &rLen, rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
Definition: iodetect.cxx:240
#define FILTER_DOCX
Definition: iodetect.hxx:38
#define sHTML
Definition: iodetect.hxx:39
SwIoDetect aFilterDetect[]
Definition: iodetect.cxx:39
static bool IsDocShellRegistered()
Definition: iodetect.cxx:34
#define FILTER_TEXT_DLG
text filter with encoding dialog
Definition: iodetect.hxx:34
#define FILTER_WW8
WinWord 97 filter.
Definition: iodetect.hxx:33
LineEnd
OUString GetMainURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
css::uno::Reference< css::embed::XStorage > GetStorage(bool bCreateTempFile=true)
#define ERRCODE_NONE
unsigned char sal_uInt8
#define sRtfWH
Definition: iodetect.hxx:30
#define FILTER_XML
XML filter.
Definition: iodetect.hxx:35
LINEEND_CRLF
static std::shared_ptr< const SfxFilter > GetFileFilter(const OUString &rFileName)
Detect for the given file which filter should be used.
Definition: iodetect.cxx:148
static OUString GetSubStorageName(const SfxFilter &rFltr)
Definition: iodetect.cxx:54
#define FILTER_XMLV
XML filter.
Definition: iodetect.hxx:36
#define FILTER_TEXT
text filter with default codeset
Definition: iodetect.hxx:31
OUString const & GetName() const
bool SetSmartURL(OUString const &rTheAbsURIRef, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8, FSysStyle eStyle=FSysStyle::Detect)