LibreOffice Module sw (master)  1
iodetect.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <iodetect.hxx>
21 #include <memory>
22 #include <osl/endian.h>
23 #include <sot/storage.hxx>
24 #include <svtools/parhtml.hxx>
25 #include <tools/urlobj.hxx>
27 #include <sfx2/docfilt.hxx>
28 #include <sfx2/fcontnr.hxx>
29 #include <sfx2/docfile.hxx>
30 #include <com/sun/star/ucb/ContentCreationException.hpp>
31 #include <com/sun/star/embed/XStorage.hpp>
32 
33 using namespace ::com::sun::star;
34 
35 static bool IsDocShellRegistered()
36 {
37  return SvtModuleOptions().IsWriter();
38 }
39 
41 {
44  SwIoDetect( sWW6 ),
46  SwIoDetect( sRtfWH ),
47  SwIoDetect( sHTML ),
48  SwIoDetect( sWW5 ),
53 };
54 
55 const OUString SwIoSystem::GetSubStorageName( const SfxFilter& rFltr )
56 {
57  // for StorageFilters also set the SubStorageName
58  const OUString& rUserData = rFltr.GetUserData();
59  if (rUserData == FILTER_XML ||
60  rUserData == FILTER_XMLV ||
61  rUserData == FILTER_XMLVW)
62  return OUString("content.xml");
63  if (rUserData == sWW6 || rUserData == FILTER_WW8)
64  return OUString("WordDocument");
65  return OUString();
66 }
67 
68 std::shared_ptr<const SfxFilter> SwIoSystem::GetFilterOfFormat(const OUString& rFormatNm,
69  const SfxFilterContainer* pCnt)
70 {
71  SfxFilterContainer aCntSw( sSWRITER );
72  SfxFilterContainer aCntSwWeb( sSWRITERWEB );
73  const SfxFilterContainer* pFltCnt = pCnt ? pCnt : ( IsDocShellRegistered() ? &aCntSw : &aCntSwWeb );
74 
75  do {
76  if( pFltCnt )
77  {
78  SfxFilterMatcher aMatcher( pFltCnt->GetName() );
79  SfxFilterMatcherIter aIter( aMatcher );
80  std::shared_ptr<const SfxFilter> pFilter = aIter.First();
81  while ( pFilter )
82  {
83  if( pFilter->GetUserData() == rFormatNm )
84  return pFilter;
85  pFilter = aIter.Next();
86  }
87  }
88  if( pCnt || pFltCnt == &aCntSwWeb )
89  break;
90  pFltCnt = &aCntSwWeb;
91  } while( true );
92  return nullptr;
93 }
94 
95 bool SwIoSystem::IsValidStgFilter( const css::uno::Reference < css::embed::XStorage >& rStg, const SfxFilter& rFilter)
96 {
97  bool bRet = false;
98  try
99  {
100  SotClipboardFormatId nStgFormatId = SotStorage::GetFormatID( rStg );
101  bRet = rStg->isStreamElement( "content.xml" );
102  if ( bRet )
103  bRet = ( nStgFormatId != SotClipboardFormatId::NONE && ( rFilter.GetFormat() == nStgFormatId ) );
104  }
105  catch (const css::uno::Exception& )
106  {
107  }
108 
109  return bRet;
110 }
111 
113 {
// Checks whether the storage rStg can be handled by rFilter.
// NOTE(review): the signature line of this function is not present in the
// visible text; the symbol index further down lists it as
// IsValidStgFilter(SotStorage &, const SfxFilter &) - confirm against the
// original file.
114  SotClipboardFormatId nStgFormatId = rStg.GetFormat();
115  /*#i8409# We cannot trust the clipboard id anymore :-(*/
// For the two WinWord filters the storage's format id is discarded, so the
// format comparison below always passes for them.
116  if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
117  nStgFormatId = SotClipboardFormatId::NONE;
118 
// Accept only an error-free storage whose format id is either unknown (NONE)
// or equal to the filter's, and which contains the element named by
// GetSubStorageName() for this filter.
119  bool bRet = ERRCODE_NONE == rStg.GetError() &&
120  ( nStgFormatId == SotClipboardFormatId::NONE || rFilter.GetFormat() == nStgFormatId ) &&
121  ( rStg.IsContained( SwIoSystem::GetSubStorageName( rFilter )) );
122  if( bRet )
123  {
124  /* Bug 53445 - there are Excel Docs w/o ClipBoardId! */
125  /* Bug 62703 - and also WinWord Docs w/o ClipBoardId! */
126  if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
127  {
// The presence of a "0Table" or "1Table" element must coincide with the
// filter being FILTER_WW8: present for FILTER_WW8, absent for sWW6.
128  bRet = (rStg.IsContained("0Table")
129  || rStg.IsContained("1Table"))
130  == (rFilter.GetUserData() == FILTER_WW8);
131  if (bRet && !rFilter.IsAllowedAsTemplate())
132  {
// NOTE(review): xRef is not declared in the visible text; presumably the line
// holding its declaration (the left-hand side of this OpenSotStream call) was
// lost - confirm against the original file.
134  rStg.OpenSotStream("WordDocument",
135  StreamMode::STD_READ );
// Read the byte at offset 10 of the "WordDocument" stream; the storage is
// rejected when bit 0 of that byte is set.
// NOTE(review): presumably this bit marks the document as a template - verify.
136  xRef->Seek(10);
137  sal_uInt8 nByte;
138  xRef->ReadUChar( nByte );
139  bRet = !(nByte & 1);
140  }
141  }
142  }
143  return bRet;
144 }
145 
146 // Check the type of the stream (file) by searching for corresponding set of bytes.
147 // If no known type is found, return ASCII for now!
148 // Returns the internal FilterName.
// Detects which filter should be used for the file rFileName.
// Filters are enumerated from the sSWRITER container when
// IsDocShellRegistered() is true, otherwise from sSWRITERWEB. For storage
// files, each filter whose user data starts with "C" is tested with
// IsValidStgFilter(); a non-template match is returned immediately, a template
// match is only used if no other filter matches.
// Returns nullptr when the matcher yields no filter at all.
149 std::shared_ptr<const SfxFilter> SwIoSystem::GetFileFilter(const OUString& rFileName)
150 {
151  SfxFilterContainer aCntSw( sSWRITER );
152  SfxFilterContainer aCntSwWeb( sSWRITERWEB );
153  const SfxFilterContainer* pFCntnr = IsDocShellRegistered() ? &aCntSw : &aCntSwWeb;
154 
155  SfxFilterMatcher aMatcher( pFCntnr->GetName() );
156  SfxFilterMatcherIter aIter( aMatcher );
157  std::shared_ptr<const SfxFilter> pFilter = aIter.First();
158  if ( !pFilter )
159  return nullptr;
160 
161  if (SotStorage::IsStorageFile(rFileName))
162  {
163  // package storage or OLEStorage based format
// NOTE(review): xStg, used in the else branch below, is not declared in the
// visible text; presumably its declaration line was lost here - confirm
// against the original file.
// Turn the file name into a file URL and open it as a medium with
// StreamMode::STD_READ.
165  INetURLObject aObj;
166  aObj.SetSmartProtocol( INetProtocol::File );
167  aObj.SetSmartURL( rFileName );
168  SfxMedium aMedium(aObj.GetMainURL(INetURLObject::DecodeMechanism::NONE), StreamMode::STD_READ);
169 
170  // templates should not get precedence over "normal" filters (#i35508, #i33168)
171  std::shared_ptr<const SfxFilter> pTemplateFilter;
172  if (aMedium.IsStorage())
173  {
// Medium reports itself as a storage: test the filters against its XStorage.
174  uno::Reference<embed::XStorage> const xStor = aMedium.GetStorage();
175  if ( xStor.is() )
176  {
177  while ( pFilter )
178  {
179  if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(xStor, *pFilter ))
180  {
181  if (pFilter->IsOwnTemplateFormat())
182  {
183  // found template filter; maybe there's a "normal" one also
184  pTemplateFilter = pFilter;
185  }
186  else
187  return pFilter;
188  }
189 
190  pFilter = aIter.Next();
191  }
192 
193  // there's only a template filter that could be found
194  if ( pTemplateFilter )
195  pFilter = pTemplateFilter;
196  }
197  }
198  else
199  {
// Otherwise wrap the medium's input stream in a SotStorage, provided the
// stream itself is recognised as a storage file. A ContentCreationException
// raised while doing so is ignored.
200  try
201  {
202  SvStream *const pStream = aMedium.GetInStream();
203  if ( pStream && SotStorage::IsStorageFile(pStream) )
204  xStg = new SotStorage( pStream, false );
205  }
206  catch (const css::ucb::ContentCreationException &)
207  {
208  }
209 
210  if( xStg.is() && ( xStg->GetError() == ERRCODE_NONE ) )
211  {
// Same search as above, using the SotStorage overload of IsValidStgFilter().
212  while ( pFilter )
213  {
214  if (pFilter->GetUserData().startsWith("C") && IsValidStgFilter(*xStg, *pFilter))
215  {
216  if (pFilter->IsOwnTemplateFormat())
217  {
218  // found template filter; maybe there's a "normal" one also
219  pTemplateFilter = pFilter;
220  }
221  else
222  return pFilter;
223  }
224 
225  pFilter = aIter.Next();
226  }
227 
228  // there's only a template filter that could be found
229  if ( pTemplateFilter )
230  pFilter = pTemplateFilter;
231 
232  }
233  }
234 
// After a completed search pFilter is the template filter, or null when
// nothing matched. If neither storage could be opened, no search ran and
// pFilter still holds the first filter returned by aIter.First().
235  return pFilter;
236  }
237 
// NOTE(review): the statement for files that are not storage files is not
// present in the visible text (the function appears to end without a return
// on this path); presumably a line was lost here - confirm against the
// original file.
239 }
240 
242  rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
243 {
// Inspects the buffer pBuf of length rLen for a byte order mark and for line
// end characters. The detected encoding, the need for byte swapping and the
// line end style are written through pCharSet, pSwap and pLineEnd, each only
// if the pointer is non-null. Returns false if the data contains zero bytes
// without a byte order mark, true otherwise.
// NOTE(review): the first line of the signature is not present in the visible
// text; the symbol index further down lists the leading parameters as
// (const sal_Char *pBuf, sal_uLong &rLen, ...) - confirm against the original
// file.
244  bool bSwap = false;
245  rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
246  bool bLE = true;
247  /*See if it's a known unicode type*/
248  if (rLen >= 2)
249  {
// Byte order marks: EF BB BF selects UTF-8, FE FF selects UCS-2 with
// bLE = false, FF FE selects UCS-2 with bLE left at true. nHead is the
// number of mark bytes to skip.
250  sal_uLong nHead=0;
251  if (rLen > 2 && sal_uInt8(pBuf[0]) == 0xEF && sal_uInt8(pBuf[1]) == 0xBB &&
252  sal_uInt8(pBuf[2]) == 0xBF)
253  {
254  eCharSet = RTL_TEXTENCODING_UTF8;
255  nHead = 3;
256  }
257  else if (sal_uInt8(pBuf[0]) == 0xFE && sal_uInt8(pBuf[1]) == 0xFF)
258  {
259  eCharSet = RTL_TEXTENCODING_UCS2;
260  bLE = false;
261  nHead = 2;
262  }
263  else if (sal_uInt8(pBuf[1]) == 0xFE && sal_uInt8(pBuf[0]) == 0xFF)
264  {
265  eCharSet = RTL_TEXTENCODING_UCS2;
266  nHead = 2;
267  }
// Skip the mark. rLen is reduced by the mark size as well.
268  pBuf+=nHead;
269  rLen-=nHead;
270  }
271 
272  bool bCR = false, bLF = false, bIsBareUnicode = false;
273 
274  if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
275  {
// A byte order mark was found: bring the data into a sal_Unicode work buffer
// and scan that buffer for CR / LF.
276  std::unique_ptr<sal_Unicode[]> aWork(new sal_Unicode[rLen+1]);
277  sal_Unicode *pNewBuf = aWork.get();
278  std::size_t nNewLen;
279  if (eCharSet != RTL_TEXTENCODING_UCS2)
280  {
// Not UCS-2 (i.e. UTF-8 here): convert with the rtl text-to-unicode
// converter; the flags request default handling of undefined and invalid
// input.
281  nNewLen = rLen;
282  rtl_TextToUnicodeConverter hConverter =
283  rtl_createTextToUnicodeConverter(eCharSet);
284  rtl_TextToUnicodeContext hContext =
285  rtl_createTextToUnicodeContext(hConverter);
286 
287  sal_Size nCntBytes;
288  sal_uInt32 nInfo;
289  nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
290  rLen, pNewBuf, nNewLen,
291  (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
292  RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
293  RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes);
294 
295  rtl_destroyTextToUnicodeContext(hConverter, hContext);
296  rtl_destroyTextToUnicodeConverter(hConverter);
297  }
298  else
299  {
// UCS-2: copy the raw bytes; two bytes form one code unit.
300  nNewLen = rLen/2;
301  memcpy(pNewBuf, pBuf, rLen);
302 #ifdef OSL_LITENDIAN
303  bool const bNativeLE = true;
304 #else
305  bool const bNativeLE = false;
306 #endif
307  if (bLE != bNativeLE)
308  {
// Data byte order differs from the platform's: report it via bSwap and
// exchange the two bytes of every code unit in the work buffer.
309  bSwap = true;
310  sal_Char* pF = reinterpret_cast<char*>(pNewBuf);
311  sal_Char* pN = pF+1;
312  for(sal_uLong n = 0; n < nNewLen; ++n, pF+=2, pN+=2 )
313  {
314  sal_Char c = *pF;
315  *pF = *pN;
316  *pN = c;
317  }
318  }
319  }
320 
// Record whether LF (0xA) and/or CR (0xD) occur in the converted text.
321  for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
322  {
323  switch (*pNewBuf)
324  {
325  case 0xA:
326  bLF = true;
327  break;
328  case 0xD:
329  bCR = true;
330  break;
331  default:
332  break;
333  }
334  }
335  }
336  else
337  {
// No byte order mark: scan the raw bytes.
338  for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
339  {
340  switch (*pBuf)
341  {
342  case 0x0:
// Two consecutive zero bytes: give up immediately; the output parameters are
// not written in this case. A single zero byte only marks the data as bare
// unicode, which makes the function return false at the end.
343  if( nCnt + 1 < rLen && !*(pBuf+1) )
344  return false;
345  bIsBareUnicode = true;
346  break;
347  case 0xA:
348  bLF = true;
349  break;
350  case 0xD:
351  bCR = true;
352  break;
353  case 0xC:
354  case 0x1A:
355  case 0x9:
356  break;
357  default:
358  break;
359  }
360  }
361  }
362 
// Derive the line end style: the system default if neither CR nor LF was
// seen, otherwise CRLF, CR or LF according to which characters occurred.
363  LineEnd eSysLE = GetSystemLineEnd();
364  LineEnd eLineEnd;
365  if (!bCR && !bLF)
366  eLineEnd = eSysLE;
367  else
368  eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
369 
// Hand the results back through whichever output pointers were supplied.
370  if (pCharSet)
371  *pCharSet = eCharSet;
372  if (pSwap)
373  *pSwap = bSwap;
374  if (pLineEnd)
375  *pLineEnd = eLineEnd;
376 
377  return !bIsBareUnicode;
378 }
379 
380 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool is() const
ErrCode GetError() const
#define FILTER_RTF
RTF filter.
Definition: iodetect.hxx:29
sal_uIntPtr sal_uLong
SotClipboardFormatId GetFormat()
const OUString & GetUserData() const
SotClipboardFormatId GetFormat() const
#define sWW6
Definition: iodetect.hxx:41
bool IsContained(const OUString &rEleName) const
#define FILTER_BAS
StarBasic (identical to ANSI)
Definition: iodetect.hxx:32
LINEEND_CR
static bool IsStorageFile(OUString const &rFileName)
#define sWW5
Definition: iodetect.hxx:40
sal_uInt16 sal_Unicode
static bool IsValidStgFilter(SotStorage &, const SfxFilter &)
Definition: iodetect.cxx:112
#define sSWRITER
Definition: iodetect.hxx:43
bool IsWriter() const
static bool IsDetectableText(const sal_Char *pBuf, sal_uLong &rLen, rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
Definition: iodetect.cxx:241
char sal_Char
LineEnd GetSystemLineEnd()
#define FILTER_XMLVW
XML filter.
Definition: iodetect.hxx:37
css::uno::Reference< css::embed::XStorage > GetStorage(bool bCreateTempIfNo=true)
SotClipboardFormatId
static SotClipboardFormatId GetFormatID(css::uno::Reference< css::embed::XStorage > const &xStorage)
#define sSWRITERWEB
Definition: iodetect.hxx:44
static SW_DLLPUBLIC std::shared_ptr< const SfxFilter > GetFilterOfFormat(const OUString &rFormat, const SfxFilterContainer *pCnt=nullptr)
find for an internal format name the corresponding filter entry
Definition: iodetect.cxx:68
LINEEND_LF
SotStorageStream * OpenSotStream(const OUString &rEleName, StreamMode=StreamMode::STD_READWRITE)
void SetSmartProtocol(INetProtocol eTheSmartScheme)
bool IsAllowedAsTemplate() const
#define FILTER_DOCX
Definition: iodetect.hxx:38
#define sHTML
Definition: iodetect.hxx:39
static const OUString GetSubStorageName(const SfxFilter &rFltr)
Definition: iodetect.cxx:55
SwIoDetect aFilterDetect[]
Definition: iodetect.cxx:40
static bool IsDocShellRegistered()
Definition: iodetect.cxx:35
#define FILTER_TEXT_DLG
text filter with encoding dialog
Definition: iodetect.hxx:34
#define FILTER_WW8
WinWord 97 filter.
Definition: iodetect.hxx:33
LineEnd
OUString GetMainURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
#define ERRCODE_NONE
unsigned char sal_uInt8
#define sRtfWH
Definition: iodetect.hxx:30
#define FILTER_XML
XML filter.
Definition: iodetect.hxx:35
LINEEND_CRLF
static std::shared_ptr< const SfxFilter > GetFileFilter(const OUString &rFileName)
Detect for the given file which filter should be used.
Definition: iodetect.cxx:149
#define FILTER_XMLV
XML filter.
Definition: iodetect.hxx:36
#define FILTER_TEXT
text filter with default codeset
Definition: iodetect.hxx:31
OUString const & GetName() const
bool SetSmartURL(OUString const &rTheAbsURIRef, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8, FSysStyle eStyle=FSysStyle::Detect)