LibreOffice Module vcl (master)  1
GraphicFormatDetector.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 
25 #include <tools/solar.h>
26 #include <tools/zcodec.hxx>
27 
28 namespace vcl
29 {
30 namespace
31 {
32 bool isPCT(SvStream& rStream, sal_uLong nStreamPos, sal_uLong nStreamLen)
33 {
34  sal_uInt8 sBuf[3];
35  // store number format
36  SvStreamEndian oldNumberFormat = rStream.GetEndian();
37  sal_uInt32 nOffset; // in MS documents the pict format is used without the first 512 bytes
38  for (nOffset = 0; (nOffset <= 512) && ((nStreamPos + nOffset + 14) <= nStreamLen);
39  nOffset += 512)
40  {
41  short y1, x1, y2, x2;
42  bool bdBoxOk = true;
43 
44  rStream.Seek(nStreamPos + nOffset);
45  // size of the pict in version 1 pict ( 2bytes) : ignored
46  rStream.SeekRel(2);
47  // bounding box (bytes 2 -> 9)
48  rStream.SetEndian(SvStreamEndian::BIG);
49  rStream.ReadInt16(y1).ReadInt16(x1).ReadInt16(y2).ReadInt16(x2);
50  rStream.SetEndian(oldNumberFormat); // reset format
51 
52  if (x1 > x2 || y1 > y2 || // bad bdbox
53  (x1 == x2 && y1 == y2) || // 1 pixel picture
54  x2 - x1 > 2048 || y2 - y1 > 2048) // picture abnormally big
55  bdBoxOk = false;
56 
57  // read version op
58  rStream.ReadBytes(sBuf, 3);
59  // see http://developer.apple.com/legacy/mac/library/documentation/mac/pdf/Imaging_With_QuickDraw/Appendix_A.pdf
60  // normal version 2 - page A23 and A24
61  if (sBuf[0] == 0x00 && sBuf[1] == 0x11 && sBuf[2] == 0x02)
62  return true;
63  // normal version 1 - page A25
64  else if (sBuf[0] == 0x11 && sBuf[1] == 0x01 && bdBoxOk)
65  return true;
66  }
67  return false;
68 }
69 
/** Searches a byte buffer for a pattern, ignoring the case of ASCII letters.
 *
 * Only 'a'..'z' / 'A'..'Z' are treated as equal to each other; every other
 * byte must match exactly. (Blindly masking bit 0x20 would also make
 * punctuation in the pattern match control bytes, e.g. '<' would match 0x1C
 * and ' ' would match NUL.)
 *
 * @param pSource start of the buffer to search
 * @param pDest   pattern to look for
 * @param nComp   number of readable bytes at pSource
 * @param nSize   number of bytes in the pattern
 * @return pointer to the first match inside pSource, or nullptr if the
 *         pattern does not occur (or is longer than the buffer)
 */
sal_uInt8* ImplSearchEntry(sal_uInt8* pSource, sal_uInt8 const* pDest, sal_uLong nComp,
                           sal_uLong nSize)
{
    auto const toUpperAscii = [](sal_uInt8 c) -> sal_uInt8 {
        return (c >= 'a' && c <= 'z') ? static_cast<sal_uInt8>(c - ('a' - 'A')) : c;
    };

    // A candidate window is only examined while at least nSize bytes remain,
    // so the inner comparison never reads past pSource + nComp.
    for (; nComp >= nSize; --nComp, ++pSource)
    {
        sal_uLong i = 0;
        while (i < nSize && toUpperAscii(pSource[i]) == toUpperAscii(pDest[i]))
            ++i;
        if (i == nSize)
            return pSource;
    }
    return nullptr;
}
87 
88 } // end anonymous namespace
89 
90 GraphicFormatDetector::GraphicFormatDetector(SvStream& rStream, OUString const& rFormatExtension)
91  : mrStream(rStream)
92  , maExtension(rFormatExtension)
93  , mnFirstLong(0)
94  , mnSecondLong(0)
95  , mnStreamPosition(0)
96  , mnStreamLength(0)
97 {
98 }
99 
101 {
102  maFirstBytes.clear();
103  maFirstBytes.resize(256, 0);
104 
105  mnFirstLong = 0;
106  mnSecondLong = 0;
107 
110 
111  if (!mnStreamLength)
112  {
113  SvLockBytes* pLockBytes = mrStream.GetLockBytes();
114  if (pLockBytes)
115  pLockBytes->SetSynchronMode();
117  }
118 
119  if (mnStreamLength == 0)
120  {
121  return false; // this prevents at least a STL assertion
122  }
123  else if (mnStreamLength >= maFirstBytes.size())
124  {
125  // load first 256 bytes into a buffer
126  sal_uInt64 nRead = mrStream.ReadBytes(maFirstBytes.data(), maFirstBytes.size());
127  if (nRead < maFirstBytes.size())
128  mnStreamLength = nRead;
129  }
130  else
131  {
133  }
134 
135  if (mrStream.GetError())
136  return false;
137 
138  for (int i = 0; i < 4; ++i)
139  {
140  mnFirstLong = (mnFirstLong << 8) | sal_uInt32(maFirstBytes[i]);
141  mnSecondLong = (mnSecondLong << 8) | sal_uInt32(maFirstBytes[i + 4]);
142  }
143  return true;
144 }
145 
147 {
148  if (maFirstBytes[2] != 0xd3)
149  return false;
150  mrStream.SetEndian(SvStreamEndian::BIG);
152  sal_uInt16 nFieldSize;
154 
155  mrStream.ReadUInt16(nFieldSize).ReadUChar(nMagic);
156  for (int i = 0; i < 3; i++)
157  {
158  if (nFieldSize < 6)
159  return false;
160  if (mnStreamLength < mrStream.Tell() + nFieldSize)
161  return false;
162  mrStream.SeekRel(nFieldSize - 3);
163  mrStream.ReadUInt16(nFieldSize).ReadUChar(nMagic);
164  if (nMagic != 0xd3)
165  return false;
166  }
167  mrStream.SetEndian(SvStreamEndian::LITTLE);
168 
169  if (mrStream.GetError())
170  return false;
171 
172  msDetectedFormat = "MET";
173  return true;
174 }
175 
177 {
178  sal_uInt8 nOffset;
179 
180  // We're possibly also able to read an OS/2 bitmap array
181  // ('BA'), therefore we must adjust the offset to discover the
182  // first bitmap in the array
183  if (maFirstBytes[0] == 0x42 && maFirstBytes[1] == 0x41)
184  nOffset = 14;
185  else
186  nOffset = 0;
187 
188  // Now we initially test on 'BM'
189  if (maFirstBytes[0 + nOffset] == 0x42 && maFirstBytes[1 + nOffset] == 0x4d)
190  {
191  // OS/2 can set the Reserved flags to a value other than 0
192  // (which they really should not do...);
193  // In this case we test the size of the BmpInfoHeaders
194  if ((maFirstBytes[6 + nOffset] == 0x00 && maFirstBytes[7 + nOffset] == 0x00
195  && maFirstBytes[8 + nOffset] == 0x00 && maFirstBytes[9 + nOffset] == 0x00)
196  || maFirstBytes[14 + nOffset] == 0x28 || maFirstBytes[14 + nOffset] == 0x0c)
197  {
198  msDetectedFormat = "BMP";
199  return true;
200  }
201  }
202  return false;
203 }
204 
206 {
207  if (mnFirstLong == 0xd7cdc69a || mnFirstLong == 0x01000900)
208  {
209  msDetectedFormat = "WMF";
210  return true;
211  }
212  else if (mnFirstLong == 0x01000000 && maFirstBytes[40] == 0x20 && maFirstBytes[41] == 0x45
213  && maFirstBytes[42] == 0x4d && maFirstBytes[43] == 0x46)
214  {
215  msDetectedFormat = "EMF";
216  return true;
217  }
218  return false;
219 }
220 
222 {
223  if (maFirstBytes[0] != 0x0a)
224  return false;
225 
226  sal_uInt8 nVersion = maFirstBytes[1];
227  sal_uInt8 nEncoding = maFirstBytes[2];
228  if ((nVersion == 0 || nVersion == 2 || nVersion == 3 || nVersion == 5) && nEncoding <= 1)
229  {
230  msDetectedFormat = "PCX";
231  return true;
232  }
233 
234  return false;
235 }
236 
238 {
239  if (mnFirstLong == 0x49492a00 || mnFirstLong == 0x4d4d002a)
240  {
241  msDetectedFormat = "TIF";
242  return true;
243  }
244  return false;
245 }
246 
248 {
249  if (mnFirstLong == 0x47494638 && (maFirstBytes[4] == 0x37 || maFirstBytes[4] == 0x39)
250  && maFirstBytes[5] == 0x61)
251  {
252  msDetectedFormat = "GIF";
253  return true;
254  }
255  return false;
256 }
257 
259 {
260  if (mnFirstLong == 0x89504e47 && mnSecondLong == 0x0d0a1a0a)
261  {
262  msDetectedFormat = "PNG";
263  return true;
264  }
265  return false;
266 }
267 
269 {
270  if ((mnFirstLong == 0xffd8ffe0 && maFirstBytes[6] == 0x4a && maFirstBytes[7] == 0x46
271  && maFirstBytes[8] == 0x49 && maFirstBytes[9] == 0x46)
272  || (mnFirstLong == 0xffd8fffe) || (0xffd8ff00 == (mnFirstLong & 0xffffff00)))
273  {
274  msDetectedFormat = "JPG";
275  return true;
276  }
277  return false;
278 }
279 
281 {
282  if (mnFirstLong == 0x53564744 && maFirstBytes[4] == 0x49)
283  {
284  msDetectedFormat = "SVM";
285  return true;
286  }
287  else if (maFirstBytes[0] == 0x56 && maFirstBytes[1] == 0x43 && maFirstBytes[2] == 0x4C
288  && maFirstBytes[3] == 0x4D && maFirstBytes[4] == 0x54 && maFirstBytes[5] == 0x46)
289  {
290  msDetectedFormat = "SVM";
291  return true;
292  }
293  return false;
294 }
295 
297 {
298  if (mnStreamLength < 2055)
299  return false;
300  char sBuffer[8];
302  mrStream.ReadBytes(sBuffer, 7);
303 
304  if (strncmp(sBuffer, "PCD_IPI", 7) == 0)
305  {
306  msDetectedFormat = "PCD";
307  return true;
308  }
309  return false;
310 }
311 
313 {
314  if ((mnFirstLong == 0x38425053) && ((mnSecondLong >> 16) == 1))
315  {
316  msDetectedFormat = "PSD";
317  return true;
318  }
319  return false;
320 }
321 
323 {
324  if ((mnFirstLong == 0xC5D0D3C6)
325  || (ImplSearchEntry(maFirstBytes.data(), reinterpret_cast<sal_uInt8 const*>("%!PS-Adobe"),
326  10, 10)
327  && ImplSearchEntry(&maFirstBytes[15], reinterpret_cast<sal_uInt8 const*>("EPS"), 3, 3)))
328  {
329  msDetectedFormat = "EPS";
330  return true;
331  }
332  return false;
333 }
334 
336 {
337  if (strncmp(reinterpret_cast<char*>(maFirstBytes.data()), "AutoCAD Binary DXF", 18) == 0)
338  {
339  msDetectedFormat = "DXF";
340  return true;
341  }
342 
343  // ASCII DXF File Format
344  int i = 0;
345  while (i < 256 && maFirstBytes[i] <= 32)
346  {
347  ++i;
348  }
349 
350  if (i < 256 && maFirstBytes[i] == '0')
351  {
352  ++i;
353 
354  // only now do we have sufficient data to make a judgement
355  // based on a '0' + 'SECTION' == DXF argument
356 
357  while (i < 256 && maFirstBytes[i] <= 32)
358  {
359  ++i;
360  }
361 
362  if (i + 7 < 256
363  && (strncmp(reinterpret_cast<char*>(maFirstBytes.data() + i), "SECTION", 7) == 0))
364  {
365  msDetectedFormat = "DXF";
366  return true;
367  }
368  }
369  return false;
370 }
371 
373 {
375  {
376  msDetectedFormat = "PCT";
377  return true;
378  }
379  return false;
380 }
381 
383 {
384  if (maFirstBytes[0] == 'P')
385  {
386  switch (maFirstBytes[1])
387  {
388  case '1':
389  case '4':
390  msDetectedFormat = "PBM";
391  return true;
392 
393  case '2':
394  case '5':
395  msDetectedFormat = "PGM";
396  return true;
397 
398  case '3':
399  case '6':
400  msDetectedFormat = "PPM";
401  return true;
402  }
403  }
404  return false;
405 }
406 
408 {
409  if (mnFirstLong == 0x59a66a95)
410  {
411  msDetectedFormat = "RAS";
412  return true;
413  }
414  return false;
415 }
416 
418 {
419  if (ImplSearchEntry(maFirstBytes.data(), reinterpret_cast<sal_uInt8 const*>("/* XPM */"), 256,
420  9))
421  {
422  msDetectedFormat = "XPM";
423  return true;
424  }
425  return false;
426 }
427 
429 {
430  sal_uInt64 nSize = std::min<sal_uInt64>(mnStreamLength, 2048);
431  std::unique_ptr<sal_uInt8[]> pBuffer(new sal_uInt8[nSize]);
432 
434  mrStream.ReadBytes(pBuffer.get(), nSize);
435  sal_uInt8* pPtr
436  = ImplSearchEntry(pBuffer.get(), reinterpret_cast<sal_uInt8 const*>("#define"), nSize, 7);
437 
438  if (pPtr)
439  {
440  if (ImplSearchEntry(pPtr, reinterpret_cast<sal_uInt8 const*>("_width"),
441  pBuffer.get() + nSize - pPtr, 6))
442  {
443  msDetectedFormat = "XBM";
444  return true;
445  }
446  }
447  return false;
448 }
449 
451 {
452  sal_uInt8* pCheckArray = maFirstBytes.data();
453  sal_uInt64 nCheckSize = std::min<sal_uInt64>(mnStreamLength, 256);
454 
455  sal_uInt8 sExtendedOrDecompressedFirstBytes[2048];
456  sal_uInt64 nDecompressedSize = nCheckSize;
457 
458  bool bIsGZip(false);
459 
460  // check if it is gzipped -> svgz
461  if (maFirstBytes[0] == 0x1F && maFirstBytes[1] == 0x8B)
462  {
463  ZCodec aCodec;
465  aCodec.BeginCompression(ZCODEC_DEFAULT_COMPRESSION, /*gzLib*/ true);
466  nDecompressedSize = aCodec.Read(mrStream, sExtendedOrDecompressedFirstBytes, 2048);
467  nCheckSize = std::min<sal_uInt64>(nDecompressedSize, 256);
468  aCodec.EndCompression();
469  pCheckArray = sExtendedOrDecompressedFirstBytes;
470 
471  bIsGZip = true;
472  }
473 
474  bool bIsSvg(false);
475 
476  // check for Xml
477  // #119176# SVG files which have no xml header at all have shown up this is optional
478  if (ImplSearchEntry(pCheckArray, reinterpret_cast<sal_uInt8 const*>("<?xml"), nCheckSize,
479  5) // is it xml
480  && ImplSearchEntry(pCheckArray, reinterpret_cast<sal_uInt8 const*>("version"), nCheckSize,
481  7)) // does it have a version (required for xml)
482  {
483  // check for DOCTYPE svg combination
484  if (ImplSearchEntry(pCheckArray, reinterpret_cast<sal_uInt8 const*>("DOCTYPE"), nCheckSize,
485  7) // 'DOCTYPE' is there
486  && ImplSearchEntry(pCheckArray, reinterpret_cast<sal_uInt8 const*>("svg"), nCheckSize,
487  3)) // 'svg' is there
488  {
489  bIsSvg = true;
490  }
491  }
492 
493  // check for svg element in 1st 256 bytes
494  if (!bIsSvg
495  && ImplSearchEntry(pCheckArray, reinterpret_cast<sal_uInt8 const*>("<svg"), nCheckSize,
496  4)) // '<svg'
497  {
498  bIsSvg = true;
499  }
500 
501  // extended search for svg element
502  if (!bIsSvg)
503  {
504  // it's a xml, look for '<svg' in full file. Should not happen too
505  // often since the tests above will handle most cases, but can happen
506  // with Svg files containing big comment headers or Svg as the host
507  // language
508 
509  pCheckArray = sExtendedOrDecompressedFirstBytes;
510 
511  if (bIsGZip)
512  {
513  nCheckSize = std::min<sal_uInt64>(nDecompressedSize, 2048);
514  }
515  else
516  {
517  nCheckSize = std::min<sal_uInt64>(mnStreamLength, 2048);
519  nCheckSize = mrStream.ReadBytes(sExtendedOrDecompressedFirstBytes, nCheckSize);
520  }
521 
522  if (ImplSearchEntry(pCheckArray, reinterpret_cast<sal_uInt8 const*>("<svg"), nCheckSize,
523  4)) // '<svg'
524  {
525  bIsSvg = true;
526  }
527  }
528 
529  if (bIsSvg)
530  {
531  msDetectedFormat = "SVG";
532  return true;
533  }
534  return false;
535 }
536 
538 {
539  if (maExtension.startsWith("TGA"))
540  {
541  msDetectedFormat = "TGA";
542  return true;
543  }
544  return false;
545 }
546 
548 {
549  if ((maFirstBytes[4] == 'f' && maFirstBytes[5] == 't' && maFirstBytes[6] == 'y'
550  && maFirstBytes[7] == 'p' && maFirstBytes[8] == 'q' && maFirstBytes[9] == 't')
551  || (maFirstBytes[4] == 'm' && maFirstBytes[5] == 'o' && maFirstBytes[6] == 'o'
552  && maFirstBytes[7] == 'v' && maFirstBytes[11] == 'l' && maFirstBytes[12] == 'm'))
553  {
554  msDetectedFormat = "MOV";
555  return true;
556  }
557  return false;
558 }
559 
561 {
562  if (maFirstBytes[0] == '%' && maFirstBytes[1] == 'P' && maFirstBytes[2] == 'D'
563  && maFirstBytes[3] == 'F' && maFirstBytes[4] == '-')
564  {
565  msDetectedFormat = "PDF";
566  return true;
567  }
568  return false;
569 }
570 
571 } // vcl namespace
572 
573 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SvStream & ReadInt16(sal_Int16 &rInt16)
SvStream & ReadUInt16(sal_uInt16 &rUInt16)
void SetSynchronMode(bool bTheSync=true)
sal_uIntPtr sal_uLong
#define ZCODEC_DEFAULT_COMPRESSION
bool isPCT(SvStream &rStream, sal_uLong nStreamPos, sal_uLong nStreamLen)
sal_uInt64 Seek(sal_uInt64 nPos)
sal_uInt64 SeekRel(sal_Int64 nPos)
ErrCode GetError() const
long EndCompression()
SvLockBytes * GetLockBytes() const
sal_uInt8 * ImplSearchEntry(sal_uInt8 *pSource, sal_uInt8 const *pDest, sal_uLong nComp, sal_uLong nSize)
const sal_uInt16 nMagic
GraphicFormatDetector(SvStream &rStream, OUString const &rFormatExtension)
sal_uInt64 remainingSize()
int i
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
SvStream & ReadUChar(unsigned char &rChar)
std::vector< sal_uInt8 > maFirstBytes
std::size_t ReadBytes(void *pData, std::size_t nSize)
SvStreamEndian GetEndian() const
long Read(SvStream &rIStm, sal_uInt8 *pData, sal_uInt32 nSize)
unsigned char sal_uInt8
void SetEndian(SvStreamEndian SvStreamEndian)
sal_uInt64 Tell() const
SvStreamEndian