LibreOffice Module vcl (master)  1
GraphicFormatDetector.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <algorithm>
23 
26 #include <tools/solar.h>
27 #include <tools/zcodec.hxx>
28 
29 namespace vcl
30 {
31 namespace
32 {
33 bool isPCT(SvStream& rStream, sal_uLong nStreamPos, sal_uLong nStreamLen)
34 {
35  sal_uInt8 sBuf[3];
36  // store number format
37  SvStreamEndian oldNumberFormat = rStream.GetEndian();
38  sal_uInt32 nOffset; // in MS documents the pict format is used without the first 512 bytes
39  for (nOffset = 0; (nOffset <= 512) && ((nStreamPos + nOffset + 14) <= nStreamLen);
40  nOffset += 512)
41  {
42  short y1, x1, y2, x2;
43  bool bdBoxOk = true;
44 
45  rStream.Seek(nStreamPos + nOffset);
46  // size of the pict in version 1 pict ( 2bytes) : ignored
47  rStream.SeekRel(2);
48  // bounding box (bytes 2 -> 9)
49  rStream.SetEndian(SvStreamEndian::BIG);
50  rStream.ReadInt16(y1).ReadInt16(x1).ReadInt16(y2).ReadInt16(x2);
51  rStream.SetEndian(oldNumberFormat); // reset format
52 
53  if (x1 > x2 || y1 > y2 || // bad bdbox
54  (x1 == x2 && y1 == y2) || // 1 pixel picture
55  x2 - x1 > 2048 || y2 - y1 > 2048) // picture abnormally big
56  bdBoxOk = false;
57 
58  // read version op
59  rStream.ReadBytes(sBuf, 3);
60  // see http://developer.apple.com/legacy/mac/library/documentation/mac/pdf/Imaging_With_QuickDraw/Appendix_A.pdf
61  // normal version 2 - page A23 and A24
62  if (sBuf[0] == 0x00 && sBuf[1] == 0x11 && sBuf[2] == 0x02)
63  return true;
64  // normal version 1 - page A25
65  else if (sBuf[0] == 0x11 && sBuf[1] == 0x01 && bdBoxOk)
66  return true;
67  }
68  return false;
69 }
70 
71 } // end anonymous namespace
72 
73 GraphicFormatDetector::GraphicFormatDetector(SvStream& rStream, OUString const& rFormatExtension)
74  : mrStream(rStream)
75  , maExtension(rFormatExtension)
76  , mnFirstLong(0)
77  , mnSecondLong(0)
78  , mnStreamPosition(0)
79  , mnStreamLength(0)
80 {
81 }
82 
// Resets the cached header data, loads the first bytes of the stream into
// maFirstBytes and caches bytes 0-3 / 4-7 as 32-bit words (most significant
// byte first) in mnFirstLong / mnSecondLong.
// Returns false when mnStreamLength is zero or the stream reports an error,
// true otherwise.
// NOTE(review): the signature line and listing lines 91-92, 99 and 115 are
// missing from this extract, so the assignment of mnStreamLength and the body
// of the short-stream branch cannot be seen here.
84 {
85  maFirstBytes.clear();
86  maFirstBytes.resize(256, 0); // fixed-size, zero-filled header cache
87 
88  mnFirstLong = 0;
89  mnSecondLong = 0;
90 
93 
94  if (!mnStreamLength)
95  {
// no length known: switch the underlying lock bytes (if any) to synchronous mode
96  SvLockBytes* pLockBytes = mrStream.GetLockBytes();
97  if (pLockBytes)
98  pLockBytes->SetSynchronMode();
100  }
101 
102  if (mnStreamLength == 0)
103  {
104  return false; // this prevents at least a STL assertion
105  }
106  else if (mnStreamLength >= maFirstBytes.size())
107  {
108  // load first 256 bytes into a buffer
109  sal_uInt64 nRead = mrStream.ReadBytes(maFirstBytes.data(), maFirstBytes.size());
110  if (nRead < maFirstBytes.size())
111  mnStreamLength = nRead; // short read: record only what was actually read
112  }
113  else
114  {
116  }
117 
118  if (mrStream.GetError())
119  return false;
120 
// build both words byte by byte, first byte ending up in the top 8 bits
121  for (int i = 0; i < 4; ++i)
122  {
123  mnFirstLong = (mnFirstLong << 8) | sal_uInt32(maFirstBytes[i]);
124  mnSecondLong = (mnSecondLong << 8) | sal_uInt32(maFirstBytes[i + 4]);
125  }
126  return true;
127 }
128 
// Reports "MET" when byte 2 of the header is 0xd3 and three further fields,
// each announced by a big-endian 16-bit size followed by a 0xd3 marker byte,
// can be walked inside the stream.
// NOTE(review): the signature line and listing lines 134 and 136 (presumably
// a seek and the declaration of nMagic) are missing from this extract.
// NOTE(review): every early return after SetEndian(BIG) leaves the stream in
// big-endian mode, and the success path sets LITTLE instead of restoring the
// previous setting - confirm that callers do not depend on the endianness.
130 {
131  if (maFirstBytes[2] != 0xd3)
132  return false;
133  mrStream.SetEndian(SvStreamEndian::BIG);
135  sal_uInt16 nFieldSize;
137 
138  mrStream.ReadUInt16(nFieldSize).ReadUChar(nMagic);
139  for (int i = 0; i < 3; i++)
140  {
141  if (nFieldSize < 6)
142  return false;
143  if (mnStreamLength < mrStream.Tell() + nFieldSize)
144  return false;
// size (2 bytes) and marker (1 byte) of this field were already consumed
145  mrStream.SeekRel(nFieldSize - 3);
146  mrStream.ReadUInt16(nFieldSize).ReadUChar(nMagic);
147  if (nMagic != 0xd3)
148  return false;
149  }
150  mrStream.SetEndian(SvStreamEndian::LITTLE);
151 
152  if (mrStream.GetError())
153  return false;
154 
155  msDetectedFormat = "MET";
156  return true;
157 }
158 
// Reports "BMP" when the cached header starts with 'BM' (optionally preceded
// by a 14-byte 'BA' array header) and either the four bytes at offset 6-9 are
// zero or the byte at offset 14 is 0x28 or 0x0c.
// NOTE(review): the signature line is missing from this extract.
160 {
161  sal_uInt8 nOffset;
162 
163  // We're possibly also able to read an OS/2 bitmap array
164  // ('BA'), therefore we must adjust the offset to discover the
165  // first bitmap in the array
166  if (maFirstBytes[0] == 0x42 && maFirstBytes[1] == 0x41)
167  nOffset = 14;
168  else
169  nOffset = 0;
170 
171  // Now we initially test on 'BM'
172  if (maFirstBytes[0 + nOffset] == 0x42 && maFirstBytes[1 + nOffset] == 0x4d)
173  {
174  // OS/2 can set the Reserved flags to a value other than 0
175  // (which they really should not do...);
176  // In this case we test the size of the BmpInfoHeaders
177  if ((maFirstBytes[6 + nOffset] == 0x00 && maFirstBytes[7 + nOffset] == 0x00
178  && maFirstBytes[8 + nOffset] == 0x00 && maFirstBytes[9 + nOffset] == 0x00)
179  || maFirstBytes[14 + nOffset] == 0x28 || maFirstBytes[14 + nOffset] == 0x0c)
180  {
181  msDetectedFormat = "BMP";
182  return true;
183  }
184  }
185  return false;
186 }
187 
// Reports "WMF" when the first 32-bit word is 0xd7cdc69a or 0x01000900, and
// "EMF" when it is 0x01000000 and bytes 40-43 of the header are the ASCII
// characters " EMF". Returns false otherwise.
// NOTE(review): the signature line is missing from this extract.
189 {
190  if (mnFirstLong == 0xd7cdc69a || mnFirstLong == 0x01000900)
191  {
192  msDetectedFormat = "WMF";
193  return true;
194  }
195  else if (mnFirstLong == 0x01000000 && maFirstBytes[40] == 0x20 && maFirstBytes[41] == 0x45
196  && maFirstBytes[42] == 0x4d && maFirstBytes[43] == 0x46)
197  {
198  msDetectedFormat = "EMF";
199  return true;
200  }
201  return false;
202 }
203 
// Reports "PCX" when the first header byte is 0x0a, nVersion is 0, 2, 3 or 5
// and the encoding byte (header byte 2) is 0 or 1.
// NOTE(review): the signature line and listing line 209, which declares
// nVersion, are missing from this extract.
205 {
206  if (maFirstBytes[0] != 0x0a)
207  return false;
208 
210  sal_uInt8 nEncoding = maFirstBytes[2];
211  if ((nVersion == 0 || nVersion == 2 || nVersion == 3 || nVersion == 5) && nEncoding <= 1)
212  {
213  msDetectedFormat = "PCX";
214  return true;
215  }
216 
217  return false;
218 }
219 
// Reports "TIF" when the first four bytes are 'I' 'I' 0x2a 0x00 or
// 'M' 'M' 0x00 0x2a.
// NOTE(review): the signature line is missing from this extract.
221 {
222  if (mnFirstLong == 0x49492a00 || mnFirstLong == 0x4d4d002a)
223  {
224  msDetectedFormat = "TIF";
225  return true;
226  }
227  return false;
228 }
229 
// Reports "GIF" when the header starts with the ASCII text "GIF87a" or
// "GIF89a" ("GIF8" in the first word, then '7' or '9', then 'a').
// NOTE(review): the signature line is missing from this extract.
231 {
232  if (mnFirstLong == 0x47494638 && (maFirstBytes[4] == 0x37 || maFirstBytes[4] == 0x39)
233  && maFirstBytes[5] == 0x61)
234  {
235  msDetectedFormat = "GIF";
236  return true;
237  }
238  return false;
239 }
240 
// Reports "PNG" when the first eight header bytes are
// 0x89 'P' 'N' 'G' 0x0d 0x0a 0x1a 0x0a.
// NOTE(review): the signature line is missing from this extract.
242 {
243  if (mnFirstLong == 0x89504e47 && mnSecondLong == 0x0d0a1a0a)
244  {
245  msDetectedFormat = "PNG";
246  return true;
247  }
248  return false;
249 }
250 
// Reports "JPG" when the header starts with ff d8 ff e0 and carries "JFIF" in
// bytes 6-9, or starts with ff d8 ff fe, or simply starts with ff d8 ff.
// NOTE(review): the last clause (mask 0xffffff00) already matches everything
// the first two clauses match, so in effect only the first three bytes decide.
// NOTE(review): the signature line is missing from this extract.
252 {
253  if ((mnFirstLong == 0xffd8ffe0 && maFirstBytes[6] == 0x4a && maFirstBytes[7] == 0x46
254  && maFirstBytes[8] == 0x49 && maFirstBytes[9] == 0x46)
255  || (mnFirstLong == 0xffd8fffe) || (0xffd8ff00 == (mnFirstLong & 0xffffff00)))
256  {
257  msDetectedFormat = "JPG";
258  return true;
259  }
260  return false;
261 }
262 
// Reports "SVM" when the header starts with the ASCII text "SVGDI" or with
// the ASCII text "VCLMTF".
// NOTE(review): the signature line is missing from this extract.
264 {
265  if (mnFirstLong == 0x53564744 && maFirstBytes[4] == 0x49)
266  {
267  msDetectedFormat = "SVM";
268  return true;
269  }
270  else if (maFirstBytes[0] == 0x56 && maFirstBytes[1] == 0x43 && maFirstBytes[2] == 0x4C
271  && maFirstBytes[3] == 0x4D && maFirstBytes[4] == 0x54 && maFirstBytes[5] == 0x46)
272  {
273  msDetectedFormat = "SVM";
274  return true;
275  }
276  return false;
277 }
278 
// Reports "PCD" when the stream is at least 2055 bytes long and the 7 bytes
// read from the stream equal "PCD_IPI".
// NOTE(review): the signature line and listing line 284 are missing from this
// extract; presumably that line positions the stream before the read (the
// 2055-byte minimum suggests offset 2048) - confirm against the full source.
// NOTE(review): the result of ReadBytes is not checked before sBuffer is
// compared.
280 {
281  if (mnStreamLength < 2055)
282  return false;
283  char sBuffer[8];
285  mrStream.ReadBytes(sBuffer, 7);
286 
287  if (strncmp(sBuffer, "PCD_IPI", 7) == 0)
288  {
289  msDetectedFormat = "PCD";
290  return true;
291  }
292  return false;
293 }
294 
// Reports "PSD" when the header starts with the ASCII text "8BPS" and the
// following two bytes, read as a big-endian 16-bit value, equal 1.
// NOTE(review): the signature line is missing from this extract.
296 {
297  if ((mnFirstLong == 0x38425053) && ((mnSecondLong >> 16) == 1))
298  {
299  msDetectedFormat = "PSD";
300  return true;
301  }
302  return false;
303 }
304 
// Reports "EPS" when the first 32-bit word is 0xC5D0D3C6, or when
// checkArrayForMatchingStrings accepts the strings "%!PS-Adobe" and " EPS"
// for the first 30 header bytes.
// NOTE(review): how the helper combines the listed strings (presumably all
// must occur, in order) is not visible here - confirm against its definition.
// NOTE(review): the signature line is missing from this extract.
306 {
307  const char* pFirstBytesAsCharArray = reinterpret_cast<char*>(maFirstBytes.data());
308 
309  if (mnFirstLong == 0xC5D0D3C6)
310  {
311  msDetectedFormat = "EPS";
312  return true;
313  }
314  else if (checkArrayForMatchingStrings(pFirstBytesAsCharArray, 30, { "%!PS-Adobe", " EPS" }))
315  {
316  msDetectedFormat = "EPS";
317  return true;
318  }
319 
320  return false;
321 }
322 
// Reports "DXF" for two layouts of the cached header:
//  - binary: the header starts with the text "AutoCAD Binary DXF";
//  - ASCII: after leading bytes <= 32 (whitespace/control) comes a '0', then
//    again bytes <= 32, then the text "SECTION".
// All scanning is bounded by the 256-byte index limit used below.
// NOTE(review): the signature line is missing from this extract.
324 {
325  if (strncmp(reinterpret_cast<char*>(maFirstBytes.data()), "AutoCAD Binary DXF", 18) == 0)
326  {
327  msDetectedFormat = "DXF";
328  return true;
329  }
330 
331  // ASCII DXF File Format
332  int i = 0;
// skip leading bytes with value <= 32
333  while (i < 256 && maFirstBytes[i] <= 32)
334  {
335  ++i;
336  }
337 
338  if (i < 256 && maFirstBytes[i] == '0')
339  {
340  ++i;
341 
342  // only now do we have sufficient data to make a judgement
343  // based on a '0' + 'SECTION' == DXF argument
344 
345  while (i < 256 && maFirstBytes[i] <= 32)
346  {
347  ++i;
348  }
349 
// the 7 compared characters must lie inside the 256-byte window
350  if (i + 7 < 256
351  && (strncmp(reinterpret_cast<char*>(maFirstBytes.data() + i), "SECTION", 7) == 0))
352  {
353  msDetectedFormat = "DXF";
354  return true;
355  }
356  }
357  return false;
358 }
359 
// Reports "PCT" when the guarding condition holds, otherwise returns false.
// NOTE(review): the signature line and listing line 362, which carries the
// condition of the inner block, are missing from this extract; presumably it
// calls the isPCT helper defined earlier in this file - confirm against the
// full source.
361 {
363  {
364  msDetectedFormat = "PCT";
365  return true;
366  }
367  return false;
368 }
369 
// Inspects the two-character magic at the start of the header: 'P' followed
// by '1'/'4' reports "PBM", by '2'/'5' reports "PGM", by '3'/'6' reports
// "PPM". Any other second character, or a first byte other than 'P', returns
// false.
// NOTE(review): the signature line is missing from this extract.
371 {
372  if (maFirstBytes[0] == 'P')
373  {
374  switch (maFirstBytes[1])
375  {
376  case '1':
377  case '4':
378  msDetectedFormat = "PBM";
379  return true;
380 
381  case '2':
382  case '5':
383  msDetectedFormat = "PGM";
384  return true;
385 
386  case '3':
387  case '6':
388  msDetectedFormat = "PPM";
389  return true;
390  }
391  }
392  return false;
393 }
394 
// Reports "RAS" when the first 32-bit word of the header is 0x59a66a95.
// NOTE(review): the signature line is missing from this extract.
396 {
397  if (mnFirstLong == 0x59a66a95)
398  {
399  msDetectedFormat = "RAS";
400  return true;
401  }
402  return false;
403 }
404 
// Reports "XPM" when matchArrayWithString finds the text "/* XPM */" in the
// 256 cached header bytes.
// NOTE(review): the signature line is missing from this extract.
406 {
407  const char* pFirstBytesAsCharArray = reinterpret_cast<char*>(maFirstBytes.data());
408  if (matchArrayWithString(pFirstBytesAsCharArray, 256, "/* XPM */"))
409  {
410  msDetectedFormat = "XPM";
411  return true;
412  }
413  return false;
414 }
415 
// Reads up to 2048 bytes from the stream into a temporary buffer and reports
// "XBM" when checkArrayForMatchingStrings accepts "#define" and "_width" for
// that buffer.
// NOTE(review): the signature line and listing line 421 (presumably a seek to
// the start of the data) are missing from this extract.
// NOTE(review): the ReadBytes result is ignored, so nSize is searched even if
// fewer bytes were read - confirm this cannot expose uninitialised memory.
417 {
418  sal_uInt64 nSize = std::min<sal_uInt64>(mnStreamLength, 2048);
419  std::unique_ptr<sal_uInt8[]> pBuffer(new sal_uInt8[nSize]);
420 
422  mrStream.ReadBytes(pBuffer.get(), nSize);
423 
424  const char* pBufferAsCharArray = reinterpret_cast<char*>(pBuffer.get());
425 
426  if (checkArrayForMatchingStrings(pBufferAsCharArray, nSize, { "#define", "_width" }))
427  {
428  msDetectedFormat = "XBM";
429  return true;
430  }
431  return false;
432 }
433 
// Reports "SVG" for plain or gzip-compressed SVG data. Three searches are
// tried in turn, each only if the previous one failed:
//  1. "<?xml", "version", "DOCTYPE", "svg" within the first (up to) 256 bytes;
//  2. "<svg" within the same bytes;
//  3. "<svg" within up to 2048 bytes (decompressed data for gzip input,
//     otherwise bytes freshly read from the stream).
// NOTE(review): the signature line and listing lines 448 and 495 (presumably
// stream seeks) are missing from this extract.
// NOTE(review): ZCodec::Read is declared as returning long; a negative result
// stored in the sal_uInt64 nDecompressedSize would become a huge value and
// let the searches run over the undefined part of the 2048-byte buffer -
// confirm how read errors are reported.
435 {
436  sal_uInt8* pCheckArray = maFirstBytes.data();
437  sal_uInt64 nCheckSize = std::min<sal_uInt64>(mnStreamLength, 256);
438 
439  sal_uInt8 sExtendedOrDecompressedFirstBytes[2048];
440  sal_uInt64 nDecompressedSize = nCheckSize;
441 
442  bool bIsGZip(false);
443 
444  // check if it is gzipped -> svgz
445  if (maFirstBytes[0] == 0x1F && maFirstBytes[1] == 0x8B)
446  {
447  ZCodec aCodec;
449  aCodec.BeginCompression(ZCODEC_DEFAULT_COMPRESSION, /*gzLib*/ true);
450  nDecompressedSize = aCodec.Read(mrStream, sExtendedOrDecompressedFirstBytes, 2048);
451  nCheckSize = std::min<sal_uInt64>(nDecompressedSize, 256);
452  aCodec.EndCompression();
// from here on the searches run on the decompressed bytes
453  pCheckArray = sExtendedOrDecompressedFirstBytes;
454 
455  bIsGZip = true;
456  }
457 
458  bool bIsSvg(false);
459 
460  const char* pCheckArrayAsCharArray = reinterpret_cast<char*>(pCheckArray);
461 
462  // check for XML
463  // #119176# SVG files which have no xml header at all have shown up this is optional
464  // check for "xml" then "version" then "DOCTYPE" and "svg" tags
465  if (checkArrayForMatchingStrings(pCheckArrayAsCharArray, nCheckSize,
466  { "<?xml", "version", "DOCTYPE", "svg" }))
467  {
468  bIsSvg = true;
469  }
470 
471  // check for svg element in 1st 256 bytes
472  // search for '<svg'
473  if (!bIsSvg && checkArrayForMatchingStrings(pCheckArrayAsCharArray, nCheckSize, { "<svg" }))
474  {
475  bIsSvg = true;
476  }
477 
478  // extended search for svg element
479  if (!bIsSvg)
480  {
481  // it's a xml, look for '<svg' in full file. Should not happen too
482  // often since the tests above will handle most cases, but can happen
483  // with Svg files containing big comment headers or Svg as the host
484  // language
485 
486  pCheckArrayAsCharArray = reinterpret_cast<char*>(sExtendedOrDecompressedFirstBytes);
487 
488  if (bIsGZip)
489  {
// the buffer already holds the decompressed data; only widen the window
490  nCheckSize = std::min<sal_uInt64>(nDecompressedSize, 2048);
491  }
492  else
493  {
494  nCheckSize = std::min<sal_uInt64>(mnStreamLength, 2048);
// search only as many bytes as were actually read
496  nCheckSize = mrStream.ReadBytes(sExtendedOrDecompressedFirstBytes, nCheckSize);
497  }
498 
499  // search for '<svg'
500  if (checkArrayForMatchingStrings(pCheckArrayAsCharArray, nCheckSize, { "<svg" }))
501  {
502  bIsSvg = true;
503  }
504  }
505 
506  if (bIsSvg)
507  {
508  msDetectedFormat = "SVG";
509  return true;
510  }
511  return false;
512 }
513 
// Reports "TGA" purely from the extension handed to the constructor: it must
// start with "TGA". The stream content is not inspected here.
// NOTE(review): the signature line is missing from this extract.
515 {
516  if (maExtension.startsWith("TGA"))
517  {
518  msDetectedFormat = "TGA";
519  return true;
520  }
521  return false;
522 }
523 
// Reports "MOV" when header bytes 4-9 are the ASCII text "ftypqt", or when
// bytes 4-7 are "moov" and bytes 11 and 12 are 'l' and 'm'.
// NOTE(review): the signature line is missing from this extract.
525 {
526  if ((maFirstBytes[4] == 'f' && maFirstBytes[5] == 't' && maFirstBytes[6] == 'y'
527  && maFirstBytes[7] == 'p' && maFirstBytes[8] == 'q' && maFirstBytes[9] == 't')
528  || (maFirstBytes[4] == 'm' && maFirstBytes[5] == 'o' && maFirstBytes[6] == 'o'
529  && maFirstBytes[7] == 'v' && maFirstBytes[11] == 'l' && maFirstBytes[12] == 'm'))
530  {
531  msDetectedFormat = "MOV";
532  return true;
533  }
534  return false;
535 }
536 
// Reports "PDF" when the header starts with the ASCII text "%PDF-".
// NOTE(review): the signature line is missing from this extract.
538 {
539  if (maFirstBytes[0] == '%' && maFirstBytes[1] == 'P' && maFirstBytes[2] == 'D'
540  && maFirstBytes[3] == 'F' && maFirstBytes[4] == '-')
541  {
542  msDetectedFormat = "PDF";
543  return true;
544  }
545  return false;
546 }
547 
548 } // vcl namespace
549 
550 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SvStream & ReadInt16(sal_Int16 &rInt16)
SvStream & ReadUInt16(sal_uInt16 &rUInt16)
void SetSynchronMode(bool bTheSync=true)
sal_uIntPtr sal_uLong
#define ZCODEC_DEFAULT_COMPRESSION
bool isPCT(SvStream &rStream, sal_uLong nStreamPos, sal_uLong nStreamLen)
const char * matchArrayWithString(const char *pSource, sal_Int32 nSourceSize, OString const &rString)
sal_uInt64 Seek(sal_uInt64 nPos)
sal_uInt64 SeekRel(sal_Int64 nPos)
ErrCode GetError() const
long EndCompression()
SvLockBytes * GetLockBytes() const
const sal_uInt16 nMagic
GraphicFormatDetector(SvStream &rStream, OUString const &rFormatExtension)
sal_uInt64 remainingSize()
int i
bool checkArrayForMatchingStrings(const char *pSource, sal_Int32 nSourceSize, std::vector< OString > const &rStrings)
void BeginCompression(int nCompressLevel=ZCODEC_DEFAULT_COMPRESSION, bool gzLib=false)
SvStream & ReadUChar(unsigned char &rChar)
sal_Int16 nVersion
std::vector< sal_uInt8 > maFirstBytes
std::size_t ReadBytes(void *pData, std::size_t nSize)
SvStreamEndian GetEndian() const
long Read(SvStream &rIStm, sal_uInt8 *pData, sal_uInt32 nSize)
unsigned char sal_uInt8
void SetEndian(SvStreamEndian SvStreamEndian)
sal_uInt64 Tell() const
SvStreamEndian