LibreOffice Module filter (master) 1
rtfutil.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
11#include <rtl/strbuf.hxx>
12#include <sal/log.hxx>
13#include <osl/diagnose.h>
14#include <svtools/rtfkeywd.hxx>
15#include <rtl/character.hxx>
16#include <tools/stream.hxx>
17#include <sot/storage.hxx>
18
19namespace
20{
26void WrapOle1InOle2(SvStream& rOle1, sal_uInt32 nOle1Size, SvStream& rOle2,
27 const OString& rClassName)
28{
29 tools::SvRef<SotStorage> pStorage = new SotStorage(rOle2);
30 OString aAnsiUserType;
32 if (rClassName == "PBrush")
33 {
34 aAnsiUserType = "Bitmap Image";
35 aName = SvGlobalName(0x0003000A, 0, 0, 0xc0, 0, 0, 0, 0, 0, 0, 0x46);
36 }
37 else
38 {
39 if (!rClassName.isEmpty() && rClassName != "Package")
40 {
41 SAL_WARN("filter.ms", "WrapOle1InOle2: unexpected class name: '" << rClassName << "'");
42 }
43 aAnsiUserType = "OLE Package";
44 aName = SvGlobalName(0x0003000C, 0, 0, 0xc0, 0, 0, 0, 0, 0, 0, 0x46);
45 }
46 pStorage->SetClass(aName, SotClipboardFormatId::NONE, "");
47
48 // [MS-OLEDS] 2.3.7 CompObjHeader
49 tools::SvRef<SotStorageStream> pCompObj = pStorage->OpenSotStream("\1CompObj");
50 // Reserved1
51 pCompObj->WriteUInt32(0xfffe0001);
52 // Version
53 pCompObj->WriteUInt32(0x00000a03);
54 // Reserved2
55 pCompObj->WriteUInt32(0xffffffff);
56 pCompObj->WriteUInt32(0x0003000c);
57 pCompObj->WriteUInt32(0x00000000);
58 pCompObj->WriteUInt32(0x000000c0);
59 pCompObj->WriteUInt32(0x46000000);
60 // Rest of CompObjStream
61 // AnsiUserType
62 pCompObj->WriteUInt32(aAnsiUserType.getLength() + 1);
63 pCompObj->WriteOString(aAnsiUserType);
64 pCompObj->WriteChar(0);
65 // AnsiClipboardFormat
66 pCompObj->WriteUInt32(0x00000000);
67 // Reserved1
68 pCompObj->WriteUInt32(rClassName.getLength() + 1);
69 pCompObj->WriteOString(rClassName);
70 pCompObj->WriteChar(0);
71 // UnicodeMarker
72 pCompObj->WriteUInt32(0x71B239F4);
73 // UnicodeUserType
74 pCompObj->WriteUInt32(0x00000000);
75 // UnicodeClipboardFormat
76 pCompObj->WriteUInt32(0x00000000);
77 // Reserved2
78 pCompObj->WriteUInt32(0x00000000);
79 pCompObj->Commit();
80 pCompObj.clear();
81
82 // [MS-OLEDS] 2.3.6 OLENativeStream
83 tools::SvRef<SotStorageStream> pOleNative = pStorage->OpenSotStream("\1Ole10Native");
84 // NativeDataSize
85 pOleNative->WriteUInt32(nOle1Size);
86 pOleNative->WriteStream(rOle1, nOle1Size);
87 pOleNative->Commit();
88 pOleNative.clear();
89
90 pStorage->Commit();
91 pStorage.clear();
92 rOle2.Seek(0);
93}
94}
95
97{
98OString OutHex(sal_uLong nHex, sal_uInt8 nLen)
99{
100 char aNToABuf[] = "0000000000000000";
101
102 OSL_ENSURE(nLen < sizeof(aNToABuf), "nLen is too big");
103 if (nLen >= sizeof(aNToABuf))
104 nLen = (sizeof(aNToABuf) - 1);
105
106 // Set pointer to the buffer end
107 char* pStr = aNToABuf + (sizeof(aNToABuf) - 1);
108 for (sal_uInt8 n = 0; n < nLen; ++n)
109 {
110 *(--pStr) = static_cast<char>(nHex & 0xf) + 48;
111 if (*pStr > '9')
112 *pStr += 39;
113 nHex >>= 4;
114 }
115 return pStr;
116}
117
118// Ideally, this function should work on (sal_uInt32) Unicode scalar values
119// instead of (sal_Unicode) UTF-16 code units. However, at least "Rich Text
120// Format (RTF) Specification Version 1.9.1" available at
121// <https://www.microsoft.com/en-us/download/details.aspx?id=10725> does not
122// look like it allows non-BMP Unicode characters >= 0x10000 in the \uN notation
123// (it only talks about "Unicode character", but then explains how values of N
124// greater than 32767 will be expressed as negative signed 16-bit numbers, so
125// that smells like \uN is limited to BMP).
126// However the "Mathematics" section has an example that shows the code point
127// U+1D44E being encoded as UTF-16 surrogate pair "\u-10187?\u-9138?", so
128// sal_Unicode actually works fine here.
129OString OutChar(sal_Unicode c, int* pUCMode, rtl_TextEncoding eDestEnc, bool* pSuccess,
130 bool bUnicode)
131{
132 if (pSuccess)
133 *pSuccess = true;
134 OStringBuffer aBuf;
135 const char* pStr = nullptr;
136 // 0x0b instead of \n, etc because of the replacements in SwWW8AttrIter::GetSnippet()
137 switch (c)
138 {
139 case 0x0b:
140 // hard line break
142 break;
143 case '\t':
145 break;
146 case '\\':
147 case '}':
148 case '{':
149 aBuf.append('\\');
150 aBuf.append(static_cast<char>(c));
151 break;
152 case 0xa0:
153 // non-breaking space
154 pStr = "\\~";
155 break;
156 case 0x1e:
157 // non-breaking hyphen
158 pStr = "\\_";
159 break;
160 case 0x1f:
161 // optional hyphen
162 pStr = "\\-";
163 break;
164 default:
165 if (c >= ' ' && c <= '~')
166 aBuf.append(static_cast<char>(c));
167 else
168 {
169 OUString sBuf(&c, 1);
170 OString sConverted;
171 if (pSuccess)
172 *pSuccess &= sBuf.convertToString(&sConverted, eDestEnc,
173 RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
174 | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR);
175 else
176 sBuf.convertToString(&sConverted, eDestEnc, OUSTRING_TO_OSTRING_CVTFLAGS);
177 const sal_Int32 nLen = sConverted.getLength();
178
179 if (pUCMode && bUnicode)
180 {
181 if (*pUCMode != nLen)
182 {
183 aBuf.append("\\uc" + OString::number(nLen));
184 // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.
185 aBuf.append(' ');
186 *pUCMode = nLen;
187 }
188 aBuf.append("\\u" + OString::number(static_cast<sal_Int32>(c)));
189 }
190
191 for (sal_Int32 nI = 0; nI < nLen; ++nI)
192 {
193 aBuf.append("\\'" + OutHex(sConverted[nI], 2));
194 }
195 }
196 }
197 if (pStr)
198 {
199 aBuf.append(pStr);
200 switch (c)
201 {
202 case 0xa0:
203 case 0x1e:
204 case 0x1f:
205 break;
206 default:
207 aBuf.append(' ');
208 }
209 }
210 return aBuf.makeStringAndClear();
211}
212
213OString OutString(std::u16string_view rStr, rtl_TextEncoding eDestEnc, bool bUnicode)
214{
215 OStringBuffer aBuf;
216 int nUCMode = 1;
217 for (size_t n = 0; n < rStr.size(); ++n)
218 aBuf.append(OutChar(rStr[n], &nUCMode, eDestEnc, nullptr, bUnicode));
219 if (nUCMode != 1)
220 {
221 aBuf.append(
222 OOO_STRING_SVTOOLS_RTF_UC + OString::number(sal_Int32(1))
223 + " "); // #i47831# add an additional whitespace, so that "document whitespaces" are not ignored.;
224 }
225 return aBuf.makeStringAndClear();
226}
227
229static bool TryOutString(std::u16string_view rStr, rtl_TextEncoding eDestEnc)
230{
231 int nUCMode = 1;
232 for (size_t n = 0; n < rStr.size(); ++n)
233 {
234 bool bRet;
235 OutChar(rStr[n], &nUCMode, eDestEnc, &bRet);
236 if (!bRet)
237 return false;
238 }
239 return true;
240}
241
242OString OutStringUpr(std::string_view pToken, std::u16string_view rStr, rtl_TextEncoding eDestEnc)
243{
244 if (TryOutString(rStr, eDestEnc))
245 return OString::Concat("{") + pToken + " " + OutString(rStr, eDestEnc) + "}";
246
247 return OString::Concat("{" OOO_STRING_SVTOOLS_RTF_UPR "{") + pToken + " "
248 + OutString(rStr, eDestEnc, /*bUnicode =*/false)
250 + OutString(rStr, eDestEnc) + "}}}";
251}
252
/**
 * Get the numeric value of a single character, representing a hex value.
 *
 * @param ch character to interpret
 * @return 0..15 for '0'-'9', 'a'-'f' and 'A'-'F'; -1 for any other character
 */
int AsHex(char ch)
{
    if (ch >= '0' && ch <= '9')
        return ch - '0';
    if (ch >= 'a' && ch <= 'f')
        return ch - 'a' + 10;
    if (ch >= 'A' && ch <= 'F')
        return ch - 'A' + 10;
    return -1;
}
270
271OString WriteHex(const sal_uInt8* pData, sal_uInt32 nSize, SvStream* pStream, sal_uInt32 nLimit)
272{
273 OStringBuffer aRet;
274
275 sal_uInt32 nBreak = 0;
276 for (sal_uInt32 i = 0; i < nSize; i++)
277 {
278 OString sNo = OString::number(pData[i], 16);
279 if (sNo.getLength() < 2)
280 {
281 if (pStream)
282 pStream->WriteChar('0');
283 else
284 aRet.append('0');
285 }
286 if (pStream)
287 pStream->WriteOString(sNo);
288 else
289 aRet.append(sNo);
290 if (++nBreak == nLimit)
291 {
292 if (pStream)
294 else
295 aRet.append(SAL_NEWLINE_STRING);
296 nBreak = 0;
297 }
298 }
299
300 return aRet.makeStringAndClear();
301}
302
303bool ExtractOLE2FromObjdata(const OString& rObjdata, SvStream& rOle2)
304{
305 SvMemoryStream aStream;
306 int b = 0;
307 int count = 2;
308
309 // Feed the destination text to a stream.
310 for (int i = 0; i < rObjdata.getLength(); ++i)
311 {
312 char ch = rObjdata[i];
313 if (ch != 0x0d && ch != 0x0a)
314 {
315 b = b << 4;
317 if (parsed == -1)
318 return false;
319 b += parsed;
320 count--;
321 if (!count)
322 {
323 aStream.WriteChar(b);
324 count = 2;
325 b = 0;
326 }
327 }
328 }
329
330 // Skip ObjectHeader, see [MS-OLEDS] 2.2.4.
331 if (!aStream.Tell())
332 return true;
333
334 aStream.Seek(0);
335 sal_uInt32 nData;
336 aStream.ReadUInt32(nData); // OLEVersion
337 aStream.ReadUInt32(nData); // FormatID
338 aStream.ReadUInt32(nData); // ClassName
339 OString aClassName;
340 if (nData)
341 {
342 // -1 because it is null-terminated.
343 aClassName = read_uInt8s_ToOString(aStream, nData - 1);
344 // Skip null-termination.
345 aStream.SeekRel(1);
346 }
347 aStream.ReadUInt32(nData); // TopicName
348 aStream.SeekRel(nData);
349 aStream.ReadUInt32(nData); // ItemName
350 aStream.SeekRel(nData);
351 aStream.ReadUInt32(nData); // NativeDataSize
352
353 if (!nData)
354 return true;
355
356 sal_uInt64 nPos = aStream.Tell();
357 sal_uInt8 aSignature[8];
358 aStream.ReadBytes(aSignature, SAL_N_ELEMENTS(aSignature));
359 aStream.Seek(nPos);
360 const sal_uInt8 aOle2Signature[8] = { 0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1 };
361 // Don't use Storage::IsStorageFile() here, that would seek to the start of the stream,
362 // where the magic will always mismatch.
363 if (std::memcmp(aSignature, aOle2Signature, SAL_N_ELEMENTS(aSignature)) == 0)
364 {
365 // NativeData
366 rOle2.WriteStream(aStream, nData);
367 }
368 else
369 {
370 SvMemoryStream aStorage;
371 WrapOle1InOle2(aStream, nData, aStorage, aClassName);
372 rOle2.WriteStream(aStorage);
373 }
374 rOle2.Seek(0);
375
376 return true;
377}
378
379bool StripMetafileHeader(const sal_uInt8*& rpGraphicAry, sal_uInt64& rSize)
380{
381 if (rpGraphicAry && (rSize > 0x22))
382 {
383 if ((rpGraphicAry[0] == 0xd7) && (rpGraphicAry[1] == 0xcd) && (rpGraphicAry[2] == 0xc6)
384 && (rpGraphicAry[3] == 0x9a))
385 {
386 // we have to get rid of the metafileheader
387 rpGraphicAry += 22;
388 rSize -= 22;
389 return true;
390 }
391 }
392 return false;
393}
394}
395
396/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_uInt64 Tell() const
SvStream & WriteOString(std::string_view rStr)
SvStream & ReadUInt32(sal_uInt32 &rUInt32)
sal_uInt64 Seek(sal_uInt64 nPos)
SvStream & WriteChar(char nChar)
std::size_t ReadBytes(void *pData, std::size_t nSize)
sal_uInt64 SeekRel(sal_Int64 nPos)
SvStream & WriteStream(SvStream &rStream)
#define SAL_NEWLINE_STRING
OUString aName
sal_Int64 n
sal_uInt16 nPos
#define SAL_WARN(area, stream)
#define SAL_N_ELEMENTS(arr)
aBuf
std::unique_ptr< sal_Int32[]> pData
int i
Definition: gentoken.py:48
OString OutStringUpr(std::string_view pToken, std::u16string_view rStr, rtl_TextEncoding eDestEnc)
Handles correct unicode and legacy export of a string, when a '{' \upr '{' keyword ansi_text '}{*' \u...
Definition: rtfutil.cxx:242
OString OutChar(sal_Unicode c, int *pUCMode, rtl_TextEncoding eDestEnc, bool *pSuccess, bool bUnicode)
Handles correct unicode and legacy export of a single character.
Definition: rtfutil.cxx:129
OString OutString(std::u16string_view rStr, rtl_TextEncoding eDestEnc, bool bUnicode)
Handles correct unicode and legacy export of a string.
Definition: rtfutil.cxx:213
bool StripMetafileHeader(const sal_uInt8 *&rpGraphicAry, sal_uInt64 &rSize)
Strips the header of a WMF file.
Definition: rtfutil.cxx:379
OString OutHex(sal_uLong nHex, sal_uInt8 nLen)
Outputs a single character in hex form.
Definition: rtfutil.cxx:98
bool ExtractOLE2FromObjdata(const OString &rObjdata, SvStream &rOle2)
Extract OLE2 data from an \objdata hex dump.
Definition: rtfutil.cxx:303
OString WriteHex(const sal_uInt8 *pData, sal_uInt32 nSize, SvStream *pStream, sal_uInt32 nLimit)
Writes binary data as a hex dump.
Definition: rtfutil.cxx:271
int AsHex(char ch)
Get the numeric value of a single character, representing a hex value.
Definition: rtfutil.cxx:253
static bool TryOutString(std::u16string_view rStr, rtl_TextEncoding eDestEnc)
Checks if lossless conversion of the string to eDestEnc is possible or not.
Definition: rtfutil.cxx:229
#define OOO_STRING_SVTOOLS_RTF_UC
#define OOO_STRING_SVTOOLS_RTF_LINE
#define OOO_STRING_SVTOOLS_RTF_IGNORE
#define OOO_STRING_SVTOOLS_RTF_UD
#define OOO_STRING_SVTOOLS_RTF_UPR
#define OOO_STRING_SVTOOLS_RTF_TAB
sal_uIntPtr sal_uLong
TOOLS_DLLPUBLIC OString read_uInt8s_ToOString(SvStream &rStrm, std::size_t nUnits)
unsigned char sal_uInt8
sal_uInt16 sal_Unicode
signed char sal_Int8