LibreOffice Module tools (master) 1
inetmime.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <algorithm>
21#include <limits>
22#include <forward_list>
23#include <memory>
24
25#include <sal/log.hxx>
26#include <rtl/ustring.hxx>
27#include <rtl/strbuf.hxx>
28#include <rtl/ustrbuf.hxx>
29#include <rtl/tencinfo.h>
30#include <tools/inetmime.hxx>
31#include <rtl/character.hxx>
32
33namespace {
34
35rtl_TextEncoding getCharsetEncoding(const char * pBegin,
36 const char * pEnd);
37
45bool isWhiteSpace(sal_uInt32 nChar)
46{
47 return nChar == '\t' || nChar == ' ';
48}
49
59int getBase64Weight(sal_uInt32 nChar)
60{
61 return rtl::isAsciiUpperCase(nChar) ? int(nChar - 'A') :
62 rtl::isAsciiLowerCase(nChar) ? int(nChar - 'a' + 26) :
63 rtl::isAsciiDigit(nChar) ? int(nChar - '0' + 52) :
64 nChar == '+' ? 62 :
65 nChar == '/' ? 63 :
66 nChar == '=' ? -1 : -2;
67}
68
69bool startsWithLineFolding(const sal_Unicode * pBegin,
70 const sal_Unicode * pEnd)
71{
72 DBG_ASSERT(pBegin && pBegin <= pEnd,
73 "startsWithLineFolding(): Bad sequence");
74
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]); // CR, LF
77}
78
79rtl_TextEncoding translateFromMIME(rtl_TextEncoding
80 eEncoding)
81{
82#if defined(_WIN32)
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
85#else
86 return eEncoding;
87#endif
88}
89
90bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
91{
92 return rtl_isOctetTextEncoding(eEncoding);
93}
94
95std::unique_ptr<sal_Unicode[]> convertToUnicode(const char * pBegin,
96 const char * pEnd,
97 rtl_TextEncoding eEncoding,
98 sal_Size & rSize)
99{
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
101 return nullptr;
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 std::unique_ptr<sal_Unicode[]> pBuffer;
107 sal_uInt32 nInfo;
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
110 {
111 pBuffer.reset(new sal_Unicode[nBufferSize]);
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
115 nBufferSize,
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
121 break;
122 pBuffer.reset();
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
124 }
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
127 if (nInfo != 0)
128 {
129 pBuffer.reset();
130 }
131 return pBuffer;
132}
133
134void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
135{
136 // See RFC 2279 for a discussion of UTF-8.
137 DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");
138
139 if (nChar < 0x80)
140 rSink.append(char(nChar));
141 else if (nChar < 0x800)
142 rSink.append(OStringChar(char(nChar >> 6 | 0xC0))
143 + OStringChar(char((nChar & 0x3F) | 0x80)));
144 else if (nChar < 0x10000)
145 rSink.append(
146 OStringChar(char(nChar >> 12 | 0xE0))
147 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
148 + OStringChar(char((nChar & 0x3F) | 0x80)));
149 else if (nChar < 0x200000)
150 rSink.append(
151 OStringChar(char(nChar >> 18 | 0xF0))
152 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
153 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
154 + OStringChar(char((nChar & 0x3F) | 0x80)));
155 else if (nChar < 0x4000000)
156 rSink.append(
157 OStringChar(char(nChar >> 24 | 0xF8))
158 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
159 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
160 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
161 + OStringChar(char((nChar & 0x3F) | 0x80)));
162 else
163 rSink.append(
164 OStringChar(char(nChar >> 30 | 0xFC))
165 + OStringChar(char((nChar >> 24 & 0x3F) | 0x80))
166 + OStringChar(char((nChar >> 18 & 0x3F) | 0x80))
167 + OStringChar(char((nChar >> 12 & 0x3F) | 0x80))
168 + OStringChar(char((nChar >> 6 & 0x3F) | 0x80))
169 + OStringChar(char((nChar & 0x3F) | 0x80)));
170}
171
172bool translateUTF8Char(const char *& rBegin,
173 const char * pEnd,
174 sal_uInt32 & rCharacter)
175{
176 if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
177 || static_cast< unsigned char >(*rBegin) >= 0xFE)
178 return false;
179
180 int nCount;
181 sal_uInt32 nMin;
182 sal_uInt32 nUCS4;
183 const char * p = rBegin;
184 if (static_cast< unsigned char >(*p) < 0xE0)
185 {
186 nCount = 1;
187 nMin = 0x80;
188 nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
189 }
190 else if (static_cast< unsigned char >(*p) < 0xF0)
191 {
192 nCount = 2;
193 nMin = 0x800;
194 nUCS4 = static_cast< unsigned char >(*p) & 0xF;
195 }
196 else if (static_cast< unsigned char >(*p) < 0xF8)
197 {
198 nCount = 3;
199 nMin = 0x10000;
200 nUCS4 = static_cast< unsigned char >(*p) & 7;
201 }
202 else if (static_cast< unsigned char >(*p) < 0xFC)
203 {
204 nCount = 4;
205 nMin = 0x200000;
206 nUCS4 = static_cast< unsigned char >(*p) & 3;
207 }
208 else
209 {
210 nCount = 5;
211 nMin = 0x4000000;
212 nUCS4 = static_cast< unsigned char >(*p) & 1;
213 }
214 ++p;
215
216 for (; nCount-- > 0; ++p)
217 if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
218 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
219 else
220 return false;
221
222 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
223 return false;
224
225 rCharacter = nUCS4;
226 rBegin = p;
227 return true;
228}
229
230void appendISO88591(OUStringBuffer & rText, char const * pBegin,
231 char const * pEnd);
232
233struct Parameter
234{
235 OString m_aAttribute;
236 OString m_aCharset;
237 OString m_aLanguage;
238 OString m_aValue;
239 sal_uInt32 m_nSection;
240 bool m_bExtended;
241
242 bool operator<(const Parameter& rhs) const // is used by std::list<Parameter>::sort
243 {
244 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
245 return nComp < 0 ||
246 (nComp == 0 && m_nSection < rhs.m_nSection);
247 }
248 struct IsSameSection // is used to check container for duplicates with std::any_of
249 {
250 const OString& rAttribute;
251 const sal_uInt32 nSection;
252 bool operator()(const Parameter& r) const
253 { return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
254 };
255};
256
257typedef std::forward_list<Parameter> ParameterList;
258
259bool parseParameters(ParameterList const & rInput,
261
262// appendISO88591
263
264void appendISO88591(OUStringBuffer & rText, char const * pBegin,
265 char const * pEnd)
266{
267 sal_Int32 nLength = pEnd - pBegin;
268 std::unique_ptr<sal_Unicode[]> pBuffer(new sal_Unicode[nLength]);
269 for (sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
270 *p++ = static_cast<unsigned char>(*pBegin++);
271 rText.append(pBuffer.get(), nLength);
272}
273
274// parseParameters
275
276bool parseParameters(ParameterList const & rInput,
278{
279 if (pOutput)
280 pOutput->clear();
281
282 for (auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
283 {
284 if (it->m_nSection > 0
285 && (itPrev == rInput.end()
286 || itPrev->m_nSection != it->m_nSection - 1
287 || itPrev->m_aAttribute != it->m_aAttribute))
288 return false;
289 }
290
291 if (pOutput)
292 for (auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
293 {
294 bool bCharset = !it->m_aCharset.isEmpty();
295 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
296 if (bCharset)
297 eEncoding
298 = getCharsetEncoding(it->m_aCharset.getStr(),
299 it->m_aCharset.getStr()
300 + it->m_aCharset.getLength());
301 OUStringBuffer aValue(64);
302 bool bBadEncoding = false;
303 itNext = it;
304 do
305 {
306 sal_Size nSize;
307 std::unique_ptr<sal_Unicode[]> pUnicode
308 = convertToUnicode(itNext->m_aValue.getStr(),
309 itNext->m_aValue.getStr()
310 + itNext->m_aValue.getLength(),
311 bCharset && it->m_bExtended ?
312 eEncoding :
313 RTL_TEXTENCODING_UTF8,
314 nSize);
315 if (!pUnicode && !(bCharset && it->m_bExtended))
316 pUnicode = convertToUnicode(
317 itNext->m_aValue.getStr(),
318 itNext->m_aValue.getStr()
319 + itNext->m_aValue.getLength(),
320 RTL_TEXTENCODING_ISO_8859_1, nSize);
321 if (!pUnicode)
322 {
323 bBadEncoding = true;
324 break;
325 }
326 aValue.append(pUnicode.get(), static_cast<sal_Int32>(nSize));
327 ++itNext;
328 }
329 while (itNext != rInput.end() && itNext->m_nSection != 0);
330
331 if (bBadEncoding)
332 {
333 aValue.setLength(0);
334 itNext = it;
335 do
336 {
337 if (itNext->m_bExtended)
338 {
339 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
340 aValue.append(
341 static_cast<sal_Unicode>(
342 static_cast<unsigned char>(itNext->m_aValue[i])
343 | 0xF800)); // map to unicode corporate use sub area
344 }
345 else
346 {
347 for (sal_Int32 i = 0; i < itNext->m_aValue.getLength(); ++i)
348 aValue.append( itNext->m_aValue[i] );
349 }
350 ++itNext;
351 }
352 while (itNext != rInput.end() && itNext->m_nSection != 0);
353 }
354 auto const ret = pOutput->insert(
355 {it->m_aAttribute,
356 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
357 SAL_INFO_IF(!ret.second, "tools",
358 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
359 }
360 return true;
361}
362
371bool isTokenChar(sal_uInt32 nChar)
372{
373 static const bool aMap[128]
374 = { false, false, false, false, false, false, false, false,
375 false, false, false, false, false, false, false, false,
376 false, false, false, false, false, false, false, false,
377 false, false, false, false, false, false, false, false,
378 false, true, false, true, true, true, true, true, // !"#$%&'
379 false, false, true, true, false, true, true, false, //()*+,-./
380 true, true, true, true, true, true, true, true, //01234567
381 true, true, false, false, false, false, false, false, //89:;<=>?
382 false, true, true, true, true, true, true, true, //@ABCDEFG
383 true, true, true, true, true, true, true, true, //HIJKLMNO
384 true, true, true, true, true, true, true, true, //PQRSTUVW
385 true, true, true, false, false, false, true, true, //XYZ[\]^_
386 true, true, true, true, true, true, true, true, //`abcdefg
387 true, true, true, true, true, true, true, true, //hijklmno
388 true, true, true, true, true, true, true, true, //pqrstuvw
389 true, true, true, true, true, true, true, false //xyz{|}~
390 };
391 return rtl::isAscii(nChar) && aMap[nChar];
392}
393
394const sal_Unicode * skipComment(const sal_Unicode * pBegin,
395 const sal_Unicode * pEnd)
396{
397 DBG_ASSERT(pBegin && pBegin <= pEnd,
398 "skipComment(): Bad sequence");
399
400 if (pBegin != pEnd && *pBegin == '(')
401 {
402 sal_uInt32 nLevel = 0;
403 for (const sal_Unicode * p = pBegin; p != pEnd;)
404 switch (*p++)
405 {
406 case '(':
407 ++nLevel;
408 break;
409
410 case ')':
411 if (--nLevel == 0)
412 return p;
413 break;
414
415 case '\\':
416 if (p != pEnd)
417 ++p;
418 break;
419 }
420 }
421 return pBegin;
422}
423
424const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
425 pBegin,
426 const sal_Unicode *
427 pEnd)
428{
429 DBG_ASSERT(pBegin && pBegin <= pEnd,
430 "skipLinearWhiteSpaceComment(): Bad sequence");
431
432 while (pBegin != pEnd)
433 switch (*pBegin)
434 {
435 case '\t':
436 case ' ':
437 ++pBegin;
438 break;
439
440 case 0x0D: // CR
441 if (startsWithLineFolding(pBegin, pEnd))
442 pBegin += 3;
443 else
444 return pBegin;
445 break;
446
447 case '(':
448 {
449 const sal_Unicode * p = skipComment(pBegin, pEnd);
450 if (p == pBegin)
451 return pBegin;
452 pBegin = p;
453 break;
454 }
455
456 default:
457 return pBegin;
458 }
459 return pBegin;
460}
461
462const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
463 const sal_Unicode * pEnd)
464{
465 DBG_ASSERT(pBegin && pBegin <= pEnd,
466 "skipQuotedString(): Bad sequence");
467
468 if (pBegin != pEnd && *pBegin == '"')
469 for (const sal_Unicode * p = pBegin + 1; p != pEnd;)
470 switch (*p++)
471 {
472 case 0x0D: // CR
473 if (pEnd - p < 2 || *p++ != 0x0A // LF
474 || !isWhiteSpace(*p++))
475 return pBegin;
476 break;
477
478 case '"':
479 return p;
480
481 case '\\':
482 if (p != pEnd)
483 ++p;
484 break;
485 }
486 return pBegin;
487}
488
489sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
490 sal_Unicode const * pEnd,
492 pParameters)
493{
494 ParameterList aList;
495 sal_Unicode const * pParameterBegin = pBegin;
496 for (sal_Unicode const * p = pParameterBegin;;)
497 {
498 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
499 if (pParameterBegin == pEnd || *pParameterBegin != ';')
500 break;
501 p = pParameterBegin + 1;
502
503 sal_Unicode const * pAttributeBegin
504 = skipLinearWhiteSpaceComment(p, pEnd);
505 p = pAttributeBegin;
506 bool bDowncaseAttribute = false;
507 while (p != pEnd && isTokenChar(*p) && *p != '*')
508 {
509 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
510 ++p;
511 }
512 if (p == pAttributeBegin)
513 break;
514 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
515 if (bDowncaseAttribute)
516 aAttribute = aAttribute.toAsciiLowerCase();
517
518 sal_uInt32 nSection = 0;
519 if (p != pEnd && *p == '*')
520 {
521 ++p;
522 if (p != pEnd && rtl::isAsciiDigit(*p)
523 && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
524 break;
525 }
526
527 bool bPresent = std::any_of(aList.begin(), aList.end(),
528 Parameter::IsSameSection{aAttribute, nSection});
529 if (bPresent)
530 break;
531
532 bool bExtended = false;
533 if (p != pEnd && *p == '*')
534 {
535 ++p;
536 bExtended = true;
537 }
538
539 p = skipLinearWhiteSpaceComment(p, pEnd);
540
541 if (p == pEnd || *p != '=')
542 break;
543
544 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
545
546 OString aCharset;
547 OString aLanguage;
548 OString aValue;
549 if (bExtended)
550 {
551 if (nSection == 0)
552 {
553 sal_Unicode const * pCharsetBegin = p;
554 bool bDowncaseCharset = false;
555 while (p != pEnd && isTokenChar(*p) && *p != '\'')
556 {
557 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
558 ++p;
559 }
560 if (p == pCharsetBegin)
561 break;
562 if (pParameters)
563 {
564 aCharset = OString(
565 pCharsetBegin,
566 p - pCharsetBegin,
567 RTL_TEXTENCODING_ASCII_US);
568 if (bDowncaseCharset)
569 aCharset = aCharset.toAsciiLowerCase();
570 }
571
572 if (p == pEnd || *p != '\'')
573 break;
574 ++p;
575
576 sal_Unicode const * pLanguageBegin = p;
577 bool bDowncaseLanguage = false;
578 int nLetters = 0;
579 for (; p != pEnd; ++p)
580 if (rtl::isAsciiAlpha(*p))
581 {
582 if (++nLetters > 8)
583 break;
584 bDowncaseLanguage = bDowncaseLanguage
585 || rtl::isAsciiUpperCase(*p);
586 }
587 else if (*p == '-')
588 {
589 if (nLetters == 0)
590 break;
591 nLetters = 0;
592 }
593 else
594 break;
595 if (nLetters == 0 || nLetters > 8)
596 break;
597 if (pParameters)
598 {
599 aLanguage = OString(
600 pLanguageBegin,
601 p - pLanguageBegin,
602 RTL_TEXTENCODING_ASCII_US);
603 if (bDowncaseLanguage)
604 aLanguage = aLanguage.toAsciiLowerCase();
605 }
606
607 if (p == pEnd || *p != '\'')
608 break;
609 ++p;
610 }
611 if (pParameters)
612 {
613 OStringBuffer aSink;
614 while (p != pEnd)
615 {
616 auto q = p;
617 sal_uInt32 nChar = INetMIME::getUTF32Character(q, pEnd);
618 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
619 break;
620 p = q;
621 if (nChar == '%' && p + 1 < pEnd)
622 {
623 int nWeight1 = INetMIME::getHexWeight(p[0]);
624 int nWeight2 = INetMIME::getHexWeight(p[1]);
625 if (nWeight1 >= 0 && nWeight2 >= 0)
626 {
627 aSink.append(char(nWeight1 << 4 | nWeight2));
628 p += 2;
629 continue;
630 }
631 }
632 writeUTF8(aSink, nChar);
633 }
634 aValue = aSink.makeStringAndClear();
635 }
636 else
637 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
638 ++p;
639 }
640 else if (p != pEnd && *p == '"')
641 if (pParameters)
642 {
643 OStringBuffer aSink(256);
644 bool bInvalid = false;
645 for (++p;;)
646 {
647 if (p == pEnd)
648 {
649 bInvalid = true;
650 break;
651 }
652 sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
653 if (nChar == '"')
654 break;
655 else if (nChar == 0x0D) // CR
656 {
657 if (pEnd - p < 2 || *p++ != 0x0A // LF
658 || !isWhiteSpace(*p))
659 {
660 bInvalid = true;
661 break;
662 }
663 nChar = static_cast<unsigned char>(*p++);
664 }
665 else if (nChar == '\\')
666 {
667 if (p == pEnd)
668 {
669 bInvalid = true;
670 break;
671 }
672 nChar = INetMIME::getUTF32Character(p, pEnd);
673 }
674 writeUTF8(aSink, nChar);
675 }
676 if (bInvalid)
677 break;
678 aValue = aSink.makeStringAndClear();
679 }
680 else
681 {
682 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
683 if (p == pStringEnd)
684 break;
685 p = pStringEnd;
686 }
687 else
688 {
689 sal_Unicode const * pTokenBegin = p;
690 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
691 ++p;
692 if (p == pTokenBegin)
693 break;
694 if (pParameters)
695 aValue = OString(
696 pTokenBegin, p - pTokenBegin,
697 RTL_TEXTENCODING_UTF8);
698 }
699 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
700 }
701 aList.sort();
702 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
703}
704
705bool equalIgnoreCase(const char * pBegin1,
706 const char * pEnd1,
707 const char * pString2)
708{
709 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
710 "equalIgnoreCase(): Bad sequences");
711
712 while (*pString2 != 0)
713 if (pBegin1 == pEnd1
714 || (rtl::toAsciiUpperCase(static_cast<unsigned char>(*pBegin1++))
715 != rtl::toAsciiUpperCase(
716 static_cast<unsigned char>(*pString2++))))
717 return false;
718 return pBegin1 == pEnd1;
719}
720
721struct EncodingEntry
722{
723 char const * m_aName;
724 rtl_TextEncoding m_eEncoding;
725};
726
727// The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
728// assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
729// noted:
730EncodingEntry const aEncodingMap[]
731 = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
732 { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
733 { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
734 { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
735 { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
736 { "ASCII", RTL_TEXTENCODING_ASCII_US },
737 { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
738 { "US", RTL_TEXTENCODING_ASCII_US },
739 { "IBM367", RTL_TEXTENCODING_ASCII_US },
740 { "CP367", RTL_TEXTENCODING_ASCII_US },
741 { "CSASCII", RTL_TEXTENCODING_ASCII_US },
742 { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
743 { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
744 { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
745 { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
746 { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
747 { "L1", RTL_TEXTENCODING_ISO_8859_1 },
748 { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
749 { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
750 { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
751 { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
752 { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
753 { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
754 { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
755 { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
756 { "L2", RTL_TEXTENCODING_ISO_8859_2 },
757 { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
758 { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
759 { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
760 { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
761 { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
762 { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
763 { "L3", RTL_TEXTENCODING_ISO_8859_3 },
764 { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
765 { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
766 { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
767 { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
768 { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
769 { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
770 { "L4", RTL_TEXTENCODING_ISO_8859_4 },
771 { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
772 { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
773 { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
774 { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
775 { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
776 { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
777 { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
778 { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
779 { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
780 { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
781 { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
782 { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
783 { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
784 { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
785 { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
786 { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
787 { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
788 { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
789 { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
790 { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
791 { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
792 { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
793 { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
794 { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
795 { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
796 { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
797 { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
798 { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
799 { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
800 { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
801 { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
802 { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
803 { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
804 { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
805 { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
806 { "L5", RTL_TEXTENCODING_ISO_8859_9 },
807 { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
808 { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
809 { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
810 { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
811 { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
812 { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
813 { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
814 { "IBM437", RTL_TEXTENCODING_IBM_437 },
815 { "CP437", RTL_TEXTENCODING_IBM_437 },
816 { "437", RTL_TEXTENCODING_IBM_437 },
817 { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
818 { "IBM850", RTL_TEXTENCODING_IBM_850 },
819 { "CP850", RTL_TEXTENCODING_IBM_850 },
820 { "850", RTL_TEXTENCODING_IBM_850 },
821 { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
822 { "IBM860", RTL_TEXTENCODING_IBM_860 },
823 { "CP860", RTL_TEXTENCODING_IBM_860 },
824 { "860", RTL_TEXTENCODING_IBM_860 },
825 { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
826 { "IBM861", RTL_TEXTENCODING_IBM_861 },
827 { "CP861", RTL_TEXTENCODING_IBM_861 },
828 { "861", RTL_TEXTENCODING_IBM_861 },
829 { "CP-IS", RTL_TEXTENCODING_IBM_861 },
830 { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
831 { "IBM863", RTL_TEXTENCODING_IBM_863 },
832 { "CP863", RTL_TEXTENCODING_IBM_863 },
833 { "863", RTL_TEXTENCODING_IBM_863 },
834 { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
835 { "IBM865", RTL_TEXTENCODING_IBM_865 },
836 { "CP865", RTL_TEXTENCODING_IBM_865 },
837 { "865", RTL_TEXTENCODING_IBM_865 },
838 { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
839 { "IBM775", RTL_TEXTENCODING_IBM_775 },
840 { "CP775", RTL_TEXTENCODING_IBM_775 },
841 { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
842 { "IBM852", RTL_TEXTENCODING_IBM_852 },
843 { "CP852", RTL_TEXTENCODING_IBM_852 },
844 { "852", RTL_TEXTENCODING_IBM_852 },
845 { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
846 { "IBM855", RTL_TEXTENCODING_IBM_855 },
847 { "CP855", RTL_TEXTENCODING_IBM_855 },
848 { "855", RTL_TEXTENCODING_IBM_855 },
849 { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
850 { "IBM857", RTL_TEXTENCODING_IBM_857 },
851 { "CP857", RTL_TEXTENCODING_IBM_857 },
852 { "857", RTL_TEXTENCODING_IBM_857 },
853 { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
854 { "IBM862", RTL_TEXTENCODING_IBM_862 },
855 { "CP862", RTL_TEXTENCODING_IBM_862 },
856 { "862", RTL_TEXTENCODING_IBM_862 },
857 { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
858 { "IBM864", RTL_TEXTENCODING_IBM_864 },
859 { "CP864", RTL_TEXTENCODING_IBM_864 },
860 { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
861 { "IBM866", RTL_TEXTENCODING_IBM_866 },
862 { "CP866", RTL_TEXTENCODING_IBM_866 },
863 { "866", RTL_TEXTENCODING_IBM_866 },
864 { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
865 { "IBM869", RTL_TEXTENCODING_IBM_869 },
866 { "CP869", RTL_TEXTENCODING_IBM_869 },
867 { "869", RTL_TEXTENCODING_IBM_869 },
868 { "CP-GR", RTL_TEXTENCODING_IBM_869 },
869 { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
870 { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
871 { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
872 { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
873 { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
874 { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
875 { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
876 { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
877 { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
878 { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
879 { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
880 { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
881 { "GB2312", RTL_TEXTENCODING_GB_2312 },
882 { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
883 { "BIG5", RTL_TEXTENCODING_BIG5 },
884 { "CSBIG5", RTL_TEXTENCODING_BIG5 },
885 { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
886 { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
887 RTL_TEXTENCODING_EUC_JP },
888 { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
889 { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
890 { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
891 { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
892 { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
893 { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
894 { "UTF-7", RTL_TEXTENCODING_UTF7 },
895 { "UTF-8", RTL_TEXTENCODING_UTF8 },
896 { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
897 { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
898 { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
899 { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
900 { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
901 { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
902 { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
903 { "CSUCS4", RTL_TEXTENCODING_UCS4 },
904 { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
905 { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
906
907rtl_TextEncoding getCharsetEncoding(char const * pBegin,
908 char const * pEnd)
909{
910 for (const EncodingEntry& i : aEncodingMap)
911 if (equalIgnoreCase(pBegin, pEnd, i.m_aName))
912 return i.m_eEncoding;
913 return RTL_TEXTENCODING_DONTKNOW;
914}
915
916}
917
918// INetMIME
919
920// static
921bool INetMIME::isAtomChar(sal_uInt32 nChar)
922{
923 static const bool aMap[128]
924 = { false, false, false, false, false, false, false, false,
925 false, false, false, false, false, false, false, false,
926 false, false, false, false, false, false, false, false,
927 false, false, false, false, false, false, false, false,
928 false, true, false, true, true, true, true, true, // !"#$%&'
929 false, false, true, true, false, true, false, true, //()*+,-./
930 true, true, true, true, true, true, true, true, //01234567
931 true, true, false, false, false, true, false, true, //89:;<=>?
932 false, true, true, true, true, true, true, true, //@ABCDEFG
933 true, true, true, true, true, true, true, true, //HIJKLMNO
934 true, true, true, true, true, true, true, true, //PQRSTUVW
935 true, true, true, false, false, false, true, true, //XYZ[\]^_
936 true, true, true, true, true, true, true, true, //`abcdefg
937 true, true, true, true, true, true, true, true, //hijklmno
938 true, true, true, true, true, true, true, true, //pqrstuvw
939 true, true, true, true, true, true, true, false //xyz{|}~
940 };
941 return rtl::isAscii(nChar) && aMap[nChar];
942}
943
944// static
945bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
946{
947 static const bool aMap[128]
948 = { false, false, false, false, false, false, false, false,
949 false, false, false, false, false, false, false, false,
950 false, false, false, false, false, false, false, false,
951 false, false, false, false, false, false, false, false,
952 false, true, false, true, true, false, true, true, // !"#$%&'
953 false, false, false, true, true, true, true, true, //()*+,-./
954 true, true, true, true, true, true, true, true, //01234567
955 true, true, true, true, true, true, true, true, //89:;<=>?
956 true, true, true, true, true, true, true, true, //@ABCDEFG
957 true, true, true, true, true, true, true, true, //HIJKLMNO
958 true, true, true, true, true, true, true, true, //PQRSTUVW
959 true, true, true, true, false, true, true, true, //XYZ[\]^_
960 true, true, true, true, true, true, true, true, //`abcdefg
961 true, true, true, true, true, true, true, true, //hijklmno
962 true, true, true, true, true, true, true, true, //pqrstuvw
963 true, true, true, false, true, true, true, false //xyz{|}~
964 };
965 return rtl::isAscii(nChar) && aMap[nChar];
966}
967
968// static
970 const sal_Unicode * pEnd1,
971 const char * pString2)
972{
973 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
974 "INetMIME::equalIgnoreCase(): Bad sequences");
975
976 while (*pString2 != 0)
977 if (pBegin1 == pEnd1
978 || (rtl::toAsciiUpperCase(*pBegin1++)
979 != rtl::toAsciiUpperCase(
980 static_cast<unsigned char>(*pString2++))))
981 return false;
982 return pBegin1 == pEnd1;
983}
984
985// static
987 const sal_Unicode * pEnd, bool bLeadingZeroes,
988 sal_uInt32 & rValue)
989{
990 sal_uInt64 nTheValue = 0;
991 const sal_Unicode * p = rBegin;
992 for ( ; p != pEnd; ++p)
993 {
994 int nWeight = getWeight(*p);
995 if (nWeight < 0)
996 break;
997 nTheValue = 10 * nTheValue + nWeight;
998 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
999 return false;
1000 }
1001 if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
1002 return false;
1003 rBegin = p;
1004 rValue = sal_uInt32(nTheValue);
1005 return true;
1006}
1007
1008// static
1010 std::u16string_view rStr, OUString * pType,
1011 OUString * pSubType, INetContentTypeParameterList * pParameters)
1012{
1013 sal_Unicode const * pBegin = rStr.data();
1014 sal_Unicode const * pEnd = pBegin + rStr.size();
1015 sal_Unicode const * p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1016 sal_Unicode const * pTypeBegin = p;
1017 while (p != pEnd && isTokenChar(*p))
1018 {
1019 ++p;
1020 }
1021 if (p == pTypeBegin)
1022 return nullptr;
1023 sal_Unicode const * pTypeEnd = p;
1024
1025 p = skipLinearWhiteSpaceComment(p, pEnd);
1026 if (p == pEnd || *p++ != '/')
1027 return nullptr;
1028
1029 p = skipLinearWhiteSpaceComment(p, pEnd);
1030 sal_Unicode const * pSubTypeBegin = p;
1031 while (p != pEnd && isTokenChar(*p))
1032 {
1033 ++p;
1034 }
1035 if (p == pSubTypeBegin)
1036 return nullptr;
1037 sal_Unicode const * pSubTypeEnd = p;
1038
1039 if (pType != nullptr)
1040 {
1041 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1042 }
1043 if (pSubType != nullptr)
1044 {
1045 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1046 .toAsciiLowerCase();
1047 }
1048
1049 return scanParameters(p, pEnd, pParameters);
1050}
1051
// Decode a raw header field body into a Unicode string.
//
// rBody is scanned left to right.  Every substring that matches the
// encoded-word grammar given in the comment at the top of the function body
// ("=?charset[*language]?B?...?=" or "=?charset[*language]?Q?...?=") is
// replaced by its decoded text, converted from the named charset to Unicode.
// Everything else is copied through appendISO88591(), except for byte
// sequences that translateUTF8Char() accepts, which are appended as a single
// UTF-32 code point.
//
// A candidate encoded word that turns out to be malformed (bad charset,
// unknown encoding letter, invalid payload, missing "?=", or a failing
// charset conversion) is not an error: it is simply treated as literal text.
//
// White space directly following an encoded word is dropped when another
// valid encoded word follows it, and kept otherwise (see pWSPBegin below).
//
// Returns the decoded text.
1052// static
1053OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
1054{
1055 // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
1056 // versions of StarOffice send mails with header fields where encoded
1057 // words can be preceded by '=', ',', '.', '"', or '(', and followed by
1058 // '=', ',', '.', '"', ')', without any required white space in between.
1059 // And there appear to exist some broken mailers that only encode single
1060 // letters within words, like "Appel
1061 // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
1062 // detect encoded words even when not properly surrounded by white space.
1063
1064 // Non US-ASCII characters in rBody are treated as ISO-8859-1.
1065
1066 // encoded-word = "=?"
1067 // 1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
1068 // ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
1069 // ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
1070 // / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
1071 // "?="
1072
1073 // base64 = ALPHA / DIGIT / "+" / "/"
1074
1075 const char * pBegin = rBody.getStr();
1076 const char * pEnd = pBegin + rBody.getLength();
1077
1078 OUStringBuffer sDecoded;
    // pCopyBegin: start of the input range that has been scanned but not yet
    // appended to sDecoded.
1079 const char * pCopyBegin = pBegin;
1080
1081 /* bool bStartEncodedWord = true; */
    // pWSPBegin: start of the white space run (possibly empty) that directly
    // precedes p.  When an encoded word is accepted, pending literal text is
    // flushed only up to this point, which drops that white space.
1082 const char * pWSPBegin = pBegin;
1083
1084 for (const char * p = pBegin; p != pEnd;)
1085 {
1086 if (*p == '=' /* && bStartEncodedWord */)
1087 {
    // q is a look-ahead cursor.  p is moved to q only if the complete
    // encoded word validates; otherwise p stays on the '=' and the
    // character is handled as literal text further down.
1088 const char * q = p + 1;
1089 bool bEncodedWord = q != pEnd && *q++ == '?';
1090
1091 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
1092 if (bEncodedWord)
1093 {
    // Scan the charset name up to the terminating '?', keeping track of
    // an optional "*language" suffix.  pLanguageBegin points at the '*'
    // while the suffix still looks well-formed and is reset to nullptr
    // as soon as it does not; nAlphaCount counts the letters of the
    // current language subtag (at most 8 are allowed).
1094 const char * pCharsetBegin = q;
1095 const char * pLanguageBegin = nullptr;
1096 int nAlphaCount = 0;
1097 for (bool bDone = false; !bDone;)
1098 if (q == pEnd)
1099 {
1100 bEncodedWord = false;
1101 bDone = true;
1102 }
1103 else
1104 {
1105 char cChar = *q++;
1106 switch (cChar)
1107 {
1108 case '*':
1109 pLanguageBegin = q - 1;
1110 nAlphaCount = 0;
1111 break;
1112
1113 case '-':
    // A '-' separates language subtags; an empty subtag before it
    // invalidates the language suffix.
1114 if (pLanguageBegin != nullptr)
1115 {
1116 if (nAlphaCount == 0)
1117 pLanguageBegin = nullptr;
1118 else
1119 nAlphaCount = 0;
1120 }
1121 break;
1122
1123 case '?':
    // End of the charset part.  An empty charset is invalid.
    // Otherwise the charset name ends at the '*' if a valid language
    // suffix was seen, and at this '?' if not.
1124 if (pCharsetBegin == q - 1)
1125 bEncodedWord = false;
1126 else
1127 {
1128 eCharsetEncoding
1129 = getCharsetEncoding(
1130 pCharsetBegin,
1131 pLanguageBegin == nullptr
1132 || nAlphaCount == 0 ?
1133 q - 1 : pLanguageBegin);
1134 bEncodedWord = isMIMECharsetEncoding(
1135 eCharsetEncoding);
1136 eCharsetEncoding
1137 = translateFromMIME(eCharsetEncoding);
1138 }
1139 bDone = true;
1140 break;
1141
1142 default:
    // Inside a language suffix only ASCII letters are allowed, at
    // most 8 per subtag.
1143 if (pLanguageBegin != nullptr
1144 && (!rtl::isAsciiAlpha(
1145 static_cast<unsigned char>(cChar))
1146 || ++nAlphaCount > 8))
1147 pLanguageBegin = nullptr;
1148 break;
1149 }
1150 }
1151 }
1152
    // Encoding letter: 'B' (base64) or 'Q', in either case.
1153 bool bEncodingB = false;
1154 if (bEncodedWord)
1155 {
1156 if (q == pEnd)
1157 bEncodedWord = false;
1158 else
1159 {
1160 switch (*q++)
1161 {
1162 case 'B':
1163 case 'b':
1164 bEncodingB = true;
1165 break;
1166
1167 case 'Q':
1168 case 'q':
1169 bEncodingB = false;
1170 break;
1171
1172 default:
1173 bEncodedWord = false;
1174 break;
1175 }
1176 }
1177 }
1178
    // The encoding letter must be followed by '?'.
1179 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';
1180
    // sText collects the decoded payload bytes, still in the source charset.
1181 OStringBuffer sText;
1182 if (bEncodedWord)
1183 {
1184 if (bEncodingB)
1185 {
    // Base64 payload: consume groups of four characters until the
    // group is directly followed by '?'.
1186 for (bool bDone = false; !bDone;)
1187 {
1188 if (pEnd - q < 4)
1189 {
1190 bEncodedWord = false;
1191 bDone = true;
1192 }
1193 else
1194 {
    // bFinal: this group contains '=' padding.
    // nCount: number of payload bytes this group yields.
1195 bool bFinal = false;
1196 int nCount = 3;
1197 sal_uInt32 nValue = 0;
1198 for (int nShift = 18; nShift >= 0; nShift -= 6)
1199 {
    // getBase64Weight() returns -2 for a character outside
    // the base64 alphabet and -1 for the '=' padding sign.
1200 int nWeight = getBase64Weight(*q++);
1201 if (nWeight == -2)
1202 {
1203 bEncodedWord = false;
1204 bDone = true;
1205 break;
1206 }
1207 if (nWeight == -1)
1208 {
1209 if (!bFinal)
1210 {
    // Padding is not allowed in the first two positions of a group.
1211 if (nShift >= 12)
1212 {
1213 bEncodedWord = false;
1214 bDone = true;
1215 break;
1216 }
1217 bFinal = true;
    // '=' in the third position leaves one byte, in the fourth
    // position two bytes.
1218 nCount = nShift == 6 ? 1 : 2;
1219 }
1220 }
1221 else
1222 nValue |= nWeight << nShift;
1223 }
1224 if (bEncodedWord)
1225 {
    // Emit the nCount most significant bytes of the 24 bit group.
1226 for (int nShift = 16; nCount-- > 0; nShift -= 8)
1227 sText.append(char(nValue >> nShift & 0xFF));
    // NOTE(review): *q is read here without first comparing q against
    // pEnd; presumably this relies on the buffer returned by
    // rBody.getStr() being NUL-terminated - confirm.
1228 if (*q == '?')
1229 {
1230 ++q;
1231 bDone = true;
1232 }
    // A padded group must be the last one, i.e. directly followed by '?'.
1233 if (bFinal && !bDone)
1234 {
1235 bEncodedWord = false;
1236 bDone = true;
1237 }
1238 }
1239 }
1240 }
1241 }
1242 else
1243 {
    // "Q" payload.  pEncodedTextCopyBegin marks the start of the run of
    // literal characters that has not been copied to sText yet; the run
    // is flushed whenever an escape ('=' + two hex digits), a '_' or the
    // terminating '?' is met.
1244 const char * pEncodedTextBegin = q;
1245 const char * pEncodedTextCopyBegin = q;
1246 for (bool bDone = false; !bDone;)
1247 if (q == pEnd)
1248 {
1249 bEncodedWord = false;
1250 bDone = true;
1251 }
1252 else
1253 {
1254 sal_uInt32 nChar = static_cast<unsigned char>(*q++);
1255 switch (nChar)
1256 {
1257 case '=':
1258 {
    // "=XY": two hexadecimal digits giving one byte.
1259 if (pEnd - q < 2)
1260 {
1261 bEncodedWord = false;
1262 bDone = true;
1263 break;
1264 }
1265 int nDigit1 = getHexWeight(q[0]);
1266 int nDigit2 = getHexWeight(q[1]);
1267 if (nDigit1 < 0 || nDigit2 < 0)
1268 {
1269 bEncodedWord = false;
1270 bDone = true;
1271 break;
1272 }
1273 sText.append(
1274 rBody.subView(
1275 (pEncodedTextCopyBegin - pBegin),
1276 (q - 1 - pEncodedTextCopyBegin))
1277 + OStringChar(char(nDigit1 << 4 | nDigit2)));
1278 q += 2;
1279 pEncodedTextCopyBegin = q;
1280 break;
1281 }
1282
1283 case '?':
    // End of the payload.  q is already past the '?', so a
    // distance of 1 means the payload is empty, which is invalid.
1284 if (q - pEncodedTextBegin > 1)
1285 sText.append(rBody.subView(
1286 (pEncodedTextCopyBegin - pBegin),
1287 (q - 1 - pEncodedTextCopyBegin)));
1288 else
1289 bEncodedWord = false;
1290 bDone = true;
1291 break;
1292
1293 case '_':
    // '_' stands for a space character.
1294 sText.append(
1295 rBody.subView(
1296 (pEncodedTextCopyBegin - pBegin),
1297 (q - 1 - pEncodedTextCopyBegin))
1298 + OString::Concat(" "));
1299 pEncodedTextCopyBegin = q;
1300 break;
1301
1302 default:
    // Any other character must pass isVisible(); it then stays
    // part of the pending literal run.
1303 if (!isVisible(nChar))
1304 {
1305 bEncodedWord = false;
1306 bDone = true;
1307 }
1308 break;
1309 }
1310 }
1311 }
1312 }
1313
    // The '?' that ended the payload has been consumed already; the encoded
    // word must now close with '='.
1314 bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';
1315
1316 std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1317 sal_Size nUnicodeSize = 0;
1318 if (bEncodedWord)
1319 {
1320 pUnicodeBuffer
1321 = convertToUnicode(sText.getStr(),
1322 sText.getStr() + sText.getLength(),
1323 eCharsetEncoding, nUnicodeSize);
    // A null buffer from convertToUnicode() demotes the candidate to
    // literal text as well.
1324 if (!pUnicodeBuffer)
1325 bEncodedWord = false;
1326 }
1327
1328 if (bEncodedWord)
1329 {
    // Flush pending literal text up to the white space run preceding the
    // encoded word (the run itself is dropped), then append the decoded
    // word and continue scanning behind it.
1330 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1331 sDecoded.append(
1332 pUnicodeBuffer.get(),
1333 static_cast< sal_Int32 >(nUnicodeSize));
1334 pUnicodeBuffer.reset();
1335 p = q;
1336 pCopyBegin = p;
1337
    // Skip white space after the encoded word but remember where it
    // started.  It is dropped only if another encoded word is accepted
    // right after it; otherwise it is still inside [pCopyBegin, ...) and
    // gets copied with the following literal text.
1338 pWSPBegin = p;
1339 while (p != pEnd && isWhiteSpace(*p))
1340 ++p;
1341 /* bStartEncodedWord = p != pWSPBegin; */
1342 continue;
1343 }
1344 }
1345
1346 if (p == pEnd)
1347 break;
1348
    // Literal character (including the '=' of a rejected encoded word).
    // The '"', '(' and ')' cases do nothing now that the bStartEncodedWord
    // logic is commented out; those characters just stay in the pending
    // copy range.
1349 switch (*p++)
1350 {
1351 case '"':
1352 /* bStartEncodedWord = true; */
1353 break;
1354
1355 case '(':
1356 /* bStartEncodedWord = true; */
1357 break;
1358
1359 case ')':
1360 /* bStartEncodedWord = false; */
1361 break;
1362
1363 default:
1364 {
    // If translateUTF8Char() accepts a sequence starting at this
    // character, flush the pending literal text before it and append the
    // resulting code point instead of the raw bytes.
1365 const char * pUTF8Begin = p - 1;
1366 const char * pUTF8End = pUTF8Begin;
1367 sal_uInt32 nCharacter = 0;
1368 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1369 {
1370 appendISO88591(sDecoded, pCopyBegin, p - 1);
1371 sDecoded.appendUtf32(nCharacter);
1372 p = pUTF8End;
1373 pCopyBegin = p;
1374 }
1375 /* bStartEncodedWord = false; */
1376 break;
1377 }
1378 }
    // A literal character was consumed, so the white space run preceding p
    // is empty again.
1379 pWSPBegin = p;
1380 }
1381
    // Flush whatever literal text is still pending.
1382 appendISO88591(sDecoded, pCopyBegin, pEnd);
1383 return sDecoded.makeStringAndClear();
1384}
1385
1386/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool operator<(const BigInt &rVal1, const BigInt &rVal2)
Definition: bigint.cxx:818
static bool isVisible(sal_uInt32 nChar)
Check for US-ASCII visible character.
Definition: inetmime.hxx:207
static sal_uInt32 getUTF32Character(const sal_Unicode *&rBegin, const sal_Unicode *pEnd)
Get the UTF-32 character at the head of a UTF-16 encoded string.
Definition: inetmime.hxx:227
static OUString decodeHeaderFieldBody(const OString &rBody)
Definition: inetmime.cxx:1053
static bool equalIgnoreCase(const sal_Unicode *pBegin1, const sal_Unicode *pEnd1, const char *pString2)
Check two US-ASCII strings for equality, ignoring case.
Definition: inetmime.cxx:969
static int getHexWeight(sal_uInt32 nChar)
Get the hexadecimal digit weight of a US-ASCII character.
Definition: inetmime.hxx:219
static int getWeight(sal_uInt32 nChar)
Get the digit weight of a US-ASCII character.
Definition: inetmime.hxx:213
static bool isIMAPAtomChar(sal_uInt32 nChar)
Check whether some character is valid within an RFC 2060 <atom>.
Definition: inetmime.cxx:945
static sal_Unicode const * scanContentType(std::u16string_view rStr, OUString *pType=nullptr, OUString *pSubType=nullptr, INetContentTypeParameterList *pParameters=nullptr)
Parse the body of an RFC 2045 Content-Type header field.
Definition: inetmime.cxx:1009
static bool isAtomChar(sal_uInt32 nChar)
Check whether some character is valid within an RFC 822 <atom>.
Definition: inetmime.cxx:921
static bool scanUnsigned(const sal_Unicode *&rBegin, const sal_Unicode *pEnd, bool bLeadingZeroes, sal_uInt32 &rValue)
Definition: inetmime.cxx:986
int nCount
#define DBG_ASSERT(sCon, aError)
Definition: debug.hxx:57
sal_Int16 nValue
std::unordered_map< OString, INetContentTypeParameter > INetContentTypeParameterList
The key is the name of the attribute, in US-ASCII encoding and converted to lower case.
Definition: inetmime.hxx:74
void * p
#define SAL_INFO_IF(condition, area, stream)
int i
HashMap_OWString_Interface aMap
OUString m_aName
const wchar_t *typedef int(__stdcall *DllNativeUnregProc)(int
sal_uInt16 sal_Unicode
sal_Int32 nLength