22#include <forward_list>
26#include <rtl/ustring.hxx>
27#include <rtl/strbuf.hxx>
28#include <rtl/ustrbuf.hxx>
29#include <rtl/tencinfo.h>
31#include <rtl/character.hxx>
35rtl_TextEncoding getCharsetEncoding(
const char * pBegin,
45bool isWhiteSpace(sal_uInt32 nChar)
47 return nChar ==
'\t' || nChar ==
' ';
59int getBase64Weight(sal_uInt32 nChar)
61 return rtl::isAsciiUpperCase(nChar) ?
int(nChar -
'A') :
62 rtl::isAsciiLowerCase(nChar) ?
int(nChar -
'a' + 26) :
63 rtl::isAsciiDigit(nChar) ?
int(nChar -
'0' + 52) :
66 nChar ==
'=' ? -1 : -2;
// Reports whether the range beginning at pBegin starts with a folded line
// break: CR (0x0D), LF (0x0A), then one character accepted by isWhiteSpace().
// NOTE(review): this listing lacks the remaining parameter line, the opening
// of the assertion whose message appears below, and the braces. pEnd is
// presumably the end of the range -- confirm against the real file.
69bool startsWithLineFolding(
const sal_Unicode * pBegin,
73 "startsWithLineFolding(): Bad sequence");
// At least three code units must remain before the pattern is examined.
75 return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
76 && isWhiteSpace(pBegin[2]);
// Maps an encoding to the one actually used for conversion. The visible
// return statement substitutes RTL_TEXTENCODING_MS_1252 for
// RTL_TEXTENCODING_ISO_8859_1 and passes every other value through unchanged.
// NOTE(review): listing lines 80-82 and everything after 84 are absent here,
// so the parameter name line, the braces and any conditional compilation
// around the return cannot be seen -- confirm against the real file.
79rtl_TextEncoding translateFromMIME(rtl_TextEncoding
83 return eEncoding == RTL_TEXTENCODING_ISO_8859_1 ?
84 RTL_TEXTENCODING_MS_1252 : eEncoding;
90bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
92 return rtl_isOctetTextEncoding(eEncoding);
// Converts the byte range starting at pBegin from eEncoding to UTF-16 and
// hands the result back as an owned sal_Unicode array; the converted length
// is stored in rSize (see the assignment from rtl_convertTextToUnicode).
// NOTE(review): this listing lacks the trailing parameter lines, all braces,
// the buffer allocation, the declaration of nInfo and every return
// statement, so the exact failure behaviour cannot be read from here.
95std::unique_ptr<sal_Unicode[]> convertToUnicode(
const char * pBegin,
97 rtl_TextEncoding eEncoding,
// An unknown encoding is special-cased; the statement it guards is not shown.
100 if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
102 rtl_TextToUnicodeConverter hConverter
103 = rtl_createTextToUnicodeConverter(eEncoding);
104 rtl_TextToUnicodeContext hContext
105 = rtl_createTextToUnicodeContext(hConverter);
106 std::unique_ptr<sal_Unicode[]> pBuffer;
// The first attempt sizes the buffer to the input length; each further
// iteration enlarges it by a third plus one.
108 for (sal_Size nBufferSize = pEnd - pBegin;;
109 nBufferSize += nBufferSize / 3 + 1)
112 sal_Size nSrcCvtBytes;
113 rSize = rtl_convertTextToUnicode(
114 hConverter, hContext, pBegin, pEnd - pBegin, pBuffer.get(),
// Undefined and invalid input is reported as an error by these flags.
116 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
117 | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
118 | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
119 &nInfo, &nSrcCvtBytes);
// Any outcome other than a too-small destination buffer is tested for here
// (presumably to leave the loop -- the guarded statement is not shown);
// otherwise the context is reset before the next attempt.
120 if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL)
123 rtl_resetTextToUnicodeContext(hConverter, hContext);
// Context and converter are destroyed once the loop is done.
125 rtl_destroyTextToUnicodeContext(hConverter, hContext);
126 rtl_destroyTextToUnicodeConverter(hConverter);
134void writeUTF8(OStringBuffer & rSink, sal_uInt32 nChar)
137 DBG_ASSERT(nChar < 0x80000000,
"writeUTF8(): Bad char");
140 rSink.append(
char(nChar));
141 else if (nChar < 0x800)
142 rSink.append(OStringChar(
char(nChar >> 6 | 0xC0))
143 + OStringChar(
char((nChar & 0x3F) | 0x80)));
144 else if (nChar < 0x10000)
146 OStringChar(
char(nChar >> 12 | 0xE0))
147 + OStringChar(
char((nChar >> 6 & 0x3F) | 0x80))
148 + OStringChar(
char((nChar & 0x3F) | 0x80)));
149 else if (nChar < 0x200000)
151 OStringChar(
char(nChar >> 18 | 0xF0))
152 + OStringChar(
char((nChar >> 12 & 0x3F) | 0x80))
153 + OStringChar(
char((nChar >> 6 & 0x3F) | 0x80))
154 + OStringChar(
char((nChar & 0x3F) | 0x80)));
155 else if (nChar < 0x4000000)
157 OStringChar(
char(nChar >> 24 | 0xF8))
158 + OStringChar(
char((nChar >> 18 & 0x3F) | 0x80))
159 + OStringChar(
char((nChar >> 12 & 0x3F) | 0x80))
160 + OStringChar(
char((nChar >> 6 & 0x3F) | 0x80))
161 + OStringChar(
char((nChar & 0x3F) | 0x80)));
164 OStringChar(
char(nChar >> 30 | 0xFC))
165 + OStringChar(
char((nChar >> 24 & 0x3F) | 0x80))
166 + OStringChar(
char((nChar >> 18 & 0x3F) | 0x80))
167 + OStringChar(
char((nChar >> 12 & 0x3F) | 0x80))
168 + OStringChar(
char((nChar >> 6 & 0x3F) | 0x80))
169 + OStringChar(
char((nChar & 0x3F) | 0x80)));
// Decodes one multi-byte UTF-8 sequence that starts at rBegin and delivers
// the code point through rCharacter.
// NOTE(review): this listing lacks the pEnd parameter line, all braces, the
// declarations and assignments of nUCS4, nCount and nMin, every return
// statement and the final updates of rBegin and rCharacter. The comments
// below describe only what the visible lines establish.
172bool translateUTF8Char(
const char *& rBegin,
174 sal_uInt32 & rCharacter)
// Guard: an empty range, a lead byte below 0x80 or a lead byte of 0xFE and
// above is singled out (the guarded statement is not shown).
176 if (rBegin == pEnd ||
static_cast< unsigned char >(*rBegin) < 0x80
177 ||
static_cast< unsigned char >(*rBegin) >= 0xFE)
183 const char *
p = rBegin;
// The lead byte range selects how many payload bits it contributes:
// below 0xE0 five bits, below 0xF0 four, below 0xF8 three, below 0xFC two,
// otherwise one.
184 if (
static_cast< unsigned char >(*p) < 0xE0)
188 nUCS4 =
static_cast< unsigned char >(*p) & 0x1F;
190 else if (
static_cast< unsigned char >(*p) < 0xF0)
194 nUCS4 =
static_cast< unsigned char >(*p) & 0xF;
196 else if (
static_cast< unsigned char >(*p) < 0xF8)
200 nUCS4 =
static_cast< unsigned char >(*p) & 7;
202 else if (
static_cast< unsigned char >(*p) < 0xFC)
206 nUCS4 =
static_cast< unsigned char >(*p) & 3;
212 nUCS4 =
static_cast< unsigned char >(*p) & 1;
// A continuation byte must have the bit pattern 10xxxxxx; its low six bits
// are shifted into the accumulated value.
217 if ((
static_cast< unsigned char >(*p) & 0xC0) == 0x80)
218 nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
// The result is tested against rtl::isUnicodeCodePoint() and against nMin
// (presumably the smallest value allowed for the sequence length, which
// would reject overlong forms -- confirm against the real file).
222 if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
230void appendISO88591(OUStringBuffer & rText,
char const * pBegin,
// Interior of the Parameter record used while parsing parameter lists.
// NOTE(review): the struct header, several members (m_aCharset, m_aLanguage,
// m_aValue and m_bExtended are used elsewhere in this listing) and all
// braces are absent from this listing.
// Attribute name of the parameter.
235 OString m_aAttribute;
// Section number of the parameter; entries with a number above zero are
// treated as continuations by parseParameters().
239 sal_uInt32 m_nSection;
// Ordering: by attribute name via compareTo(), ties broken by section number.
242 bool operator<(
const Parameter& rhs)
const
244 int nComp = m_aAttribute.compareTo(rhs.m_aAttribute);
246 (nComp == 0 && m_nSection < rhs.m_nSection);
// Members and call operator of the predicate used as
// Parameter::IsSameSection: it matches a Parameter whose attribute and
// section both equal the stored pair.
250 const OString& rAttribute;
251 const sal_uInt32 nSection;
252 bool operator()(
const Parameter& r)
const
253 {
return r.m_aAttribute == rAttribute && r.m_nSection == nSection; }
257typedef std::forward_list<Parameter> ParameterList;
259bool parseParameters(ParameterList
const & rInput,
264void appendISO88591(OUStringBuffer & rText,
char const * pBegin,
267 sal_Int32
nLength = pEnd - pBegin;
268 std::unique_ptr<sal_Unicode[]> pBuffer(
new sal_Unicode[nLength]);
269 for (
sal_Unicode * p = pBuffer.get(); pBegin != pEnd;)
270 *
p++ =
static_cast<unsigned char>(*pBegin++);
271 rText.append(pBuffer.get(),
nLength);
// Turns the collected Parameter entries in rInput into entries of the output
// container reached through pOutput.
// NOTE(review): this listing lacks the pOutput parameter line, all braces,
// the do keywords matching the while lines below, several declarations
// (nSize among them) and every return statement.
276bool parseParameters(ParameterList
const & rInput,
// First pass: an entry with a section number above zero is checked against
// its predecessor, which must exist, carry the previous section number and
// have the same attribute (the statement guarded by this test is not shown).
282 for (
auto it = rInput.begin(), itPrev = rInput.end(); it != rInput.end() ; itPrev = it++)
284 if (it->m_nSection > 0
285 && (itPrev == rInput.end()
286 || itPrev->m_nSection != it->m_nSection - 1
287 || itPrev->m_aAttribute != it->m_aAttribute))
// Second pass: it marks the first entry of a parameter and itNext walks over
// that entry and its continuation sections.
292 for (
auto it = rInput.begin(), itNext = rInput.begin(); it != rInput.end(); it = itNext)
294 bool bCharset = !it->m_aCharset.isEmpty();
295 rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
298 = getCharsetEncoding(it->m_aCharset.getStr(),
299 it->m_aCharset.getStr()
300 + it->m_aCharset.getLength());
301 OUStringBuffer aValue(64);
302 bool bBadEncoding =
false;
// Each section value is converted with convertToUnicode(); the encoding
// argument depends on bCharset and m_bExtended, with RTL_TEXTENCODING_UTF8 as
// one alternative (the other alternative is on a line not shown).
307 std::unique_ptr<sal_Unicode[]> pUnicode
308 = convertToUnicode(itNext->m_aValue.getStr(),
309 itNext->m_aValue.getStr()
310 + itNext->m_aValue.getLength(),
311 bCharset && it->m_bExtended ?
313 RTL_TEXTENCODING_UTF8,
// If that conversion fails and the value is not an extended value with a
// charset, the conversion is retried as ISO 8859-1.
315 if (!pUnicode && !(bCharset && it->m_bExtended))
316 pUnicode = convertToUnicode(
317 itNext->m_aValue.getStr(),
318 itNext->m_aValue.getStr()
319 + itNext->m_aValue.getLength(),
320 RTL_TEXTENCODING_ISO_8859_1, nSize);
326 aValue.append(pUnicode.get(),
static_cast<sal_Int32
>(nSize));
// Continuation sections belong to the current parameter until an entry with
// section number zero or the end of the list is reached.
329 while (itNext != rInput.end() && itNext->m_nSection != 0);
// Alternative path that copies the value one element at a time; the
// condition selecting between the two paths is not shown.
337 if (itNext->m_bExtended)
339 for (sal_Int32 i = 0;
i < itNext->m_aValue.getLength(); ++
i)
342 static_cast<unsigned char>(itNext->m_aValue[i])
347 for (sal_Int32 i = 0;
i < itNext->m_aValue.getLength(); ++
i)
348 aValue.append( itNext->m_aValue[i] );
352 while (itNext != rInput.end() && itNext->m_nSection != 0);
// The assembled value is inserted together with charset, language and a flag
// that is the negation of bBadEncoding; the message below belongs to a log
// statement about a duplicate attribute.
354 auto const ret = pOutput->insert(
356 {it->m_aCharset, it->m_aLanguage, aValue.makeStringAndClear(), !bBadEncoding}});
358 "INetMIME: dropping duplicate parameter: " << it->m_aAttribute);
371bool isTokenChar(sal_uInt32 nChar)
373 static const bool aMap[128]
374 = {
false,
false,
false,
false,
false,
false,
false,
false,
375 false,
false,
false,
false,
false,
false,
false,
false,
376 false,
false,
false,
false,
false,
false,
false,
false,
377 false,
false,
false,
false,
false,
false,
false,
false,
378 false,
true,
false,
true,
true,
true,
true,
true,
379 false,
false,
true,
true,
false,
true,
true,
false,
380 true,
true,
true,
true,
true,
true,
true,
true,
381 true,
true,
false,
false,
false,
false,
false,
false,
382 false,
true,
true,
true,
true,
true,
true,
true,
383 true,
true,
true,
true,
true,
true,
true,
true,
384 true,
true,
true,
true,
true,
true,
true,
true,
385 true,
true,
true,
false,
false,
false,
true,
true,
386 true,
true,
true,
true,
true,
true,
true,
true,
387 true,
true,
true,
true,
true,
true,
true,
true,
388 true,
true,
true,
true,
true,
true,
true,
true,
389 true,
true,
true,
true,
true,
true,
true,
false
391 return rtl::isAscii(nChar) &&
aMap[nChar];
// Fragment of a comment-skipping helper (named skipComment in the assertion
// message). NOTE(review): signature, braces and most statements are absent
// from this listing.
398 "skipComment(): Bad sequence");
// Work only starts when the range is non-empty and begins with an opening
// parenthesis; nLevel starts at zero (presumably the nesting depth --
// confirm against the real file).
400 if (pBegin != pEnd && *pBegin ==
'(')
402 sal_uInt32 nLevel = 0;
// Fragment of a helper named skipLinearWhiteSpaceComment in the assertion
// message. NOTE(review): signature, braces and most statements are absent
// from this listing.
430 "skipLinearWhiteSpaceComment(): Bad sequence");
// The range is scanned until exhausted; folded line breaks are recognised
// with startsWithLineFolding().
432 while (pBegin != pEnd)
441 if (startsWithLineFolding(pBegin, pEnd))
// Fragment of a helper named skipQuotedString in the assertion message.
// NOTE(review): signature, braces and most statements are absent from this
// listing.
466 "skipQuotedString(): Bad sequence");
// Work only starts when the range is non-empty and begins with a double
// quote character.
468 if (pBegin != pEnd && *pBegin ==
'"')
// Test applied after a carriage return (presumably): at least two characters
// must remain, the first a line feed and the second white space.
473 if (pEnd - p < 2 || *p++ != 0x0A
474 || !isWhiteSpace(*p++))
// Fragment of the parameter list scanner (presumably scanParameters(), the
// function called at the end of scanContentType -- confirm against the real
// file). NOTE(review): signature, braces, loop headers and many statements
// are absent from this listing; comments describe visible lines only.
// A parameter starts after optional white space and comments at a semicolon.
498 pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
499 if (pParameterBegin == pEnd || *pParameterBegin !=
';')
501 p = pParameterBegin + 1;
504 = skipLinearWhiteSpaceComment(p, pEnd);
// Attribute name: token characters up to an asterisk; upper case letters
// are noted so that the name is lower-cased only when needed.
506 bool bDowncaseAttribute =
false;
507 while (p != pEnd && isTokenChar(*p) && *p !=
'*')
509 bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
512 if (p == pAttributeBegin)
514 OString aAttribute(pAttributeBegin, p - pAttributeBegin, RTL_TEXTENCODING_ASCII_US);
515 if (bDowncaseAttribute)
516 aAttribute = aAttribute.toAsciiLowerCase();
// Optional section number introduced by an asterisk followed by a digit.
518 sal_uInt32 nSection = 0;
519 if (p != pEnd && *p ==
'*')
522 if (p != pEnd && rtl::isAsciiDigit(*p)
// Looks for an already collected entry with the same attribute and section.
527 bool bPresent = std::any_of(aList.begin(), aList.end(),
528 Parameter::IsSameSection{aAttribute, nSection});
// A further asterisk is tested for here; bExtended is presumably set by the
// guarded statement, which is not shown.
532 bool bExtended =
false;
533 if (p != pEnd && *p ==
'*')
// An equals sign must follow, with white space and comments allowed around it.
539 p = skipLinearWhiteSpaceComment(p, pEnd);
541 if (p == pEnd || *p !=
'=')
544 p = skipLinearWhiteSpaceComment(p + 1, pEnd);
// Charset part: token characters up to a single quote.
554 bool bDowncaseCharset =
false;
555 while (p != pEnd && isTokenChar(*p) && *p !=
'\'')
557 bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
560 if (p == pCharsetBegin)
567 RTL_TEXTENCODING_ASCII_US);
568 if (bDowncaseCharset)
569 aCharset = aCharset.toAsciiLowerCase();
572 if (p == pEnd || *p !=
'\'')
// Language part: runs of letters, where a run length of zero or above eight
// is tested for (the consequence is not shown), closed by a single quote.
577 bool bDowncaseLanguage =
false;
579 for (;
p != pEnd; ++
p)
580 if (rtl::isAsciiAlpha(*p))
584 bDowncaseLanguage = bDowncaseLanguage
585 || rtl::isAsciiUpperCase(*p);
595 if (nLetters == 0 || nLetters > 8)
602 RTL_TEXTENCODING_ASCII_US);
603 if (bDowncaseLanguage)
604 aLanguage = aLanguage.toAsciiLowerCase();
607 if (p == pEnd || *p !=
'\'')
// Extended value: an ASCII non-token character is tested for, a percent sign
// followed by two digits with non-negative weights yields one byte, and
// other characters are written as UTF-8.
618 if (rtl::isAscii(nChar) && !isTokenChar(nChar))
621 if (nChar ==
'%' && p + 1 < pEnd)
625 if (nWeight1 >= 0 && nWeight2 >= 0)
627 aSink.append(
char(nWeight1 << 4 | nWeight2));
632 writeUTF8(aSink, nChar);
634 aValue = aSink.makeStringAndClear();
637 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
// Quoted value: characters are collected into aSink as UTF-8; a carriage
// return must be followed by a line feed and white space, and a backslash is
// handled separately (statement not shown).
640 else if (p != pEnd && *p ==
'"')
643 OStringBuffer aSink(256);
644 bool bInvalid =
false;
655 else if (nChar == 0x0D)
657 if (pEnd - p < 2 || *p++ != 0x0A
658 || !isWhiteSpace(*p))
663 nChar =
static_cast<unsigned char>(*
p++);
665 else if (nChar ==
'\\')
674 writeUTF8(aSink, nChar);
678 aValue = aSink.makeStringAndClear();
682 sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
// Plain token value: token characters and non-ASCII characters; an empty
// token is tested for (the consequence is not shown).
690 while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
692 if (p == pTokenBegin)
696 pTokenBegin, p - pTokenBegin,
697 RTL_TEXTENCODING_UTF8);
// Each parsed parameter is pushed to the front of aList.
699 aList.emplace_front(Parameter{aAttribute, aCharset, aLanguage, aValue, nSection, bExtended});
// Result: pParameterBegin if parseParameters() succeeds, else pBegin.
702 return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
705bool equalIgnoreCase(
const char * pBegin1,
707 const char * pString2)
709 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
710 "equalIgnoreCase(): Bad sequences");
712 while (*pString2 != 0)
714 || (rtl::toAsciiUpperCase(
static_cast<unsigned char>(*pBegin1++))
715 != rtl::toAsciiUpperCase(
716 static_cast<unsigned char>(*pString2++))))
718 return pBegin1 == pEnd1;
724 rtl_TextEncoding m_eEncoding;
730EncodingEntry
const aEncodingMap[]
731 = { {
"US-ASCII", RTL_TEXTENCODING_ASCII_US },
732 {
"ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
733 {
"ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
734 {
"ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
735 {
"ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
736 {
"ASCII", RTL_TEXTENCODING_ASCII_US },
737 {
"ISO646-US", RTL_TEXTENCODING_ASCII_US },
738 {
"US", RTL_TEXTENCODING_ASCII_US },
739 {
"IBM367", RTL_TEXTENCODING_ASCII_US },
740 {
"CP367", RTL_TEXTENCODING_ASCII_US },
741 {
"CSASCII", RTL_TEXTENCODING_ASCII_US },
742 {
"ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
743 {
"ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
744 {
"ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
745 {
"ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
746 {
"LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
747 {
"L1", RTL_TEXTENCODING_ISO_8859_1 },
748 {
"IBM819", RTL_TEXTENCODING_ISO_8859_1 },
749 {
"CP819", RTL_TEXTENCODING_ISO_8859_1 },
750 {
"CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
751 {
"ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
752 {
"ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
753 {
"ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
754 {
"ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
755 {
"LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
756 {
"L2", RTL_TEXTENCODING_ISO_8859_2 },
757 {
"CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
758 {
"ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
759 {
"ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
760 {
"ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
761 {
"ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
762 {
"LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
763 {
"L3", RTL_TEXTENCODING_ISO_8859_3 },
764 {
"CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
765 {
"ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
766 {
"ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
767 {
"ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
768 {
"ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
769 {
"LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
770 {
"L4", RTL_TEXTENCODING_ISO_8859_4 },
771 {
"CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
772 {
"ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
773 {
"ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
774 {
"ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
775 {
"ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
776 {
"CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
777 {
"CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
778 {
"ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
779 {
"ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
780 {
"ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
781 {
"ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
782 {
"ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
783 {
"ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
784 {
"ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
785 {
"CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
786 {
"ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
787 {
"ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
788 {
"ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
789 {
"ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
790 {
"ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
791 {
"ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
792 {
"GREEK", RTL_TEXTENCODING_ISO_8859_7 },
793 {
"GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
794 {
"CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
795 {
"ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
796 {
"ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
797 {
"ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
798 {
"ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
799 {
"HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
800 {
"CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
801 {
"ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
802 {
"ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
803 {
"ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
804 {
"ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
805 {
"LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
806 {
"L5", RTL_TEXTENCODING_ISO_8859_9 },
807 {
"CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
808 {
"ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 },
809 {
"ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
810 {
"ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 },
811 {
"MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
812 {
"MAC", RTL_TEXTENCODING_APPLE_ROMAN },
813 {
"CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
814 {
"IBM437", RTL_TEXTENCODING_IBM_437 },
815 {
"CP437", RTL_TEXTENCODING_IBM_437 },
816 {
"437", RTL_TEXTENCODING_IBM_437 },
817 {
"CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
818 {
"IBM850", RTL_TEXTENCODING_IBM_850 },
819 {
"CP850", RTL_TEXTENCODING_IBM_850 },
820 {
"850", RTL_TEXTENCODING_IBM_850 },
821 {
"CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
822 {
"IBM860", RTL_TEXTENCODING_IBM_860 },
823 {
"CP860", RTL_TEXTENCODING_IBM_860 },
824 {
"860", RTL_TEXTENCODING_IBM_860 },
825 {
"CSIBM860", RTL_TEXTENCODING_IBM_860 },
826 {
"IBM861", RTL_TEXTENCODING_IBM_861 },
827 {
"CP861", RTL_TEXTENCODING_IBM_861 },
828 {
"861", RTL_TEXTENCODING_IBM_861 },
829 {
"CP-IS", RTL_TEXTENCODING_IBM_861 },
830 {
"CSIBM861", RTL_TEXTENCODING_IBM_861 },
831 {
"IBM863", RTL_TEXTENCODING_IBM_863 },
832 {
"CP863", RTL_TEXTENCODING_IBM_863 },
833 {
"863", RTL_TEXTENCODING_IBM_863 },
834 {
"CSIBM863", RTL_TEXTENCODING_IBM_863 },
835 {
"IBM865", RTL_TEXTENCODING_IBM_865 },
836 {
"CP865", RTL_TEXTENCODING_IBM_865 },
837 {
"865", RTL_TEXTENCODING_IBM_865 },
838 {
"CSIBM865", RTL_TEXTENCODING_IBM_865 },
839 {
"IBM775", RTL_TEXTENCODING_IBM_775 },
840 {
"CP775", RTL_TEXTENCODING_IBM_775 },
841 {
"CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
842 {
"IBM852", RTL_TEXTENCODING_IBM_852 },
843 {
"CP852", RTL_TEXTENCODING_IBM_852 },
844 {
"852", RTL_TEXTENCODING_IBM_852 },
845 {
"CSPCP852", RTL_TEXTENCODING_IBM_852 },
846 {
"IBM855", RTL_TEXTENCODING_IBM_855 },
847 {
"CP855", RTL_TEXTENCODING_IBM_855 },
848 {
"855", RTL_TEXTENCODING_IBM_855 },
849 {
"CSIBM855", RTL_TEXTENCODING_IBM_855 },
850 {
"IBM857", RTL_TEXTENCODING_IBM_857 },
851 {
"CP857", RTL_TEXTENCODING_IBM_857 },
852 {
"857", RTL_TEXTENCODING_IBM_857 },
853 {
"CSIBM857", RTL_TEXTENCODING_IBM_857 },
854 {
"IBM862", RTL_TEXTENCODING_IBM_862 },
855 {
"CP862", RTL_TEXTENCODING_IBM_862 },
856 {
"862", RTL_TEXTENCODING_IBM_862 },
857 {
"CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
858 {
"IBM864", RTL_TEXTENCODING_IBM_864 },
859 {
"CP864", RTL_TEXTENCODING_IBM_864 },
860 {
"CSIBM864", RTL_TEXTENCODING_IBM_864 },
861 {
"IBM866", RTL_TEXTENCODING_IBM_866 },
862 {
"CP866", RTL_TEXTENCODING_IBM_866 },
863 {
"866", RTL_TEXTENCODING_IBM_866 },
864 {
"CSIBM866", RTL_TEXTENCODING_IBM_866 },
865 {
"IBM869", RTL_TEXTENCODING_IBM_869 },
866 {
"CP869", RTL_TEXTENCODING_IBM_869 },
867 {
"869", RTL_TEXTENCODING_IBM_869 },
868 {
"CP-GR", RTL_TEXTENCODING_IBM_869 },
869 {
"CSIBM869", RTL_TEXTENCODING_IBM_869 },
870 {
"WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
871 {
"WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
872 {
"WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
873 {
"WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
874 {
"WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
875 {
"WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
876 {
"WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
877 {
"WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
878 {
"SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
879 {
"MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
880 {
"CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
881 {
"GB2312", RTL_TEXTENCODING_GB_2312 },
882 {
"CSGB2312", RTL_TEXTENCODING_GB_2312 },
883 {
"BIG5", RTL_TEXTENCODING_BIG5 },
884 {
"CSBIG5", RTL_TEXTENCODING_BIG5 },
885 {
"EUC-JP", RTL_TEXTENCODING_EUC_JP },
886 {
"EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
887 RTL_TEXTENCODING_EUC_JP },
888 {
"CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
889 {
"ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
890 {
"CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
891 {
"ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
892 {
"KOI8-R", RTL_TEXTENCODING_KOI8_R },
893 {
"CSKOI8R", RTL_TEXTENCODING_KOI8_R },
894 {
"UTF-7", RTL_TEXTENCODING_UTF7 },
895 {
"UTF-8", RTL_TEXTENCODING_UTF8 },
896 {
"ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 },
897 {
"ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 },
898 {
"EUC-KR", RTL_TEXTENCODING_EUC_KR },
899 {
"CSEUCKR", RTL_TEXTENCODING_EUC_KR },
900 {
"ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
901 {
"CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
902 {
"ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
903 {
"CSUCS4", RTL_TEXTENCODING_UCS4 },
904 {
"ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
905 {
"CSUNICODE", RTL_TEXTENCODING_UCS2 } };
907rtl_TextEncoding getCharsetEncoding(
char const * pBegin,
910 for (
const EncodingEntry& i : aEncodingMap)
911 if (equalIgnoreCase(pBegin, pEnd,
i.m_aName))
912 return i.m_eEncoding;
913 return RTL_TEXTENCODING_DONTKNOW;
923 static const bool aMap[128]
924 = {
false,
false,
false,
false,
false,
false,
false,
false,
925 false,
false,
false,
false,
false,
false,
false,
false,
926 false,
false,
false,
false,
false,
false,
false,
false,
927 false,
false,
false,
false,
false,
false,
false,
false,
928 false,
true,
false,
true,
true,
true,
true,
true,
929 false,
false,
true,
true,
false,
true,
false,
true,
930 true,
true,
true,
true,
true,
true,
true,
true,
931 true,
true,
false,
false,
false,
true,
false,
true,
932 false,
true,
true,
true,
true,
true,
true,
true,
933 true,
true,
true,
true,
true,
true,
true,
true,
934 true,
true,
true,
true,
true,
true,
true,
true,
935 true,
true,
true,
false,
false,
false,
true,
true,
936 true,
true,
true,
true,
true,
true,
true,
true,
937 true,
true,
true,
true,
true,
true,
true,
true,
938 true,
true,
true,
true,
true,
true,
true,
true,
939 true,
true,
true,
true,
true,
true,
true,
false
941 return rtl::isAscii(nChar) &&
aMap[nChar];
947 static const bool aMap[128]
948 = {
false,
false,
false,
false,
false,
false,
false,
false,
949 false,
false,
false,
false,
false,
false,
false,
false,
950 false,
false,
false,
false,
false,
false,
false,
false,
951 false,
false,
false,
false,
false,
false,
false,
false,
952 false,
true,
false,
true,
true,
false,
true,
true,
953 false,
false,
false,
true,
true,
true,
true,
true,
954 true,
true,
true,
true,
true,
true,
true,
true,
955 true,
true,
true,
true,
true,
true,
true,
true,
956 true,
true,
true,
true,
true,
true,
true,
true,
957 true,
true,
true,
true,
true,
true,
true,
true,
958 true,
true,
true,
true,
true,
true,
true,
true,
959 true,
true,
true,
true,
false,
true,
true,
true,
960 true,
true,
true,
true,
true,
true,
true,
true,
961 true,
true,
true,
true,
true,
true,
true,
true,
962 true,
true,
true,
true,
true,
true,
true,
true,
963 true,
true,
true,
false,
true,
true,
true,
false
965 return rtl::isAscii(nChar) &&
aMap[nChar];
971 const char * pString2)
973 DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
974 "INetMIME::equalIgnoreCase(): Bad sequences");
976 while (*pString2 != 0)
978 || (rtl::toAsciiUpperCase(*pBegin1++)
979 != rtl::toAsciiUpperCase(
980 static_cast<unsigned char>(*pString2++))))
982 return pBegin1 == pEnd1;
// Fragment of the unsigned decimal scanner (presumably scanUnsigned(), whose
// declaration lists rBegin, pEnd, bLeadingZeroes and rValue -- confirm
// against the real file). NOTE(review): signature, braces, the digit weight
// lookup and every return statement are absent from this listing.
// The value is accumulated in 64 bits so that exceeding the 32 bit range can
// be detected by the comparison below.
990 sal_uInt64 nTheValue = 0;
992 for ( ;
p != pEnd; ++
p)
997 nTheValue = 10 * nTheValue + nWeight;
998 if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
// A zero result is examined further: either no character was consumed, or
// leading zeroes are not allowed and more than one character was consumed.
1001 if (nTheValue == 0 && (
p == rBegin || (!bLeadingZeroes &&
p - rBegin != 1)))
1004 rValue = sal_uInt32(nTheValue);
// Fragment of the content type scanner (presumably
// INetMIME::scanContentType() -- confirm against the real file).
// NOTE(review): the leading signature line, the remaining parameter lines,
// braces, several declarations and the early return statements are absent
// from this listing.
1010 std::u16string_view rStr, OUString * pType,
// Type: a non-empty run of token characters after optional white space and
// comments.
1015 sal_Unicode const *
p = skipLinearWhiteSpaceComment(pBegin, pEnd);
1017 while (
p != pEnd && isTokenChar(*
p))
1021 if (
p == pTypeBegin)
// A slash must separate type and subtype; white space and comments are
// allowed on both sides of it.
1025 p = skipLinearWhiteSpaceComment(
p, pEnd);
1026 if (
p == pEnd || *
p++ !=
'/')
1029 p = skipLinearWhiteSpaceComment(
p, pEnd);
// Subtype: a non-empty run of token characters.
1031 while (
p != pEnd && isTokenChar(*
p))
1035 if (
p == pSubTypeBegin)
// Type and subtype are delivered in lower case, each only if its output
// pointer is not null.
1039 if (pType !=
nullptr)
1041 *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin).toAsciiLowerCase();
1043 if (pSubType !=
nullptr)
1045 *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
1046 .toAsciiLowerCase();
// The rest of the input is handed to the parameter scanner.
1049 return scanParameters(
p, pEnd, pParameters);
// Fragment of the header field body decoder (presumably
// INetMIME::decodeHeaderFieldBody(const OString &rBody) -- confirm against
// the real file). NOTE(review): the signature, all braces, switch and case
// labels and many statements are absent from this listing; the comments
// describe visible lines only.
1075 const char * pBegin = rBody.getStr();
1076 const char * pEnd = pBegin + rBody.getLength();
// sDecoded collects the result. pCopyBegin marks input that has not been
// copied yet; it is flushed with appendISO88591() further down.
1078 OUStringBuffer sDecoded;
1079 const char * pCopyBegin = pBegin;
1082 const char * pWSPBegin = pBegin;
1084 for (
const char *
p = pBegin;
p != pEnd;)
// Candidate encoded word: the character after the current one must be a
// question mark. q is the read position inside the candidate.
1088 const char * q =
p + 1;
1089 bool bEncodedWord = q != pEnd && *q++ ==
'?';
1091 rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
// Charset part, optionally followed by a language part that is tracked with
// pLanguageBegin and nAlphaCount.
1094 const char * pCharsetBegin = q;
1095 const char * pLanguageBegin =
nullptr;
1096 int nAlphaCount = 0;
1097 for (
bool bDone =
false; !bDone;)
1100 bEncodedWord =
false;
1109 pLanguageBegin = q - 1;
1114 if (pLanguageBegin !=
nullptr)
1116 if (nAlphaCount == 0)
1117 pLanguageBegin =
nullptr;
// An empty charset name invalidates the candidate.
1124 if (pCharsetBegin == q - 1)
1125 bEncodedWord =
false;
// The charset name is resolved, checked with isMIMECharsetEncoding() and
// passed through translateFromMIME().
1129 = getCharsetEncoding(
1131 pLanguageBegin ==
nullptr
1132 || nAlphaCount == 0 ?
1133 q - 1 : pLanguageBegin);
1134 bEncodedWord = isMIMECharsetEncoding(
1137 = translateFromMIME(eCharsetEncoding);
// A language part is dropped when a non-letter occurs in it or when it
// grows beyond eight letters.
1143 if (pLanguageBegin !=
nullptr
1144 && (!rtl::isAsciiAlpha(
1145 static_cast<unsigned char>(cChar))
1146 || ++nAlphaCount > 8))
1147 pLanguageBegin =
nullptr;
// Encoding selector; bEncodingB presumably distinguishes the base64 form
// from the other form (the assignments are not shown).
1153 bool bEncodingB =
false;
1157 bEncodedWord =
false;
1173 bEncodedWord =
false;
1179 bEncodedWord = bEncodedWord && q != pEnd && *q++ ==
'?';
// sText receives the decoded bytes of the encoded text.
1181 OStringBuffer sText;
1186 for (
bool bDone =
false; !bDone;)
1190 bEncodedWord =
false;
1195 bool bFinal =
false;
// Four characters are weighted with getBase64Weight() and combined with
// shifts of 18, 12, 6 and 0 bits; nCount bytes of the result are then
// appended, most significant first.
1198 for (
int nShift = 18; nShift >= 0; nShift -= 6)
1200 int nWeight = getBase64Weight(*q++);
1203 bEncodedWord =
false;
1213 bEncodedWord =
false;
1218 nCount = nShift == 6 ? 1 : 2;
1222 nValue |= nWeight << nShift;
1226 for (
int nShift = 16;
nCount-- > 0; nShift -= 8)
1227 sText.append(
char(
nValue >> nShift & 0xFF));
1233 if (bFinal && !bDone)
1235 bEncodedWord =
false;
// Other encoding form: literal runs are copied from rBody via subView(), and
// a pair of digits with non-negative weights is combined into one byte.
1244 const char * pEncodedTextBegin = q;
1245 const char * pEncodedTextCopyBegin = q;
1246 for (
bool bDone =
false; !bDone;)
1249 bEncodedWord =
false;
1254 sal_uInt32 nChar =
static_cast<unsigned char>(*q++);
1261 bEncodedWord =
false;
1267 if (nDigit1 < 0 || nDigit2 < 0)
1269 bEncodedWord =
false;
1275 (pEncodedTextCopyBegin - pBegin),
1276 (q - 1 - pEncodedTextCopyBegin))
1277 + OStringChar(
char(nDigit1 << 4 | nDigit2)));
1279 pEncodedTextCopyBegin = q;
1284 if (q - pEncodedTextBegin > 1)
1285 sText.append(rBody.subView(
1286 (pEncodedTextCopyBegin - pBegin),
1287 (q - 1 - pEncodedTextCopyBegin)));
1289 bEncodedWord =
false;
// One input character is replaced by a single space here (the case label
// naming that character is not shown).
1296 (pEncodedTextCopyBegin - pBegin),
1297 (q - 1 - pEncodedTextCopyBegin))
1298 + OString::Concat(
" "));
1299 pEncodedTextCopyBegin = q;
1305 bEncodedWord =
false;
// The candidate must be closed by an equals sign.
1314 bEncodedWord = bEncodedWord && q != pEnd && *q++ ==
'=';
// The decoded bytes are converted with convertToUnicode(); a failed
// conversion invalidates the candidate.
1316 std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
1317 sal_Size nUnicodeSize = 0;
1321 = convertToUnicode(sText.getStr(),
1322 sText.getStr() + sText.getLength(),
1323 eCharsetEncoding, nUnicodeSize);
1324 if (!pUnicodeBuffer)
1325 bEncodedWord =
false;
// Pending plain input up to pWSPBegin is flushed before the converted text
// is appended.
1330 appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
1332 pUnicodeBuffer.get(),
1333 static_cast< sal_Int32
>(nUnicodeSize));
1334 pUnicodeBuffer.reset();
// White space after the word is stepped over.
1339 while (
p != pEnd && isWhiteSpace(*
p))
// Raw UTF-8 in the input: a sequence accepted by translateUTF8Char() is
// appended as one code point after the pending plain input is flushed.
1365 const char * pUTF8Begin =
p - 1;
1366 const char * pUTF8End = pUTF8Begin;
1367 sal_uInt32 nCharacter = 0;
1368 if (translateUTF8Char(pUTF8End, pEnd, nCharacter))
1370 appendISO88591(sDecoded, pCopyBegin,
p - 1);
1371 sDecoded.appendUtf32(nCharacter);
// Whatever is still pending is flushed and the result is returned.
1382 appendISO88591(sDecoded, pCopyBegin, pEnd);
1383 return sDecoded.makeStringAndClear();
bool operator<(const BigInt &rVal1, const BigInt &rVal2)
static bool isVisible(sal_uInt32 nChar)
Check for US-ASCII visible character.
static sal_uInt32 getUTF32Character(const sal_Unicode *&rBegin, const sal_Unicode *pEnd)
Get the UTF-32 character at the head of a UTF-16 encoded string.
static OUString decodeHeaderFieldBody(const OString &rBody)
static bool equalIgnoreCase(const sal_Unicode *pBegin1, const sal_Unicode *pEnd1, const char *pString2)
Check two US-ASCII strings for equality, ignoring case.
static int getHexWeight(sal_uInt32 nChar)
Get the hexadecimal digit weight of a US-ASCII character.
static int getWeight(sal_uInt32 nChar)
Get the digit weight of a US-ASCII character.
static bool isIMAPAtomChar(sal_uInt32 nChar)
Check whether some character is valid within an RFC 2060 <atom>.
static sal_Unicode const * scanContentType(std::u16string_view rStr, OUString *pType=nullptr, OUString *pSubType=nullptr, INetContentTypeParameterList *pParameters=nullptr)
Parse the body of an RFC 2045 Content-Type header field.
static bool isAtomChar(sal_uInt32 nChar)
Check whether some character is valid within an RFC 822 <atom>.
static bool scanUnsigned(const sal_Unicode *&rBegin, const sal_Unicode *pEnd, bool bLeadingZeroes, sal_uInt32 &rValue)
#define DBG_ASSERT(sCon, aError)
std::unordered_map< OString, INetContentTypeParameter > INetContentTypeParameterList
The key is the name of the attribute, in US-ASCII encoding and converted to lower case.
#define SAL_INFO_IF(condition, area, stream)
HashMap_OWString_Interface aMap
const wchar_t *typedef int(__stdcall *DllNativeUnregProc)(int