LibreOffice Module writerfilter (master) 1
rtftokenizer.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include "rtftokenizer.hxx"
11#include <o3tl/string_view.hxx>
12#include <tools/stream.hxx>
13#include <svx/dialmgr.hxx>
14#include <svx/strings.hrc>
15#include <rtl/strbuf.hxx>
16#include <rtl/character.hxx>
17#include <sal/log.hxx>
19#include <com/sun/star/io/BufferSizeExceededException.hpp>
20#include <com/sun/star/task/XStatusIndicator.hpp>
22
23using namespace com::sun::star;
24
26{
// Keyword -> symbol lookup table. The constructor below fills it from
// aRTFControlWords and dispatchKeyword() queries it with find().
27std::unordered_map<OString, RTFSymbol> RTFTokenizer::s_aRTFControlWords;
// Math control words. The constructor below sorts this vector so that it can
// be searched with std::lower_bound further down in this file.
29std::vector<RTFMathSymbol> RTFTokenizer::s_aRTFMathControlWords;
31
33 uno::Reference<task::XStatusIndicator> const& xStatusIndicator)
34 : m_rImport(rImport)
35 , m_pInStream(pInStream)
36 , m_xStatusIndicator(xStatusIndicator)
37 , m_nGroup(0)
38 , m_nLineNumber(0)
39 , m_nLineStartPos(0)
40 , m_nGroupStart(0)
41{
// Constructor body: stores the listener, input stream and status indicator,
// zeroes the group/line bookkeeping (see the initializer list above) and then
// prepares the two static lookup tables.
// NOTE(review): the first line of the signature and the conditions guarding
// the two blocks below are not visible in this excerpt; presumably each block
// only runs while its table is still empty -- confirm against the full file.
43 {
// Register every entry of aRTFControlWords under its keyword string.
45 for (int i = 0; i < nRTFControlWords; ++i)
46 s_aRTFControlWords.emplace(OString(aRTFControlWords[i].GetKeyword()),
48 }
50 {
52 s_aRTFMathControlWords = std::vector<RTFMathSymbol>(
// Sorting is required because the table is later searched with
// std::lower_bound, which needs an ordered range.
54 std::sort(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end());
55 }
56}
57
59
61{
// Main tokenizing loop. Reads the input stream one character at a time,
// reports progress to the optional status indicator and reacts to group
// braces, backslash keywords, line breaks and ordinary characters. Any non-OK
// RTFError coming back from the importer or from resolveKeyword() is returned
// to the caller unchanged.
// NOTE(review): the signature line and several statements inside this
// function are not visible in this excerpt; the comments below only describe
// what can be read here.
62 SAL_INFO("writerfilter.rtf", __func__);
63 char ch;
64 RTFError ret;
65 // for hex chars
// b accumulates the value of the hex digits seen so far; count is the number
// of hex digits still expected (two per character).
66 int b = 0;
67 int count = 2;
// Progress is only reported once the stream has advanced by more than
// nPercentSize bytes past nLastPos (see the check inside the loop).
68 std::size_t nPercentSize = 0;
69 sal_uInt64 nLastPos = 0;
70
71 if (m_xStatusIndicator.is())
72 {
73 OUString sDocLoad(SvxResId(RID_SVXSTR_DOC_LOAD));
74
// The indicator range is the absolute end position of the stream; one
// hundredth of it becomes the minimum step between two updates.
75 sal_uInt64 const nCurrentPos = Strm().Tell();
76 sal_uInt64 const nEndPos = nCurrentPos + Strm().remainingSize();
77 m_xStatusIndicator->start(sDocLoad, nEndPos);
78 nPercentSize = nEndPos / 100;
79
80 nLastPos = nCurrentPos;
81 m_xStatusIndicator->setValue(nLastPos);
82 }
83
// Comma operator: first read a character, then test for end of stream, so the
// body never sees a character from a failed read at EOF.
84 while (Strm().ReadChar(ch), !Strm().eof())
85 {
86 //SAL_INFO("writerfilter", __func__ << ": parsing character '" << ch << "'");
87
88 sal_uInt64 const nCurrentPos = Strm().Tell();
89 if (m_xStatusIndicator.is() && nCurrentPos > (nLastPos + nPercentSize))
90 {
91 nLastPos = nCurrentPos;
92 m_xStatusIndicator->setValue(nLastPos);
93 }
94
// NOTE(review): the lines around this check (including the assignment to ret
// tested below) are not visible in this excerpt.
95 if (m_nGroup < 0)
98 {
100 if (ret != RTFError::OK)
101 return ret;
102 }
103 else
104 {
105 switch (ch)
106 {
107 case '{':
// Tell() already points past the brace, hence the -1 to remember the offset
// of the brace itself.
108 m_nGroupStart = Strm().Tell() - 1;
109 ret = m_rImport.pushState();
110 if (ret != RTFError::OK)
111 return ret;
112 break;
113 case '}':
114 ret = m_rImport.popState();
115 if (ret != RTFError::OK)
116 return ret;
// Group depth back at zero: parsing finishes successfully here.
117 if (m_nGroup == 0)
118 {
121 return RTFError::OK;
122 }
123 break;
124 case '\\':
125 ret = resolveKeyword();
126 if (ret != RTFError::OK)
127 return ret;
128 break;
129 case 0x0d:
130 break; // ignore this
131 case 0x0a:
// Remember where the new line starts; the position is used for computing a
// column in the position string built near the end of this file.
// NOTE(review): a statement before this one is not visible in this excerpt;
// presumably it advances m_nLineNumber -- confirm.
133 m_nLineStartPos = nCurrentPos;
134 break;
135 default:
// Any other character outside of all groups is reported as CHAR_OVER.
136 if (m_nGroup == 0)
137 return RTFError::CHAR_OVER;
// NOTE(review): the condition selecting between the two branches below is not
// visible in this excerpt; the else branch handles the hex internal state
// according to its log message.
139 {
141 if (ret != RTFError::OK)
142 return ret;
143 }
144 else
145 {
146 SAL_INFO("writerfilter.rtf", __func__ << ": hex internal state");
147 // Assume that \'<number><junk> means \'0<number>.
148 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch))
149 || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F'))
150 {
// Make room for the next nibble, then add the parsed digit value.
// NOTE(review): the declaration of parsed and the statement executed when it
// is -1 are not visible in this excerpt.
151 b = b << 4;
153 if (parsed == -1)
155 b += parsed;
156 }
// A non-hex character still consumes one of the two expected digits without
// changing b (see the "Assume that" comment above).
157 count--;
158 if (!count)
159 {
// Both digits consumed: hand the assembled value to the importer and reset
// the accumulator for the next hex escape.
160 ret = m_rImport.resolveChars(b);
161 if (ret != RTFError::OK)
162 return ret;
163 count = 2;
164 b = 0;
166 }
167 }
168 break;
169 }
170 }
171 }
172
// End of stream reached.
// NOTE(review): the statements belonging to the two checks below are not
// visible in this excerpt; presumably they report unbalanced groups as
// errors -- confirm against the full file.
173 if (m_nGroup < 0)
175 if (m_nGroup > 0)
177 return RTFError::OK;
178}
179
181
183
185{
// Parses one control word or control symbol from the stream and forwards it
// to dispatchKeyword(). The main loop calls resolveKeyword() when it reads a
// backslash, so reading starts with the character after it.
// Returns whatever dispatchKeyword() returns; throws
// io::BufferSizeExceededException when the keyword name exceeds 32 letters.
// NOTE(review): the signature line and the statements executed on EOF (after
// the two bare eof() checks below) are not visible in this excerpt.
186 char ch;
187
188 Strm().ReadChar(ch);
189 if (Strm().eof())
191
192 if (!rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
193 {
194 // control symbols aren't followed by a space, so we can return here
195 // without doing any SeekRel()
196 return dispatchKeyword(OString(ch), false, 0);
197 }
// Collect the run of ASCII letters that forms the control word name.
198 OStringBuffer aBuf(32);
199 while (rtl::isAsciiAlpha(static_cast<unsigned char>(ch)))
200 {
201 aBuf.append(ch);
202 if (aBuf.getLength() > 32)
203 // See RTF spec v1.9.1, page 7
204 // A control word's name cannot be longer than 32 letters.
205 throw io::BufferSizeExceededException();
206 Strm().ReadChar(ch);
207 if (Strm().eof())
208 {
// Pretend a space delimiter was read so that no SeekRel(-1) happens at the
// end of this function.
209 ch = ' ';
210 break;
211 }
212 }
213
214 bool bNeg = false;
215 if (ch == '-')
216 {
217 // in case we'll have a parameter, that will be negative
218 bNeg = true;
219 Strm().ReadChar(ch);
220 if (Strm().eof())
222 }
// bParam records whether a numeric parameter followed the keyword at all;
// nParam stays 0 when there was none.
223 bool bParam = false;
224 int nParam = 0;
225 if (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
226 {
227 OStringBuffer aParameter;
228
229 // we have a parameter
230 bParam = true;
231 while (rtl::isAsciiDigit(static_cast<unsigned char>(ch)))
232 {
233 aParameter.append(ch);
234 Strm().ReadChar(ch);
235 if (Strm().eof())
236 {
// Same EOF handling as in the keyword loop above.
237 ch = ' ';
238 break;
239 }
240 }
// The collected digits are converted as a decimal number; the sign seen
// earlier is applied afterwards.
241 nParam = o3tl::toInt32(aParameter);
242 if (bNeg)
243 nParam = -nParam;
244 }
// A single space terminating the control word is consumed together with it;
// any other terminating character is pushed back so that it is read again.
245 if (ch != ' ')
246 Strm().SeekRel(-1);
247 OString aKeyword = aBuf.makeStringAndClear();
248 return dispatchKeyword(aKeyword, bParam, nParam);
249}
250
252{
// Looks rSymbol up in s_aRTFMathControlWords and, on a match, overwrites it
// with the table entry. Returns true when an equivalent entry exists, false
// otherwise (rSymbol is left untouched in that case).
// NOTE(review): the signature line is not visible in this excerpt.
// std::lower_bound needs an ordered range; the constructor sorts the table.
253 auto low
254 = std::lower_bound(s_aRTFMathControlWords.begin(), s_aRTFMathControlWords.end(), rSymbol);
// lower_bound yields the first element that is not less than rSymbol; it is a
// real match only if rSymbol is not less than that element either.
255 if (low == s_aRTFMathControlWords.end() || rSymbol < *low)
256 return false;
257 rSymbol = *low;
258 return true;
259}
260
// Looks up a parsed control word and forwards it to the matching dispatch
// method of the importer.
//
// rKeyword: keyword text as collected by the tokenizer (the log messages
//           below print the backslash separately).
// bParam:   whether a numeric parameter was present in the input.
// nParam:   the parameter value; only meaningful when bParam is true.
//
// Returns RTFError::OK for skipped and unknown keywords, otherwise the first
// non-OK result of the importer's dispatch call, or RTFError::OK.
261RTFError RTFTokenizer::dispatchKeyword(OString const& rKeyword, bool bParam, int nParam)
262{
// NOTE(review): the condition guarding this block is not visible in this
// excerpt; presumably it checks that the importer is currently skipping the
// destination -- confirm against the full file.
264 {
265 // skip binary data explicitly, to not trip over rtf markup
266 // control characters
// The parameter of "bin" is used as the number of bytes to jump over.
267 if (rKeyword == "bin" && nParam > 0)
268 Strm().SeekRel(nParam);
269 return RTFError::OK;
270 }
271 SAL_INFO("writerfilter.rtf", __func__ << ": keyword '\\" << rKeyword << "' with param? "
272 << (bParam ? 1 : 0) << " param val: '"
273 << (bParam ? nParam : 0) << "'");
274 auto findIt = s_aRTFControlWords.find(rKeyword);
275 if (findIt == s_aRTFControlWords.end())
276 {
// Unknown keywords are not an error: they are logged and reported as not
// parsed.
// NOTE(review): the declaration of aSkip is not visible in this excerpt.
277 SAL_INFO("writerfilter.rtf", __func__ << ": unknown keyword '\\" << rKeyword << "'");
279 aSkip.setParsed(false);
280 return RTFError::OK;
281 }
282
283 RTFError ret;
284 RTFSymbol const& rSymbol = findIt->second;
// NOTE(review): the case labels of this switch are not visible in this
// excerpt; each group below is identified by the dispatch method it calls.
285 switch (rSymbol.GetControlType())
286 {
288 // flags ignore any parameter by definition
289 ret = m_rImport.dispatchFlag(rSymbol.GetIndex());
290 if (ret != RTFError::OK)
291 return ret;
292 break;
294 // same for destinations
295 ret = m_rImport.dispatchDestination(rSymbol.GetIndex());
296 if (ret != RTFError::OK)
297 return ret;
298 break;
300 // and symbols
301 ret = m_rImport.dispatchSymbol(rSymbol.GetIndex());
302 if (ret != RTFError::OK)
303 return ret;
304 break;
// Toggles receive both the presence flag and the value of the parameter.
306 ret = m_rImport.dispatchToggle(rSymbol.GetIndex(), bParam, nParam);
307 if (ret != RTFError::OK)
308 return ret;
309 break;
// Values fall back to the default stored in the symbol table when the input
// supplied no parameter.
311 if (!bParam)
312 nParam = rSymbol.GetDefValue();
313 ret = m_rImport.dispatchValue(rSymbol.GetIndex(), nParam);
314 if (ret != RTFError::OK)
315 return ret;
316 break;
317 }
318
319 return RTFError::OK;
320}
321
323{
// Builds a "<line>,<column>" string for the current stream position: the line
// is m_nLineNumber + 1 and the column is the offset from the recorded start
// of the current line, also plus one.
// NOTE(review): the signature line is not visible in this excerpt.
324 return OUString::number(m_nLineNumber + 1) + ","
325 + OUString::number(Strm().Tell() - m_nLineStartPos + 1);
326}
327
328} // namespace writerfilter::rtftok
329
330/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const SvXMLImport & m_rImport
sal_uInt64 Tell() const
SvStream & ReadChar(char &rChar)
sal_uInt64 SeekRel(sal_Int64 nPos)
sal_uInt64 remainingSize()
RTFTokenizer needs a class implementing this interface.
Definition: rtflistener.hxx:41
virtual RTFError resolveChars(char ch)=0
virtual RTFInternalState getInternalState()=0
virtual bool isSubstream() const =0
virtual RTFError pushState()=0
virtual RTFError dispatchSymbol(RTFKeyword nKeyword)=0
virtual Destination getDestination()=0
virtual RTFError popState()=0
virtual RTFError dispatchToggle(RTFKeyword nKeyword, bool bParam, int nParam)=0
virtual RTFError dispatchValue(RTFKeyword nKeyword, int nParam)=0
virtual RTFError dispatchDestination(RTFKeyword nKeyword)=0
virtual RTFError dispatchFlag(RTFKeyword nKeyword)=0
virtual void setInternalState(RTFInternalState nInternalState)=0
Represents an RTF Math Control Word.
Skips a destination after an unparsed control word if it was prefixed with *.
Represents an RTF Control Word.
RTFControlType GetControlType() const
void pushGroup()
To be invoked by the pushState() callback to signal when the importer enters a group.
int m_nGroup
Same as the size of the importer's states, except that this can be negative for invalid input.
static std::vector< RTFMathSymbol > s_aRTFMathControlWords
RTFTokenizer(RTFListener &rImport, SvStream *pInStream, css::uno::Reference< css::task::XStatusIndicator > const &xStatusIndicator)
static bool lookupMathKeyword(RTFMathSymbol &rSymbol)
To look up additional properties of a math symbol.
void popGroup()
To be invoked by the popState() callback to signal when the importer leaves a group.
RTFError dispatchKeyword(OString const &rKeyword, bool bParam, int nParam)
static std::unordered_map< OString, RTFSymbol > s_aRTFControlWords
css::uno::Reference< css::task::XStatusIndicator > const & m_xStatusIndicator
SVXCORE_DLLPUBLIC OUString SvxResId(TranslateId aId)
#define SAL_INFO(area, stream)
aBuf
int i
int AsHex(char ch)
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
RTFMathSymbol const aRTFMathControlWords[]
RTFSymbol const aRTFControlWords[]
signed char sal_Int8