LibreOffice Module svl (master) 1
adrparse.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <rtl/ustrbuf.hxx>
21#include <svl/adrparse.hxx>
22
23namespace
24{
25
// Kind of the element most recently added to an addr-spec.
// The declaration order matters: other code in this file compares these
// values relationally (e.g. ">= ELEMENT_ITEM", "<= ELEMENT_DELIM").
enum ElementType
{
    ELEMENT_START,
    ELEMENT_DELIM,
    ELEMENT_ITEM,
    ELEMENT_END
};
27
28struct ParsedAddrSpec
29{
30 sal_Unicode const * m_pBegin;
31 sal_Unicode const * m_pEnd;
32 ElementType m_eLastElem;
33 bool m_bAtFound;
34 bool m_bReparse;
35
36 ParsedAddrSpec() { reset(); }
37
38 bool isPoorlyValid() const { return m_eLastElem >= ELEMENT_ITEM; }
39
40 bool isValid() const { return isPoorlyValid() && m_bAtFound; }
41
42 void reset();
43
44 void finish();
45};
46
47void ParsedAddrSpec::reset()
48{
49 m_pBegin = nullptr;
50 m_pEnd = nullptr;
51 m_eLastElem = ELEMENT_START;
52 m_bAtFound = false;
53 m_bReparse = false;
54}
55
56void ParsedAddrSpec::finish()
57{
58 if (isPoorlyValid())
59 m_eLastElem = ELEMENT_END;
60 else
61 reset();
62}
63
64}
65
67{
69
70 enum TokenType: sal_uInt32 {
72
75 sal_uInt32 m_nCurToken;
78 ParsedAddrSpec m_aOuterAddrSpec;
79 ParsedAddrSpec m_aInnerAddrSpec;
80 ParsedAddrSpec * m_pAddrSpec;
83
84 inline void reset();
85
86 void addTokenToAddrSpec(ElementType eTokenElem);
87
88 bool readToken();
89
90 static OUString reparse(sal_Unicode const * pBegin,
91 sal_Unicode const * pEnd);
92
93public:
94 SvAddressParser_Impl(SvAddressParser * pParser, const OUString& rIn);
95};
96
98{
99 m_aOuterAddrSpec.reset();
100 m_aInnerAddrSpec.reset();
104}
105
107{
108 if (!m_pAddrSpec->m_pBegin)
109 m_pAddrSpec->m_pBegin = m_pCurTokenBegin;
110 else if (m_pAddrSpec->m_pEnd < m_pCurTokenBegin)
111 m_pAddrSpec->m_bReparse = true;
112 m_pAddrSpec->m_pEnd = m_pCurTokenEnd;
113 m_pAddrSpec->m_eLastElem = eTokenElem;
114}
115
116
117// SvAddressParser_Impl
118
119
121{
123 switch (m_eType)
124 {
125 case TOKEN_QUOTED:
126 {
128 bool bEscaped = false;
129 for (;;)
130 {
132 return false;
133 sal_Unicode cChar = *m_pInputPos++;
134 if (bEscaped)
135 {
136 bEscaped = false;
137 }
138 else if (cChar == '"')
139 {
141 return true;
142 }
143 else if (cChar == '\\')
144 bEscaped = true;
145 }
146 }
147
148 case TOKEN_DOMAIN:
149 {
151 bool bEscaped = false;
152 for (;;)
153 {
155 return false;
156 sal_Unicode cChar = *m_pInputPos++;
157 if (bEscaped)
158 bEscaped = false;
159 else if (cChar == ']')
160 {
162 return true;
163 }
164 else if (cChar == '\\')
165 bEscaped = true;
166 }
167 }
168
169 case TOKEN_COMMENT:
170 {
172 bool bEscaped = false;
173 int nLevel = 0;
174 for (;;)
175 {
177 return false;
178 sal_Unicode cChar = *m_pInputPos++;
179 if (bEscaped)
180 {
181 bEscaped = false;
182 }
183 else if (cChar == '(')
184 {
185 ++nLevel;
186 }
187 else if (cChar == ')')
188 if (nLevel)
189 {
190 --nLevel;
191 }
192 else
193 return true;
194 else if (cChar == '\\')
195 {
196 bEscaped = true;
197 }
198 }
199 }
200
201 default:
202 {
203 sal_Unicode cChar;
204 for (;;)
205 {
207 return false;
208 cChar = *m_pInputPos++;
209 if (cChar > ' ' && cChar != 0x7F) // DEL
210 break;
211 }
213 if (cChar == '"' || cChar == '(' || cChar == ')' || cChar == ','
214 || cChar == '.' || cChar == ':' || cChar == ';'
215 || cChar == '<' || cChar == '>' || cChar == '@'
216 || cChar == '[' || cChar == '\\' || cChar == ']')
217 {
218 m_nCurToken = cChar;
220 return true;
221 }
222 else
223 for (;;)
224 {
226 {
228 return true;
229 }
230 cChar = *m_pInputPos++;
231 if (cChar <= ' ' || cChar == '"' || cChar == '('
232 || cChar == ')' || cChar == ',' || cChar == '.'
233 || cChar == ':' || cChar == ';' || cChar == '<'
234 || cChar == '>' || cChar == '@' || cChar == '['
235 || cChar == '\\' || cChar == ']'
236 || cChar == 0x7F) // DEL
237 {
239 return true;
240 }
241 }
242 }
243 }
244}
245
246// static
248 sal_Unicode const * pEnd)
249{
250 OUStringBuffer aResult;
252 bool bEscaped = false;
253 int nLevel = 0;
254 while (pBegin < pEnd)
255 {
256 sal_Unicode cChar = *pBegin++;
257 switch (eMode)
258 {
259 case TOKEN_QUOTED:
260 if (bEscaped)
261 {
262 aResult.append(cChar);
263 bEscaped = false;
264 }
265 else if (cChar == '"')
266 {
267 aResult.append(cChar);
269 }
270 else if (cChar == '\\')
271 {
272 aResult.append(cChar);
273 bEscaped = true;
274 }
275 else
276 aResult.append(cChar);
277 break;
278
279 case TOKEN_DOMAIN:
280 if (bEscaped)
281 {
282 aResult.append(cChar);
283 bEscaped = false;
284 }
285 else if (cChar == ']')
286 {
287 aResult.append(cChar);
289 }
290 else if (cChar == '\\')
291 {
292 aResult.append(cChar);
293 bEscaped = true;
294 }
295 else
296 aResult.append(cChar);
297 break;
298
299 case TOKEN_COMMENT:
300 if (bEscaped)
301 bEscaped = false;
302 else if (cChar == '(')
303 ++nLevel;
304 else if (cChar == ')')
305 if (nLevel)
306 --nLevel;
307 else
309 else if (cChar == '\\')
310 bEscaped = true;
311 break;
312
313 case TOKEN_ATOM:
314 if (cChar <= ' ' || cChar == 0x7F) // DEL
315 {
316 }
317 else if (cChar == '(')
318 {
320 }
321 else
322 {
323 if (cChar == '"')
324 {
325 aResult.append(cChar);
327 }
328 else if (cChar == '[')
329 {
330 aResult.append(cChar);
332 }
333 else
334 aResult.append(cChar);
335 }
336 break;
337 }
338 }
339 return aResult.makeStringAndClear();
340}
341
343 const OUString& rInput)
344 : m_pCurTokenBegin(nullptr)
345 , m_pCurTokenEnd(nullptr)
346{
347 m_pInputPos = rInput.getStr();
348 m_pInputEnd = m_pInputPos + rInput.getLength();
349
350 reset();
351 bool bDone = false;
352 for (;;)
353 {
354 if (!readToken())
355 {
356 if (m_eState == AFTER_LESS)
357 m_nCurToken = '>';
358 else
359 {
360 m_nCurToken = ',';
361 bDone = true;
362 }
363 }
364 switch (m_nCurToken)
365 {
366 case TOKEN_QUOTED:
367 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
368 {
369 if (m_pAddrSpec->m_bAtFound
370 || m_pAddrSpec->m_eLastElem <= ELEMENT_DELIM)
371 m_pAddrSpec->reset();
372 addTokenToAddrSpec(ELEMENT_ITEM);
373 }
375 break;
376
377 case TOKEN_DOMAIN:
378 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
379 {
380 if (m_pAddrSpec->m_bAtFound && m_pAddrSpec->m_eLastElem == ELEMENT_DELIM)
381 addTokenToAddrSpec(ELEMENT_ITEM);
382 else
383 m_pAddrSpec->reset();
384 }
386 break;
387
388 case TOKEN_COMMENT:
390 break;
391
392 case TOKEN_ATOM:
393 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
394 {
395 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
396 m_pAddrSpec->reset();
397 addTokenToAddrSpec(ELEMENT_ITEM);
398 }
399 break;
400
401 case '(':
403 break;
404
405 case ')':
406 case '\\':
407 case ']':
408 m_pAddrSpec->finish();
409 break;
410
411 case '<':
412 switch (m_eState)
413 {
414 case BEFORE_COLON:
415 case BEFORE_LESS:
416 m_aOuterAddrSpec.finish();
419 break;
420
421 case AFTER_LESS:
422 m_aInnerAddrSpec.finish();
423 break;
424
425 case AFTER_GREATER:
426 m_aOuterAddrSpec.finish();
427 break;
428 }
429 break;
430
431 case '>':
432 if (m_eState == AFTER_LESS)
433 {
434 m_aInnerAddrSpec.finish();
435 if (m_aInnerAddrSpec.isValid())
436 m_aOuterAddrSpec.m_eLastElem = ELEMENT_END;
439 }
440 else
441 {
442 m_aOuterAddrSpec.finish();
443 }
444 break;
445
446 case '@':
447 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
448 {
449 if (!m_pAddrSpec->m_bAtFound
450 && m_pAddrSpec->m_eLastElem == ELEMENT_ITEM)
451 {
452 addTokenToAddrSpec(ELEMENT_DELIM);
453 m_pAddrSpec->m_bAtFound = true;
454 }
455 else
456 m_pAddrSpec->reset();
457 }
458 break;
459
460 case ',':
461 case ';':
462 if (m_eState == AFTER_LESS)
463 if (m_nCurToken == ',')
464 {
465 if (m_aInnerAddrSpec.m_eLastElem != ELEMENT_END)
466 m_aInnerAddrSpec.reset();
467 }
468 else
469 m_aInnerAddrSpec.finish();
470 else
471 {
472 if(m_aInnerAddrSpec.isValid() || (!m_aOuterAddrSpec.isValid() && m_aInnerAddrSpec.isPoorlyValid()))
473 {
475 }
476 else if(m_aOuterAddrSpec.isPoorlyValid())
477 {
479 }
480 else
481 {
482 m_pAddrSpec = nullptr;
483 }
484
485 if (m_pAddrSpec)
486 {
487 OUString aTheAddrSpec;
488 if (m_pAddrSpec->m_bReparse)
489 aTheAddrSpec = reparse(m_pAddrSpec->m_pBegin, m_pAddrSpec->m_pEnd);
490 else
491 {
492 sal_Int32 nLen = m_pAddrSpec->m_pEnd - m_pAddrSpec->m_pBegin;
493 if (nLen == rInput.getLength())
494 aTheAddrSpec = rInput;
495 else
496 aTheAddrSpec = rInput.copy( (m_pAddrSpec->m_pBegin - rInput.getStr()),
497 nLen);
498 }
499 pParser->m_vAddresses.emplace_back( aTheAddrSpec );
500 }
501 if (bDone)
502 return;
503 reset();
504 }
505 break;
506
507 case ':':
508 switch (m_eState)
509 {
510 case BEFORE_COLON:
511 m_aOuterAddrSpec.reset();
513 break;
514
515 case BEFORE_LESS:
516 case AFTER_GREATER:
517 m_aOuterAddrSpec.finish();
518 break;
519
520 case AFTER_LESS:
521 m_aInnerAddrSpec.reset();
522 break;
523 }
524 break;
525
526 case '"':
528 break;
529
530 case '.':
531 if (m_pAddrSpec->m_eLastElem != ELEMENT_END)
532 {
533 if (m_pAddrSpec->m_eLastElem != ELEMENT_DELIM)
534 addTokenToAddrSpec(ELEMENT_DELIM);
535 else
536 m_pAddrSpec->reset();
537 }
538 break;
539
540 case '[':
542 break;
543 }
544 }
545}
546
547SvAddressParser::SvAddressParser(const OUString& rInput)
548{
549 SvAddressParser_Impl aDoParse(this, rInput);
550}
551
553{
554}
555
556/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static OUString reparse(sal_Unicode const *pBegin, sal_Unicode const *pEnd)
Definition: adrparse.cxx:247
ParsedAddrSpec m_aOuterAddrSpec
Definition: adrparse.cxx:78
sal_Unicode const * m_pInputPos
Definition: adrparse.cxx:73
sal_Unicode const * m_pCurTokenBegin
Definition: adrparse.cxx:76
ParsedAddrSpec m_aInnerAddrSpec
Definition: adrparse.cxx:79
void addTokenToAddrSpec(ElementType eTokenElem)
Definition: adrparse.cxx:106
SvAddressParser_Impl(SvAddressParser *pParser, const OUString &rIn)
Definition: adrparse.cxx:342
sal_uInt32 m_nCurToken
Definition: adrparse.cxx:75
sal_Unicode const * m_pCurTokenEnd
Definition: adrparse.cxx:77
sal_Unicode const * m_pInputEnd
Definition: adrparse.cxx:74
ParsedAddrSpec * m_pAddrSpec
Definition: adrparse.cxx:80
SvAddressParser(const OUString &rInput)
Definition: adrparse.cxx:547
::std::vector< OUString > m_vAddresses
Definition: adrparse.hxx:33
Mode eMode
ElementType
sal_uInt16 sal_Unicode