LibreOffice Module sw (master) 1
parcss1.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <o3tl/string_view.hxx>
21#include <osl/diagnose.h>
22#include <rtl/character.hxx>
23#include <rtl/ustrbuf.hxx>
24#include <tools/color.hxx>
25#include <tools/solar.h>
26#include <svtools/htmltokn.h>
27#include <comphelper/string.hxx>
28#include "parcss1.hxx"
29
30// Loop check: guards against infinite loops by verifying, on every
31// loop iteration, that the input position has advanced
32#define LOOP_CHECK
33
34#ifdef LOOP_CHECK
35
// Declares the bookkeeping variable used by the other LOOP_CHECK_* macros:
// the input position seen on the previous loop iteration. It starts at
// SAL_MAX_INT32 so that the first LOOP_CHECK_CHECK never reports a stall.
36#define LOOP_CHECK_DECL \
37 sal_Int32 nOldInPos = SAL_MAX_INT32;
// Resets the remembered position, so that a following loop starts its
// progress check from scratch.
38#define LOOP_CHECK_RESTART \
39 nOldInPos = SAL_MAX_INT32;
// Progress check: asserts (with message 'where') that m_nInPos changed since
// the previous check or that the look-ahead character is EOF. If neither holds
// it executes 'break', otherwise it records the current position. Because it
// expands to a bare 'break', it must be used directly inside the loop being
// guarded, and it is written without a trailing semicolon at the use site.
40#define LOOP_CHECK_CHECK( where ) \
41 OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where ); \
42 if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) ) \
43 break; \
44 else \
45 nOldInPos = m_nInPos;
46
47#else
48
49#define LOOP_CHECK_DECL
50#define LOOP_CHECK_RESTART
51#define LOOP_CHECK_CHECK( where )
52
53#endif
54
55const sal_Int32 MAX_LEN = 1024;
56
57void CSS1Parser::InitRead( const OUString& rIn )
58{
59 m_nlLineNr = 0;
60 m_nlLinePos = 0;
61
62 m_bWhiteSpace = true; // if nothing was read it's like there was WS
63 m_bEOF = false;
65 m_nValue = 0.;
66
67 m_aIn = rIn;
68 m_nInPos = 0;
71}
72
74{
75 if( m_nInPos >= m_aIn.getLength() )
76 {
77 m_bEOF = true;
78 return sal_Unicode(EOF);
79 }
80
82 m_nInPos++;
83
84 if( c == '\n' )
85 {
86 ++m_nlLineNr;
87 m_nlLinePos = 1;
88 }
89 else
91
92 return c;
93}
94
95// This function implements the scanner described in
96
97// http://www.w3.org/pub/WWW/TR/WD-css1.html
98// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
99
100// for CSS1. It's a direct implementation of the
101// described Lex grammar.
102
104{
105 CSS1Token nRet = CSS1_NULL;
106 m_aToken.clear();
107
108 do {
109 // remember if white space was read
110 bool bPrevWhiteSpace = m_bWhiteSpace;
111 m_bWhiteSpace = false;
112
113 bool bNextCh = true;
114 switch( m_cNextCh )
115 {
116 case '/': // COMMENT | '/'
117 {
119 if( '*' == m_cNextCh )
120 {
121 // COMMENT
123
124 bool bAsterisk = false;
125 while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
126 {
127 bAsterisk = ('*'==m_cNextCh);
129 }
130 }
131 else
132 {
133 // '/'
134 bNextCh = false;
135 nRet = CSS1_SLASH;
136 }
137 }
138 break;
139
140 case '@': // '@import' | '@XXX'
141 {
143 if (rtl::isAsciiAlpha(m_cNextCh))
144 {
145 // scan the next identifier
146 OUStringBuffer sTmpBuffer(32);
147 do {
148 sTmpBuffer.append( m_cNextCh );
150 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
151 '-' == m_cNextCh) && !IsEOF() );
152
153 m_aToken += sTmpBuffer;
154
155 // check if we know it
156 switch( m_aToken[0] )
157 {
158 case 'i':
159 case 'I':
160 if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
161 nRet = CSS1_IMPORT_SYM;
162 break;
163 case 'p':
164 case 'P':
165 if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
166 nRet = CSS1_PAGE_SYM;
167 break;
168 }
169
170 // error handling: ignore the unknown '@ident' and everything up to
171 // the next top-level semicolon or the end of the next block
172 if( CSS1_NULL==nRet )
173 {
174 m_aToken.clear();
175 int nBlockLvl = 0;
176 sal_Unicode cQuoteCh = 0;
177 bool bDone = false, bEscape = false;
178 while( !bDone && !IsEOF() )
179 {
180 bool bOldEscape = bEscape;
181 bEscape = false;
182 switch( m_cNextCh )
183 {
184 case '{':
185 if( !cQuoteCh && !bOldEscape )
186 nBlockLvl++;
187 break;
188 case ';':
189 if( !cQuoteCh && !bOldEscape )
190 bDone = nBlockLvl==0;
191 break;
192 case '}':
193 if( !cQuoteCh && !bOldEscape )
194 bDone = --nBlockLvl==0;
195 break;
196 case '\"':
197 case '\'':
198 if( !bOldEscape )
199 {
200 if( cQuoteCh )
201 {
202 if( cQuoteCh == m_cNextCh )
203 cQuoteCh = 0;
204 }
205 else
206 {
207 cQuoteCh = m_cNextCh;
208 }
209 }
210 break;
211 case '\\':
212 if( !bOldEscape )
213 bEscape = true;
214 break;
215 }
217 }
218 }
219
220 bNextCh = false;
222 }
223 break;
224
225 case '!': // '!' 'legal' | '!' 'important' | syntax error
226 {
227 // ignore white space
229 while( ( ' ' == m_cNextCh ||
230 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
231 {
232 m_bWhiteSpace = true;
234 }
235
236 if( 'i'==m_cNextCh || 'I'==m_cNextCh)
237 {
238 // scan next identifier
239 OUStringBuffer sTmpBuffer(32);
240 do {
241 sTmpBuffer.append( m_cNextCh );
243 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
244 '-' == m_cNextCh) && !IsEOF() );
245
246 m_aToken += sTmpBuffer;
247
248 if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
249 m_aToken.equalsIgnoreAsciiCase( "important" ) )
250 {
251 // '!' 'important'
252 nRet = CSS1_IMPORTANT_SYM;
253 }
254 else
255 {
256 // error handling: ignore '!', not IDENT
257 nRet = CSS1_IDENT;
258 }
259
260 m_bWhiteSpace = false;
261 bNextCh = false;
262 }
263 else
264 {
265 // error handling: ignore '!'
266 bNextCh = false;
267 }
268 }
269 break;
270
271 case '\"':
272 case '\'': // STRING
273 {
274 // \... isn't possible yet!!!
275 sal_Unicode cQuoteChar = m_cNextCh;
277
278 OUStringBuffer sTmpBuffer( MAX_LEN );
279 do {
280 sTmpBuffer.append( m_cNextCh );
282 } while( cQuoteChar != m_cNextCh && !IsEOF() );
283
284 m_aToken += sTmpBuffer;
285
286 nRet = CSS1_STRING;
287 }
288 break;
289
290 case '0':
291 case '1':
292 case '2':
293 case '3':
294 case '4':
295 case '5':
296 case '6':
297 case '7':
298 case '8':
299 case '9': // NUMBER | PERCENTAGE | LENGTH
300 {
301 // save current position
302 std::size_t nInPosSave = m_nInPos;
303 sal_Unicode cNextChSave = m_cNextCh;
304 sal_uInt32 nlLineNrSave = m_nlLineNr;
305 sal_uInt32 nlLinePosSave = m_nlLinePos;
306 bool bEOFSave = m_bEOF;
307
308 // first try to parse a hex digit
309 OUStringBuffer sTmpBuffer( 16 );
310 do {
311 sTmpBuffer.append( m_cNextCh );
313 } while( sTmpBuffer.getLength() < 7 &&
314 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
315 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
316 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
317 !IsEOF() );
318
319 if( sTmpBuffer.getLength()==6 )
320 {
321 // we found a color in hex
322 m_aToken += sTmpBuffer;
323 nRet = CSS1_HEXCOLOR;
324 bNextCh = false;
325
326 break;
327 }
328
329 // otherwise we try a number
330 m_nInPos = nInPosSave;
331 m_cNextCh = cNextChSave;
332 m_nlLineNr = nlLineNrSave;
333 m_nlLinePos = nlLinePosSave;
334 m_bEOF = bEOFSave;
335
336 // first parse the number
337 sTmpBuffer.setLength( 0 );
338 do {
339 sTmpBuffer.append( m_cNextCh );
341 } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
342 !IsEOF() );
343
344 m_aToken += sTmpBuffer;
345 m_nValue = m_aToken.toDouble();
346
347 // ignore white space
348 while( ( ' ' == m_cNextCh ||
349 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
350 {
351 m_bWhiteSpace = true;
353 }
354
355 // check now, if there is a unit
356 switch( m_cNextCh )
357 {
358 case '%': // PERCENTAGE
359 m_bWhiteSpace = false;
360 nRet = CSS1_PERCENTAGE;
361 break;
362
363 case 'c':
364 case 'C': // LENGTH cm | LENGTH IDENT
365 case 'e':
366 case 'E': // LENGTH (em | ex) | LENGTH IDENT
367 case 'i':
368 case 'I': // LENGTH inch | LENGTH IDENT
369 case 'p':
370 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
371 case 'm':
372 case 'M': // LENGTH mm | LENGTH IDENT
373 {
374 // save current position
375 sal_Int32 nInPosOld = m_nInPos;
376 sal_Unicode cNextChOld = m_cNextCh;
377 sal_uInt32 nlLineNrOld = m_nlLineNr;
378 sal_uInt32 nlLinePosOld = m_nlLinePos;
379 bool bEOFOld = m_bEOF;
380
381 // parse the next identifier
382 OUString aIdent;
383 OUStringBuffer sTmpBuffer2(64);
384 do {
385 sTmpBuffer2.append( m_cNextCh );
387 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
388 '-' == m_cNextCh) && !IsEOF() );
389
390 aIdent += sTmpBuffer2;
391
392 // Is it a unit?
393 const char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
394 double nScale1 = 1., nScale2 = 1.;
395 CSS1Token nToken1 = CSS1_LENGTH,
396 nToken2 = CSS1_LENGTH,
397 nToken3 = CSS1_LENGTH;
398 switch( aIdent[0] )
399 {
400 case 'c':
401 case 'C':
402 pCmp1 = "cm";
403 nScale1 = (72.*20.)/2.54; // twip
404 break;
405 case 'e':
406 case 'E':
407 pCmp1 = "em";
408 nToken1 = CSS1_EMS;
409
410 pCmp2 = "ex";
411 nToken2 = CSS1_EMX;
412 break;
413 case 'i':
414 case 'I':
415 pCmp1 = "in";
416 nScale1 = 72.*20.; // twip
417 break;
418 case 'm':
419 case 'M':
420 pCmp1 = "mm";
421 nScale1 = (72.*20.)/25.4; // twip
422 break;
423 case 'p':
424 case 'P':
425 pCmp1 = "pt";
426 nScale1 = 20.; // twip
427
428 pCmp2 = "pc";
429 nScale2 = 12.*20.; // twip
430
431 pCmp3 = "px";
432 nToken3 = CSS1_PIXLENGTH;
433 break;
434 }
435
436 double nScale = 0.0;
437 OSL_ENSURE( pCmp1, "Where does the first digit come from?" );
438 if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
439 {
440 nScale = nScale1;
441 nRet = nToken1;
442 }
443 else if( pCmp2 &&
444 aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
445 {
446 nScale = nScale2;
447 nRet = nToken2;
448 }
449 else if( pCmp3 &&
450 aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
451 {
452 nScale = 1.; // nScale3
453 nRet = nToken3;
454 }
455 else
456 {
457 nRet = CSS1_NUMBER;
458 }
459
460 if( CSS1_LENGTH==nRet && nScale!=1.0 )
461 m_nValue *= nScale;
462
463 if( nRet == CSS1_NUMBER )
464 {
465 m_nInPos = nInPosOld;
466 m_cNextCh = cNextChOld;
467 m_nlLineNr = nlLineNrOld;
468 m_nlLinePos = nlLinePosOld;
469 m_bEOF = bEOFOld;
470 }
471 else
472 {
473 m_bWhiteSpace = false;
474 }
475 bNextCh = false;
476 }
477 break;
478 default: // NUMBER IDENT
479 bNextCh = false;
480 nRet = CSS1_NUMBER;
481 break;
482 }
483 }
484 break;
485
486 case ':': // ':'
487 // catch link/visited/active !!!
488 nRet = CSS1_COLON;
489 break;
490
491 case '.': // DOT_W_WS | DOT_WO_WS
492 nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
493 break;
494
495 case '+': // '+'
496 nRet = CSS1_PLUS;
497 break;
498
499 case '-': // '-'
500 nRet = CSS1_MINUS;
501 break;
502
503 case '{': // '{'
504 nRet = CSS1_OBRACE;
505 break;
506
507 case '}': // '}'
508 nRet = CSS1_CBRACE;
509 break;
510
511 case ';': // ';'
512 nRet = CSS1_SEMICOLON;
513 break;
514
515 case ',': // ','
516 nRet = CSS1_COMMA;
517 break;
518
519 case '#': // '#'
521 if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
522 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
523 ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
524 {
525 // save current position
526 sal_Int32 nInPosSave = m_nInPos;
527 sal_Unicode cNextChSave = m_cNextCh;
528 sal_uInt32 nlLineNrSave = m_nlLineNr;
529 sal_uInt32 nlLinePosSave = m_nlLinePos;
530 bool bEOFSave = m_bEOF;
531
532 // first try to parse a hex digit
533 OUStringBuffer sTmpBuffer(8);
534 do {
535 sTmpBuffer.append( m_cNextCh );
537 } while( sTmpBuffer.getLength() < 9 &&
538 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
539 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
540 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
541 !IsEOF() );
542
543 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
544 {
545 // we found a color in hex (RGB)
546 m_aToken += sTmpBuffer;
547 nRet = CSS1_HEXCOLOR;
548 bNextCh = false;
549
550 break;
551 }
552
553 if( sTmpBuffer.getLength()==8 )
554 {
555 // we found a color in hex (RGBA)
556 // we convert it to RGB assuming white background
557 sal_uInt32 nColor = sTmpBuffer.makeStringAndClear().toUInt32(16);
558 sal_uInt32 nRed = (nColor & 0xff000000) >> 24;
559 sal_uInt32 nGreen = (nColor & 0xff0000) >> 16;
560 sal_uInt32 nBlue = (nColor & 0xff00) >> 8;
561 double nAlpha = (nColor & 0xff) / 255.0;
562 nRed = (1 - nAlpha) * 255 + nAlpha * nRed;
563 nGreen = (1 - nAlpha) * 255 + nAlpha * nGreen;
564 nBlue = (1 - nAlpha) * 255 + nAlpha * nBlue;
565 nColor = (nRed << 16) + (nGreen << 8) + nBlue;
566 m_aToken += OUString::number(nColor, 16);
567 nRet = CSS1_HEXCOLOR;
568 bNextCh = false;
569
570 break;
571 }
572
573 // otherwise we try a number
574 m_nInPos = nInPosSave;
575 m_cNextCh = cNextChSave;
576 m_nlLineNr = nlLineNrSave;
577 m_nlLinePos = nlLinePosSave;
578 m_bEOF = bEOFSave;
579 }
580
581 nRet = CSS1_HASH;
582 bNextCh = false;
583 break;
584
585 case ' ':
586 case '\t':
587 case '\r':
588 case '\n': // White-Space
589 m_bWhiteSpace = true;
590 break;
591
592 case sal_Unicode(EOF):
593 if( IsEOF() )
594 {
596 bNextCh = false;
597 break;
598 }
599 [[fallthrough]];
600
601 default: // IDENT | syntax error
602 if (rtl::isAsciiAlpha(m_cNextCh))
603 {
604 // IDENT
605
606 bool bHexColor = true;
607
608 // parse the next identifier
609 OUStringBuffer sTmpBuffer(64);
610 do {
611 sTmpBuffer.append( m_cNextCh );
612 if( bHexColor )
613 {
614 bHexColor = sTmpBuffer.getLength()<7 &&
615 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
616 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
617 ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
618 }
620 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
621 '-' == m_cNextCh) && !IsEOF() );
622
623 m_aToken += sTmpBuffer;
624
625 if( bHexColor && sTmpBuffer.getLength()==6 )
626 {
627 bNextCh = false;
628 nRet = CSS1_HEXCOLOR;
629
630 break;
631 }
632 if( '('==m_cNextCh &&
633 ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
634 m_aToken.equalsIgnoreAsciiCase( "url" )) ||
635 (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
636 (m_aToken.equalsIgnoreAsciiCase( "rgb" ) || m_aToken.equalsIgnoreAsciiCase( "rgba" ) )
637 ) ) )
638 {
639 int nNestCnt = 0;
640 OUStringBuffer sTmpBuffer2(64);
641 do {
642 sTmpBuffer2.append( m_cNextCh );
643 switch( m_cNextCh )
644 {
645 case '(': nNestCnt++; break;
646 case ')': nNestCnt--; break;
647 }
649 } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
650 sTmpBuffer2.append( m_cNextCh );
651 m_aToken += sTmpBuffer2;
652 bNextCh = true;
653 nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
654 ? CSS1_URL
655 : CSS1_RGB;
656 }
657 else
658 {
659 bNextCh = false;
660 nRet = CSS1_IDENT;
661 }
662 }
663 // error handling: ignore digit
664 break;
665 }
666 if( bNextCh )
668
669 } while( CSS1_NULL==nRet && IsParserWorking() );
670
671 return nRet;
672}
673
674// These functions implement the parser described in
675
676// http://www.w3.org/pub/WWW/TR/WD-css1.html
677// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
678
679// for CSS1. It's a direct implementation of the
680// described Lex grammar.
681
682// stylesheet
683// : import* rule*
684
685// import
686// : IMPORT_SYM url
687
688// url
689// : STRING
690
692{
694
695 // import*
696 bool bDone = false;
697 while( !bDone && IsParserWorking() )
698 {
699 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
700
701 switch( m_nToken )
702 {
703 case CSS1_IMPORT_SYM:
704 // IMPORT_SYM url
705 // URL are skipped without checks
707 break;
708 case CSS1_IDENT: // Look-Aheads
709 case CSS1_DOT_W_WS:
710 case CSS1_HASH:
711 case CSS1_PAGE_SYM:
712 // rule
713 bDone = true;
714 break;
715 default:
716 // error handling: ignore
717 break;
718 }
719
720 if( !bDone )
722 }
723
725
726 // rule *
727 while( IsParserWorking() )
728 {
729 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
730
731 switch( m_nToken )
732 {
733 case CSS1_IDENT: // Look-Aheads
734 case CSS1_DOT_W_WS:
735 case CSS1_HASH:
736 case CSS1_PAGE_SYM:
737 // rule
738 ParseRule();
739 break;
740 default:
741 // error handling: ignore
743 break;
744 }
745 }
746}
747
748// rule
749// : selector [ ',' selector ]*
750// '{' declaration [ ';' declaration ]* '}'
751
753{
754 // selector
755 std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
756 if( !pSelector )
757 return;
758
759 // process selector
760 SelectorParsed( std::move(pSelector), true );
761
763
764 // [ ',' selector ]*
765 while( CSS1_COMMA==m_nToken && IsParserWorking() )
766 {
767 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
768
769 // ignore ','
771
772 // selector
773 pSelector = ParseSelector();
774 if( !pSelector )
775 return;
776
777 // process selector
778 SelectorParsed( std::move(pSelector), false );
779 }
780
781 // '{'
782 if( CSS1_OBRACE != m_nToken )
783 return;
785
786 // declaration
787 OUString aProperty;
788 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
789 if( !pExpr )
790 return;
791
792 // process expression
793 DeclarationParsed( aProperty, std::move(pExpr) );
794
796
797 // [ ';' declaration ]*
799 {
800 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
801
802 // ';'
804
805 // declaration
806 if( CSS1_IDENT == m_nToken )
807 {
808 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
809 if( pExp )
810 {
811 // process expression
812 DeclarationParsed( aProperty, std::move(pExp));
813 }
814 }
815 }
816
817 // '}'
818 if( CSS1_CBRACE == m_nToken )
820}
821
822// selector
823// : simple_selector+ [ ':' pseudo_element ]?
824
825// simple_selector
826// : element_name [ DOT_WO_WS class ]?
827// | DOT_W_WS class
828// | id_selector
829
830// element_name
831// : IDENT
832
833// class
834// : IDENT
835
836// id_selector
837// : '#' IDENT
838
839// pseudo_element
840// : IDENT
841
842std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
843{
844 std::unique_ptr<CSS1Selector> pRoot;
845 CSS1Selector *pLast = nullptr;
846
847 bool bDone = false;
848 CSS1Selector *pNew = nullptr;
849
851
852 // simple_selector+
853 while( !bDone && IsParserWorking() )
854 {
855 LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
856
857 bool bNextToken = true;
858
859 switch( m_nToken )
860 {
861 case CSS1_IDENT:
862 {
863 // element_name [ DOT_WO_WS class ]?
864
865 // element_name
866 OUString aElement = m_aToken;
869
870 if( CSS1_DOT_WO_WS == m_nToken )
871 {
872 // DOT_WO_WS
874
875 // class
876 if( CSS1_IDENT == m_nToken )
877 {
878 aElement += "." + m_aToken;
880 }
881 else
882 {
883 // missing class
884 return pRoot;
885 }
886 }
887 else
888 {
889 // that was a look-ahead
890 bNextToken = false;
891 }
892 pNew = new CSS1Selector( eType, aElement );
893 }
894 break;
895 case CSS1_DOT_W_WS:
896 // DOT_W_WS class
897
898 // DOT_W_WS
900
901 if( CSS1_IDENT==m_nToken )
902 {
903 // class
905 }
906 else
907 {
908 // missing class
909 return pRoot;
910 }
911 break;
912 case CSS1_HASH:
913 // '#' id_selector
914
915 // '#'
917
918 if( CSS1_IDENT==m_nToken )
919 {
920 // id_selector
922 }
923 else
924 {
925 // missing id_selector
926 return pRoot;
927 }
928 break;
929
930 case CSS1_PAGE_SYM:
931 {
932 // @page
934 }
935 break;
936
937 default:
938 // stop because we don't know what's next
939 bDone = true;
940 break;
941 }
942
943 // if created a new selector then save it
944 if( pNew )
945 {
946 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
947 "Root-Selector, but no Last" );
948 if( pLast )
949 pLast->SetNext( pNew );
950 else
951 pRoot.reset(pNew);
952
953 pLast = pNew;
954 pNew = nullptr;
955 }
956
957 if( bNextToken && !bDone )
959 }
960
961 if( !pRoot )
962 {
963 // missing simple_selector
964 return pRoot;
965 }
966
967 // [ ':' pseudo_element ]?
969 {
970 // ':' pseudo element
972 if( CSS1_IDENT==m_nToken )
973 {
974 if (pLast)
977 }
978 else
979 {
980 // missing pseudo_element
981 return pRoot;
982 }
983 }
984
985 return pRoot;
986}
987
988// declaration
989// : property ':' expr prio?
990// | /* empty */
991
992// expression
993// : term [ operator term ]*
994
995// term
996// : unary_operator?
997// [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
998// HEXCOLOR | URL | RGB ]
999
1000// operator
1001// : '/' | ',' | /* empty */
1002
1003// unary_operator
1004// : '-' | '+'
1005
1006// property
1007// : ident
1008
1009// the sign is only used for numeric values (except PERCENTAGE)
1010// and it's applied on nValue!
1011std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
1012{
1013 std::unique_ptr<CSS1Expression> pRoot;
1014 CSS1Expression *pLast = nullptr;
1015
1016 // property
1017 if( CSS1_IDENT != m_nToken )
1018 {
1019 // missing property
1020 return pRoot;
1021 }
1022 rProperty = m_aToken;
1023
1025
1026 // ':'
1027 if( CSS1_COLON != m_nToken )
1028 {
1029 // missing ':'
1030 return pRoot;
1031 }
1033
1034 // term [operator term]*
1035 // here we're pretty lax regarding the syntax, but this shouldn't
1036 // be a problem
1037 bool bDone = false;
1038 sal_Unicode cSign = 0, cOp = 0;
1039 CSS1Expression *pNew = nullptr;
1040
1042
1043 while( !bDone && IsParserWorking() )
1044 {
1045 LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1046
1047 switch( m_nToken )
1048 {
1049 case CSS1_MINUS:
1050 cSign = '-';
1051 break;
1052
1053 case CSS1_PLUS:
1054 cSign = '+';
1055 break;
1056
1057 case CSS1_NUMBER:
1058 case CSS1_LENGTH:
1059 case CSS1_PIXLENGTH:
1060 case CSS1_EMS:
1061 case CSS1_EMX:
1062 if( '-'==cSign )
1063 m_nValue = -m_nValue;
1064 [[fallthrough]];
1065 case CSS1_STRING:
1066 case CSS1_PERCENTAGE:
1067 case CSS1_IDENT:
1068 case CSS1_URL:
1069 case CSS1_RGB:
1070 case CSS1_HEXCOLOR:
1071 pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1072 m_nValue = 0; // otherwise this also is applied to next ident
1073 cSign = 0;
1074 cOp = 0;
1075 break;
1076
1077 case CSS1_SLASH:
1078 cOp = '/';
1079 cSign = 0;
1080 break;
1081
1082 case CSS1_COMMA:
1083 cOp = ',';
1084 cSign = 0;
1085 break;
1086
1087 default:
1088 bDone = true;
1089 break;
1090 }
1091
1092 // if created a new expression save it
1093 if( pNew )
1094 {
1095 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1096 "Root-Selector, but no Last" );
1097 if( pLast )
1098 pLast->SetNext( pNew );
1099 else
1100 pRoot.reset(pNew);
1101
1102 pLast = pNew;
1103 pNew = nullptr;
1104 }
1105
1106 if( !bDone )
1108 }
1109
1110 if( !pRoot )
1111 {
1112 // missing term
1113 return pRoot;
1114 }
1115
1116 // prio?
1118 {
1119 // IMPORTANT_SYM
1121 }
1122
1123 return pRoot;
1124}
1125
1127 : m_bWhiteSpace(false)
1128 , m_bEOF(false)
1129 , m_cNextCh(0)
1130 , m_nInPos(0)
1131 , m_nlLineNr(0)
1132 , m_nlLinePos(0)
1133 , m_nValue(0)
1134 , m_eState(CSS1_PAR_ACCEPTED)
1135 , m_nToken(CSS1_NULL)
1136{
1137}
1138
1140{
1141}
1142
1143void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1144{
1145 OUString aTmp( rIn );
1146
1147 sal_Unicode c;
1148 while( !aTmp.isEmpty() &&
1149 ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1150 aTmp = aTmp.copy( 1 );
1151
1152 while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1153 || '\t'==c || '\r'==c || '\n'==c ) )
1154 aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1155
1156 // remove SGML comments
1157 if( aTmp.getLength() >= 4 &&
1158 aTmp.startsWith( "<!--" ) )
1159 aTmp = aTmp.copy( 4 );
1160
1161 if( aTmp.getLength() >=3 &&
1162 aTmp.endsWith("-->") )
1163 aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1164
1165 if( aTmp.isEmpty() )
1166 return;
1167
1168 InitRead( aTmp );
1169
1171}
1172
1173void CSS1Parser::ParseStyleOption( const OUString& rIn )
1174{
1175 if( rIn.isEmpty() )
1176 return;
1177
1178 InitRead( rIn );
1179
1180 // fdo#41796: skip over spurious semicolons
1181 while (CSS1_SEMICOLON == m_nToken)
1182 {
1184 }
1185
1186 OUString aProperty;
1187 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1188 if( !pExpr )
1189 return;
1190
1191 // process expression
1192 DeclarationParsed( aProperty, std::move(pExpr) );
1193
1195
1196 // [ ';' declaration ]*
1198 {
1199 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1200
1202 if( CSS1_IDENT==m_nToken )
1203 {
1204 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1205 if( pExp )
1206 {
1207 // process expression
1208 DeclarationParsed( aProperty, std::move(pExp) );
1209 }
1210 }
1211 }
1212}
1213
1214void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1215{
1216}
1217
1218void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1219 std::unique_ptr<CSS1Expression> /* pExpr */ )
1220{
1221}
1222
1224{
1225 delete m_pNext;
1226}
1227
1229{
1230 delete pNext;
1231}
1232
1233void CSS1Expression::GetURL( OUString& rURL ) const
1234{
1235 OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1236
1237 OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1238 aValue.getLength() > 5 &&
1239 '(' == aValue[3] &&
1240 ')' == aValue[aValue.getLength()-1],
1241 "no valid URL(...)" );
1242
1243 if( aValue.getLength() <= 5 )
1244 return;
1245
1246 rURL = aValue.copy( 4, aValue.getLength() - 5 );
1247
1248 // tdf#94088 original stripped only spaces, but there may also be
1249 // double quotes in CSS style URLs, so be prepared to spaces followed
1250 // by a single quote followed by spaces
1251 const sal_Unicode aSpace(' ');
1252 const sal_Unicode aSingleQuote('\'');
1253
1254 rURL = comphelper::string::strip(rURL, aSpace);
1255 rURL = comphelper::string::strip(rURL, aSingleQuote);
1256 rURL = comphelper::string::strip(rURL, aSpace);
1257}
1258
1259bool CSS1Expression::GetColor( Color &rColor ) const
1260{
1261 OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1263 "CSS1-Expression cannot be colour" );
1264
1265 bool bRet = false;
1266 sal_uInt32 nColor = SAL_MAX_UINT32;
1267
1268 switch( eType )
1269 {
1270 case CSS1_RGB:
1271 {
1272 // fourth value to 255 means no alpha transparency
1273 // so the right by default value
1274 sal_uInt8 aColors[4] = { 0, 0, 0, 255 };
1275
1276 // it can be "rgb" or "rgba"
1277 if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1278 (aValue[3] != '(' && aValue[4] != '(' ) || aValue[aValue.getLength()-1] != ')')
1279 {
1280 break;
1281 }
1282
1283 sal_Int32 nPos = aValue.startsWithIgnoreAsciiCase( "rgba" )?5:4; // start after "rgba(" or "rgb("
1284 char cSep = (aValue.indexOf(',') != -1)?',':' ';
1285 // alpha value can be after a "/" or ","
1286 bool bIsSepAlphaDiv = (aValue.indexOf('/') != -1)?true:false;
1287 for ( int nCol = 0; nCol < 4 && nPos > 0; ++nCol )
1288 {
1289 const std::u16string_view aNumber = o3tl::getToken(aValue, 0, cSep, nPos);
1290
1291 sal_Int32 nNumber = o3tl::toInt32(aNumber);
1292 if( nNumber<0 )
1293 {
1294 nNumber = 0;
1295 }
1296 else if( aNumber.find('%') != std::u16string_view::npos )
1297 {
1298 if( nNumber > 100 )
1299 nNumber = 100;
1300 nNumber *= 255;
1301 nNumber /= 100;
1302 }
1303 else if( nNumber > 255 )
1304 nNumber = 255;
1305 else if( aNumber.find('.') != std::u16string_view::npos )
1306 {
1307 // in this case aNumber contains something like "0.3" so not an sal_Int32
1308 nNumber = static_cast<sal_Int32>(255.0*o3tl::toDouble(aNumber));
1309 }
1310 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1311 // rgb with alpha and '/' has this form: rgb(255 0 0 / 50%)
1312 if (bIsSepAlphaDiv && nCol == 2)
1313 {
1314 // but there can be some spaces or not before and after the "/", so skip them
1315 while (aValue[nPos] == '/' || aValue[nPos] == ' ')
1316 ++nPos;
1317 }
1318 }
1319
1320 rColor.SetRed( aColors[0] );
1321 rColor.SetGreen( aColors[1] );
1322 rColor.SetBlue( aColors[2] );
1323 rColor.SetAlpha( aColors[3] );
1324
1325 bRet = true; // something different than a colour isn't possible
1326 }
1327 break;
1328
1329 case CSS1_IDENT:
1330 case CSS1_STRING:
1331 {
1332 OUString aTmp( aValue.toAsciiUpperCase() );
1333 nColor = GetHTMLColor( aTmp );
1334 bRet = nColor != SAL_MAX_UINT32;
1335 }
1336 if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1337 aValue[0] != '#' )
1338 break;
1339 [[fallthrough]];
1340 case CSS1_HEXCOLOR:
1341 {
1342 // MS-IE hack: colour can also be a string
1343 sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1344 bool bDouble = aValue.getLength()-nOffset == 3;
1345 sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1346
1347 nColor = 0;
1348 for( ; i<nEnd; i++ )
1349 {
1350 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1351 : '0' );
1352 if( c >= '0' && c <= '9' )
1353 c -= 48;
1354 else if( c >= 'A' && c <= 'F' )
1355 c -= 55;
1356 else if( c >= 'a' && c <= 'f' )
1357 c -= 87;
1358 else
1359 c = 16;
1360
1361 nColor *= 16;
1362 if( c<16 )
1363 nColor += c;
1364 if( bDouble )
1365 {
1366 nColor *= 16;
1367 if( c<16 )
1368 nColor += c;
1369 }
1370 }
1371 bRet = true;
1372 }
1373 break;
1374 default:
1375 ;
1376 }
1377
1378 if( bRet && nColor!=SAL_MAX_UINT32 )
1379 {
1380 rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1381 rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1382 rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1383 }
1384
1385 return bRet;
1386}
1387
1388/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool IsParserWorking() const
Is the parser still working?
Definition: parcss1.hxx:211
double m_nValue
Definition: parcss1.hxx:193
CSS1Token GetNextToken()
Definition: parcss1.cxx:103
virtual ~CSS1Parser()
Definition: parcss1.cxx:1139
bool m_bEOF
Definition: parcss1.hxx:184
bool IsEOF() const
Definition: parcss1.hxx:213
sal_uInt32 m_nlLineNr
Definition: parcss1.hxx:190
bool m_bWhiteSpace
Definition: parcss1.hxx:183
OUString m_aIn
Definition: parcss1.hxx:198
void InitRead(const OUString &rIn)
prepare parsing
Definition: parcss1.cxx:57
virtual void SelectorParsed(std::unique_ptr< CSS1Selector > pSelector, bool bFirst)
Called after a selector was parsed.
Definition: parcss1.cxx:1214
virtual void DeclarationParsed(const OUString &rProperty, std::unique_ptr< CSS1Expression > pExpr)
Called after a declaration or property was parsed.
Definition: parcss1.cxx:1218
sal_Unicode GetNextChar()
Definition: parcss1.cxx:73
std::unique_ptr< CSS1Expression > ParseDeclaration(OUString &rProperty)
Definition: parcss1.cxx:1011
sal_Unicode m_cNextCh
Definition: parcss1.hxx:186
void ParseRule()
Definition: parcss1.cxx:752
OUString m_aToken
Definition: parcss1.hxx:199
void ParseStyleSheet()
Definition: parcss1.cxx:691
sal_uInt32 m_nlLinePos
Definition: parcss1.hxx:191
CSS1Token m_nToken
Definition: parcss1.hxx:196
CSS1ParserState m_eState
Definition: parcss1.hxx:195
void ParseStyleOption(const OUString &rIn)
parse the content of a HTML style option
Definition: parcss1.cxx:1173
sal_Int32 m_nInPos
Definition: parcss1.hxx:188
std::unique_ptr< CSS1Selector > ParseSelector()
Definition: parcss1.cxx:842
A simple selector.
Definition: parcss1.hxx:93
void SetNext(CSS1Selector *pNxt)
Definition: parcss1.hxx:108
CSS1Selector * m_pNext
Definition: parcss1.hxx:96
void SetGreen(sal_uInt8 nGreen)
void SetRed(sal_uInt8 nRed)
void SetAlpha(sal_uInt8 nAlpha)
void SetBlue(sal_uInt8 nBlue)
DocumentType eType
SVT_DLLPUBLIC sal_uInt32 GetHTMLColor(const rtl::OUString &rName)
sal_uInt16 nPos
if(aStr !=aBuf) UpdateName_Impl(m_xFollowLb.get()
OString strip(const OString &rIn, char c)
int i
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
double toDouble(std::u16string_view str)
#define LOOP_CHECK_RESTART
Definition: parcss1.cxx:38
const sal_Int32 MAX_LEN
Definition: parcss1.cxx:55
#define LOOP_CHECK_DECL
Definition: parcss1.cxx:36
#define LOOP_CHECK_CHECK(where)
Definition: parcss1.cxx:40
CSS1SelectorType
Definition: parcss1.hxx:72
@ CSS1_SELTYPE_ELEM_CLASS
Definition: parcss1.hxx:74
@ CSS1_SELTYPE_CLASS
Definition: parcss1.hxx:75
@ CSS1_SELTYPE_PAGE
Definition: parcss1.hxx:78
@ CSS1_SELTYPE_PSEUDO
Definition: parcss1.hxx:77
@ CSS1_SELTYPE_ELEMENT
Definition: parcss1.hxx:73
@ CSS1_SELTYPE_ID
Definition: parcss1.hxx:76
@ CSS1_PAR_ACCEPTED
Definition: parcss1.hxx:67
@ CSS1_PAR_WORKING
Definition: parcss1.hxx:68
CSS1Token
Definition: parcss1.hxx:31
@ CSS1_PAGE_SYM
Definition: parcss1.hxx:57
@ CSS1_MINUS
Definition: parcss1.hxx:49
@ CSS1_SEMICOLON
Definition: parcss1.hxx:52
@ CSS1_DOT_W_WS
Definition: parcss1.hxx:44
@ CSS1_OBRACE
Definition: parcss1.hxx:50
@ CSS1_PIXLENGTH
Definition: parcss1.hxx:39
@ CSS1_HASH
Definition: parcss1.hxx:54
@ CSS1_COLON
Definition: parcss1.hxx:46
@ CSS1_DOT_WO_WS
Definition: parcss1.hxx:45
@ CSS1_NUMBER
Definition: parcss1.hxx:36
@ CSS1_EMX
Definition: parcss1.hxx:41
@ CSS1_IMPORTANT_SYM
Definition: parcss1.hxx:59
@ CSS1_SLASH
Definition: parcss1.hxx:47
@ CSS1_IMPORT_SYM
Definition: parcss1.hxx:56
@ CSS1_RGB
Definition: parcss1.hxx:62
@ CSS1_PERCENTAGE
Definition: parcss1.hxx:37
@ CSS1_URL
Definition: parcss1.hxx:61
@ CSS1_COMMA
Definition: parcss1.hxx:53
@ CSS1_STRING
Definition: parcss1.hxx:35
@ CSS1_LENGTH
Definition: parcss1.hxx:38
@ CSS1_CBRACE
Definition: parcss1.hxx:51
@ CSS1_NULL
Definition: parcss1.hxx:32
@ CSS1_EMS
Definition: parcss1.hxx:40
@ CSS1_HEXCOLOR
Definition: parcss1.hxx:42
@ CSS1_IDENT
Definition: parcss1.hxx:34
@ CSS1_PLUS
Definition: parcss1.hxx:48
a subexpression of a CSS1 declaration
Definition: parcss1.hxx:120
CSS1Expression * pNext
Definition: parcss1.hxx:126
void SetNext(CSS1Expression *pNxt)
Definition: parcss1.hxx:148
OUString aValue
Definition: parcss1.hxx:124
void GetURL(OUString &rURL) const
Definition: parcss1.cxx:1233
CSS1Token eType
Definition: parcss1.hxx:123
bool GetColor(Color &rRGB) const
Definition: parcss1.cxx:1259
unsigned char sal_uInt8
sal_uInt16 sal_Unicode
#define SAL_MAX_UINT32