LibreOffice Module sw (master) 1
parcss1.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <o3tl/string_view.hxx>
21#include <osl/diagnose.h>
22#include <rtl/character.hxx>
23#include <rtl/ustrbuf.hxx>
24#include <tools/color.hxx>
25#include <tools/solar.h>
26#include <svtools/htmltokn.h>
27#include <comphelper/string.hxx>
28#include "parcss1.hxx"
29
// Loop check: protection against endless loops in the parser. After each
// pass through a loop the current input position is compared with the one
// seen on the previous pass; when it did not move (and the input is not at
// EOF) the loop is left.
#define LOOP_CHECK

#ifdef LOOP_CHECK

// Declares the local that remembers the input position of the previous
// pass; it starts out as SAL_MAX_INT32.
#define LOOP_CHECK_DECL \
    sal_Int32 nOldInPos = SAL_MAX_INT32;

// Puts the remembered position back to its initial value.
#define LOOP_CHECK_RESTART \
    nOldInPos = SAL_MAX_INT32;

// Asserts (with the message 'where') that the input position advanced or
// EOF was reached. If so, the new position is remembered; otherwise the
// enclosing loop is left via 'break'.
#define LOOP_CHECK_CHECK( where ) \
    OSL_ENSURE( m_nInPos!=nOldInPos || sal_Unicode(EOF)==m_cNextCh, where ); \
    if( m_nInPos!=nOldInPos || sal_Unicode(EOF)==m_cNextCh ) \
        nOldInPos = m_nInPos; \
    else \
        break;

#else

// Loop check disabled: all helpers expand to nothing.
#define LOOP_CHECK_DECL
#define LOOP_CHECK_RESTART
#define LOOP_CHECK_CHECK( where )

#endif
54
55const sal_Int32 MAX_LEN = 1024;
56
57void CSS1Parser::InitRead( const OUString& rIn )
58{
59 m_nlLineNr = 0;
60 m_nlLinePos = 0;
61
62 m_bWhiteSpace = true; // if nothing was read it's like there was WS
63 m_bEOF = false;
65 m_nValue = 0.;
66
67 m_aIn = rIn;
68 m_nInPos = 0;
71}
72
74{
75 if( m_nInPos >= m_aIn.getLength() )
76 {
77 m_bEOF = true;
78 return sal_Unicode(EOF);
79 }
80
82 m_nInPos++;
83
84 if( c == '\n' )
85 {
86 ++m_nlLineNr;
87 m_nlLinePos = 1;
88 }
89 else
91
92 return c;
93}
94
95// This function implements the scanner described in
96
97// http://www.w3.org/pub/WWW/TR/WD-css1.html
98// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
99
100// for CSS1. It's a direct implementation of the
101// described Lex grammar.
102
104{
105 CSS1Token nRet = CSS1_NULL;
106 m_aToken.clear();
107
108 do {
109 // remember if white space was read
110 bool bPrevWhiteSpace = m_bWhiteSpace;
111 m_bWhiteSpace = false;
112
113 bool bNextCh = true;
114 switch( m_cNextCh )
115 {
116 case '/': // COMMENT | '/'
117 {
119 if( '*' == m_cNextCh )
120 {
121 // COMMENT
123
124 bool bAsterisk = false;
125 while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
126 {
127 bAsterisk = ('*'==m_cNextCh);
129 }
130 }
131 else
132 {
133 // '/'
134 bNextCh = false;
135 nRet = CSS1_SLASH;
136 }
137 }
138 break;
139
140 case '@': // '@import' | '@XXX'
141 {
143 if (rtl::isAsciiAlpha(m_cNextCh))
144 {
145 // scan the next identifier
146 OUStringBuffer sTmpBuffer(32);
147 do {
148 sTmpBuffer.append( m_cNextCh );
150 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
151 '-' == m_cNextCh) && !IsEOF() );
152
153 m_aToken += sTmpBuffer;
154
155 // check if we know it
156 switch( m_aToken[0] )
157 {
158 case 'i':
159 case 'I':
160 if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
161 nRet = CSS1_IMPORT_SYM;
162 break;
163 case 'p':
164 case 'P':
165 if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
166 nRet = CSS1_PAGE_SYM;
167 break;
168 }
169
170 // error handling: ignore '@ident' and the rest until
171 // semicolon at end of the next block
172 if( CSS1_NULL==nRet )
173 {
174 m_aToken.clear();
175 int nBlockLvl = 0;
176 sal_Unicode cQuoteCh = 0;
177 bool bDone = false, bEscape = false;
178 while( !bDone && !IsEOF() )
179 {
180 bool bOldEscape = bEscape;
181 bEscape = false;
182 switch( m_cNextCh )
183 {
184 case '{':
185 if( !cQuoteCh && !bOldEscape )
186 nBlockLvl++;
187 break;
188 case ';':
189 if( !cQuoteCh && !bOldEscape )
190 bDone = nBlockLvl==0;
191 break;
192 case '}':
193 if( !cQuoteCh && !bOldEscape )
194 bDone = --nBlockLvl==0;
195 break;
196 case '\"':
197 case '\'':
198 if( !bOldEscape )
199 {
200 if( cQuoteCh )
201 {
202 if( cQuoteCh == m_cNextCh )
203 cQuoteCh = 0;
204 }
205 else
206 {
207 cQuoteCh = m_cNextCh;
208 }
209 }
210 break;
211 case '\\':
212 if( !bOldEscape )
213 bEscape = true;
214 break;
215 }
217 }
218 }
219
220 bNextCh = false;
221 }
222 }
223 break;
224
225 case '!': // '!' 'legal' | '!' 'important' | syntax error
226 {
227 // ignore white space
229 while( ( ' ' == m_cNextCh ||
230 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
231 {
232 m_bWhiteSpace = true;
234 }
235
236 if( 'i'==m_cNextCh || 'I'==m_cNextCh)
237 {
238 // scan next identifier
239 OUStringBuffer sTmpBuffer(32);
240 do {
241 sTmpBuffer.append( m_cNextCh );
243 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
244 '-' == m_cNextCh) && !IsEOF() );
245
246 m_aToken += sTmpBuffer;
247
248 if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
249 m_aToken.equalsIgnoreAsciiCase( "important" ) )
250 {
251 // '!' 'important'
252 nRet = CSS1_IMPORTANT_SYM;
253 }
254 else
255 {
256 // error handling: ignore '!', not IDENT
257 nRet = CSS1_IDENT;
258 }
259
260 m_bWhiteSpace = false;
261 bNextCh = false;
262 }
263 else
264 {
265 // error handling: ignore '!'
266 bNextCh = false;
267 }
268 }
269 break;
270
271 case '\"':
272 case '\'': // STRING
273 {
274 // \... isn't possible yet!!!
275 sal_Unicode cQuoteChar = m_cNextCh;
277
278 OUStringBuffer sTmpBuffer( MAX_LEN );
279 do {
280 sTmpBuffer.append( m_cNextCh );
282 } while( cQuoteChar != m_cNextCh && !IsEOF() );
283
284 m_aToken += sTmpBuffer;
285
286 nRet = CSS1_STRING;
287 }
288 break;
289
290 case '0':
291 case '1':
292 case '2':
293 case '3':
294 case '4':
295 case '5':
296 case '6':
297 case '7':
298 case '8':
299 case '9': // NUMBER | PERCENTAGE | LENGTH
300 {
301 // save current position
302 std::size_t nInPosSave = m_nInPos;
303 sal_Unicode cNextChSave = m_cNextCh;
304 sal_uInt32 nlLineNrSave = m_nlLineNr;
305 sal_uInt32 nlLinePosSave = m_nlLinePos;
306 bool bEOFSave = m_bEOF;
307
308 // first try to parse a hex digit
309 OUStringBuffer sTmpBuffer( 16 );
310 do {
311 sTmpBuffer.append( m_cNextCh );
313 } while( sTmpBuffer.getLength() < 7 &&
314 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
315 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
316 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
317 !IsEOF() );
318
319 if( sTmpBuffer.getLength()==6 )
320 {
321 // we found a color in hex
322 m_aToken += sTmpBuffer;
323 nRet = CSS1_HEXCOLOR;
324 bNextCh = false;
325
326 break;
327 }
328
329 // otherwise we try a number
330 m_nInPos = nInPosSave;
331 m_cNextCh = cNextChSave;
332 m_nlLineNr = nlLineNrSave;
333 m_nlLinePos = nlLinePosSave;
334 m_bEOF = bEOFSave;
335
336 // first parse the number
337 sTmpBuffer.setLength( 0 );
338 do {
339 sTmpBuffer.append( m_cNextCh );
341 } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
342 !IsEOF() );
343
344 m_aToken += sTmpBuffer;
345 m_nValue = m_aToken.toDouble();
346
347 // ignore white space
348 while( ( ' ' == m_cNextCh ||
349 (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
350 {
351 m_bWhiteSpace = true;
353 }
354
355 // check now, if there is a unit
356 switch( m_cNextCh )
357 {
358 case '%': // PERCENTAGE
359 m_bWhiteSpace = false;
360 nRet = CSS1_PERCENTAGE;
361 break;
362
363 case 'c':
364 case 'C': // LENGTH cm | LENGTH IDENT
365 case 'e':
366 case 'E': // LENGTH (em | ex) | LENGTH IDENT
367 case 'i':
368 case 'I': // LENGTH inch | LENGTH IDENT
369 case 'p':
370 case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
371 case 'm':
372 case 'M': // LENGTH mm | LENGTH IDENT
373 {
374 // save current position
375 sal_Int32 nInPosOld = m_nInPos;
376 sal_Unicode cNextChOld = m_cNextCh;
377 sal_uLong nlLineNrOld = m_nlLineNr;
378 sal_uLong nlLinePosOld = m_nlLinePos;
379 bool bEOFOld = m_bEOF;
380
381 // parse the next identifier
382 OUString aIdent;
383 OUStringBuffer sTmpBuffer2(64);
384 do {
385 sTmpBuffer2.append( m_cNextCh );
387 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
388 '-' == m_cNextCh) && !IsEOF() );
389
390 aIdent += sTmpBuffer2;
391
392 // Is it a unit?
393 const char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
394 double nScale1 = 1., nScale2 = 1.;
395 CSS1Token nToken1 = CSS1_LENGTH,
396 nToken2 = CSS1_LENGTH,
397 nToken3 = CSS1_LENGTH;
398 switch( aIdent[0] )
399 {
400 case 'c':
401 case 'C':
402 pCmp1 = "cm";
403 nScale1 = (72.*20.)/2.54; // twip
404 break;
405 case 'e':
406 case 'E':
407 pCmp1 = "em";
408 nToken1 = CSS1_EMS;
409
410 pCmp2 = "ex";
411 nToken2 = CSS1_EMX;
412 break;
413 case 'i':
414 case 'I':
415 pCmp1 = "in";
416 nScale1 = 72.*20.; // twip
417 break;
418 case 'm':
419 case 'M':
420 pCmp1 = "mm";
421 nScale1 = (72.*20.)/25.4; // twip
422 break;
423 case 'p':
424 case 'P':
425 pCmp1 = "pt";
426 nScale1 = 20.; // twip
427
428 pCmp2 = "pc";
429 nScale2 = 12.*20.; // twip
430
431 pCmp3 = "px";
432 nToken3 = CSS1_PIXLENGTH;
433 break;
434 }
435
436 double nScale = 0.0;
437 OSL_ENSURE( pCmp1, "Where does the first digit come from?" );
438 if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
439 {
440 nScale = nScale1;
441 nRet = nToken1;
442 }
443 else if( pCmp2 &&
444 aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
445 {
446 nScale = nScale2;
447 nRet = nToken2;
448 }
449 else if( pCmp3 &&
450 aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
451 {
452 nScale = 1.; // nScale3
453 nRet = nToken3;
454 }
455 else
456 {
457 nRet = CSS1_NUMBER;
458 }
459
460 if( CSS1_LENGTH==nRet && nScale!=1.0 )
461 m_nValue *= nScale;
462
463 if( nRet == CSS1_NUMBER )
464 {
465 m_nInPos = nInPosOld;
466 m_cNextCh = cNextChOld;
467 m_nlLineNr = nlLineNrOld;
468 m_nlLinePos = nlLinePosOld;
469 m_bEOF = bEOFOld;
470 }
471 else
472 {
473 m_bWhiteSpace = false;
474 }
475 bNextCh = false;
476 }
477 break;
478 default: // NUMBER IDENT
479 bNextCh = false;
480 nRet = CSS1_NUMBER;
481 break;
482 }
483 }
484 break;
485
486 case ':': // ':'
487 // catch link/visited/active !!!
488 nRet = CSS1_COLON;
489 break;
490
491 case '.': // DOT_W_WS | DOT_WO_WS
492 nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
493 break;
494
495 case '+': // '+'
496 nRet = CSS1_PLUS;
497 break;
498
499 case '-': // '-'
500 nRet = CSS1_MINUS;
501 break;
502
503 case '{': // '{'
504 nRet = CSS1_OBRACE;
505 break;
506
507 case '}': // '}'
508 nRet = CSS1_CBRACE;
509 break;
510
511 case ';': // ';'
512 nRet = CSS1_SEMICOLON;
513 break;
514
515 case ',': // ','
516 nRet = CSS1_COMMA;
517 break;
518
519 case '#': // '#'
521 if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
522 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
523 ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
524 {
525 // save current position
526 sal_Int32 nInPosSave = m_nInPos;
527 sal_Unicode cNextChSave = m_cNextCh;
528 sal_uLong nlLineNrSave = m_nlLineNr;
529 sal_uLong nlLinePosSave = m_nlLinePos;
530 bool bEOFSave = m_bEOF;
531
532 // first try to parse a hex digit
533 OUStringBuffer sTmpBuffer(6);
534 do {
535 sTmpBuffer.append( m_cNextCh );
537 } while( sTmpBuffer.getLength() < 7 &&
538 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
539 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
540 ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
541 !IsEOF() );
542
543 if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
544 {
545 // we found a color in hex
546 m_aToken += sTmpBuffer;
547 nRet = CSS1_HEXCOLOR;
548 bNextCh = false;
549
550 break;
551 }
552
553 // otherwise we try a number
554 m_nInPos = nInPosSave;
555 m_cNextCh = cNextChSave;
556 m_nlLineNr = nlLineNrSave;
557 m_nlLinePos = nlLinePosSave;
558 m_bEOF = bEOFSave;
559 }
560
561 nRet = CSS1_HASH;
562 bNextCh = false;
563 break;
564
565 case ' ':
566 case '\t':
567 case '\r':
568 case '\n': // White-Space
569 m_bWhiteSpace = true;
570 break;
571
572 case sal_Unicode(EOF):
573 if( IsEOF() )
574 {
576 bNextCh = false;
577 break;
578 }
579 [[fallthrough]];
580
581 default: // IDENT | syntax error
582 if (rtl::isAsciiAlpha(m_cNextCh))
583 {
584 // IDENT
585
586 bool bHexColor = true;
587
588 // parse the next identifier
589 OUStringBuffer sTmpBuffer(64);
590 do {
591 sTmpBuffer.append( m_cNextCh );
592 if( bHexColor )
593 {
594 bHexColor = sTmpBuffer.getLength()<7 &&
595 ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
596 ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
597 ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
598 }
600 } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
601 '-' == m_cNextCh) && !IsEOF() );
602
603 m_aToken += sTmpBuffer;
604
605 if( bHexColor && sTmpBuffer.getLength()==6 )
606 {
607 bNextCh = false;
608 nRet = CSS1_HEXCOLOR;
609
610 break;
611 }
612 if( '('==m_cNextCh &&
613 ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
614 m_aToken.equalsIgnoreAsciiCase( "url" )) ||
615 (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
616 m_aToken.equalsIgnoreAsciiCase( "rgb" )) ) )
617 {
618 int nNestCnt = 0;
619 OUStringBuffer sTmpBuffer2(64);
620 do {
621 sTmpBuffer2.append( m_cNextCh );
622 switch( m_cNextCh )
623 {
624 case '(': nNestCnt++; break;
625 case ')': nNestCnt--; break;
626 }
628 } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
629 sTmpBuffer2.append( m_cNextCh );
630 m_aToken += sTmpBuffer2;
631 bNextCh = true;
632 nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
633 ? CSS1_URL
634 : CSS1_RGB;
635 }
636 else
637 {
638 bNextCh = false;
639 nRet = CSS1_IDENT;
640 }
641 }
642 // error handling: ignore digit
643 break;
644 }
645 if( bNextCh )
647
648 } while( CSS1_NULL==nRet && IsParserWorking() );
649
650 return nRet;
651}
652
653// These functions implement the parser described in
654
655// http://www.w3.org/pub/WWW/TR/WD-css1.html
656// resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
657
658// for CSS1. It's a direct implementation of the
659// described Lex grammar.
660
661// stylesheet
662// : import* rule*
663
664// import
665// : IMPORT_SYM url
666
667// url
668// : STRING
669
671{
673
674 // import*
675 bool bDone = false;
676 while( !bDone && IsParserWorking() )
677 {
678 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
679
680 switch( m_nToken )
681 {
682 case CSS1_IMPORT_SYM:
683 // IMPORT_SYM url
684 // URL are skipped without checks
686 break;
687 case CSS1_IDENT: // Look-Aheads
688 case CSS1_DOT_W_WS:
689 case CSS1_HASH:
690 case CSS1_PAGE_SYM:
691 // rule
692 bDone = true;
693 break;
694 default:
695 // error handling: ignore
696 break;
697 }
698
699 if( !bDone )
701 }
702
704
705 // rule *
706 while( IsParserWorking() )
707 {
708 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
709
710 switch( m_nToken )
711 {
712 case CSS1_IDENT: // Look-Aheads
713 case CSS1_DOT_W_WS:
714 case CSS1_HASH:
715 case CSS1_PAGE_SYM:
716 // rule
717 ParseRule();
718 break;
719 default:
720 // error handling: ignore
722 break;
723 }
724 }
725}
726
727// rule
728// : selector [ ',' selector ]*
729// '{' declaration [ ';' declaration ]* '}'
730
732{
733 // selector
734 std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
735 if( !pSelector )
736 return;
737
738 // process selector
739 SelectorParsed( std::move(pSelector), true );
740
742
743 // [ ',' selector ]*
744 while( CSS1_COMMA==m_nToken && IsParserWorking() )
745 {
746 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
747
748 // ignore ','
750
751 // selector
752 pSelector = ParseSelector();
753 if( !pSelector )
754 return;
755
756 // process selector
757 SelectorParsed( std::move(pSelector), false );
758 }
759
760 // '{'
761 if( CSS1_OBRACE != m_nToken )
762 return;
764
765 // declaration
766 OUString aProperty;
767 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
768 if( !pExpr )
769 return;
770
771 // process expression
772 DeclarationParsed( aProperty, std::move(pExpr) );
773
775
776 // [ ';' declaration ]*
778 {
779 LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
780
781 // ';'
783
784 // declaration
785 if( CSS1_IDENT == m_nToken )
786 {
787 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
788 if( pExp )
789 {
790 // process expression
791 DeclarationParsed( aProperty, std::move(pExp));
792 }
793 }
794 }
795
796 // '}'
797 if( CSS1_CBRACE == m_nToken )
799}
800
801// selector
802// : simple_selector+ [ ':' pseudo_element ]?
803
804// simple_selector
805// : element_name [ DOT_WO_WS class ]?
806// | DOT_W_WS class
807// | id_selector
808
809// element_name
810// : IDENT
811
812// class
813// : IDENT
814
815// id_selector
816// : '#' IDENT
817
818// pseudo_element
819// : IDENT
820
821std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
822{
823 std::unique_ptr<CSS1Selector> pRoot;
824 CSS1Selector *pLast = nullptr;
825
826 bool bDone = false;
827 CSS1Selector *pNew = nullptr;
828
830
831 // simple_selector+
832 while( !bDone && IsParserWorking() )
833 {
834 LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
835
836 bool bNextToken = true;
837
838 switch( m_nToken )
839 {
840 case CSS1_IDENT:
841 {
842 // element_name [ DOT_WO_WS class ]?
843
844 // element_name
845 OUString aElement = m_aToken;
848
849 if( CSS1_DOT_WO_WS == m_nToken )
850 {
851 // DOT_WO_WS
853
854 // class
855 if( CSS1_IDENT == m_nToken )
856 {
857 aElement += "." + m_aToken;
859 }
860 else
861 {
862 // missing class
863 return pRoot;
864 }
865 }
866 else
867 {
868 // that was a look-ahead
869 bNextToken = false;
870 }
871 pNew = new CSS1Selector( eType, aElement );
872 }
873 break;
874 case CSS1_DOT_W_WS:
875 // DOT_W_WS class
876
877 // DOT_W_WS
879
880 if( CSS1_IDENT==m_nToken )
881 {
882 // class
884 }
885 else
886 {
887 // missing class
888 return pRoot;
889 }
890 break;
891 case CSS1_HASH:
892 // '#' id_selector
893
894 // '#'
896
897 if( CSS1_IDENT==m_nToken )
898 {
899 // id_selector
901 }
902 else
903 {
904 // missing id_selector
905 return pRoot;
906 }
907 break;
908
909 case CSS1_PAGE_SYM:
910 {
911 // @page
913 }
914 break;
915
916 default:
917 // stop because we don't know what's next
918 bDone = true;
919 break;
920 }
921
922 // if created a new selector then save it
923 if( pNew )
924 {
925 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
926 "Root-Selector, but no Last" );
927 if( pLast )
928 pLast->SetNext( pNew );
929 else
930 pRoot.reset(pNew);
931
932 pLast = pNew;
933 pNew = nullptr;
934 }
935
936 if( bNextToken && !bDone )
938 }
939
940 if( !pRoot )
941 {
942 // missing simple_selector
943 return pRoot;
944 }
945
946 // [ ':' pseudo_element ]?
948 {
949 // ':' pseudo element
951 if( CSS1_IDENT==m_nToken )
952 {
953 if (pLast)
956 }
957 else
958 {
959 // missing pseudo_element
960 return pRoot;
961 }
962 }
963
964 return pRoot;
965}
966
967// declaration
968// : property ':' expr prio?
969// | /* empty */
970
971// expression
972// : term [ operator term ]*
973
974// term
975// : unary_operator?
976// [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
977// HEXCOLOR | URL | RGB ]
978
979// operator
980// : '/' | ',' | /* empty */
981
982// unary_operator
983// : '-' | '+'
984
985// property
986// : ident
987
988// the sign is only used for numeric values (except PERCENTAGE)
989// and it's applied on nValue!
990std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
991{
992 std::unique_ptr<CSS1Expression> pRoot;
993 CSS1Expression *pLast = nullptr;
994
995 // property
996 if( CSS1_IDENT != m_nToken )
997 {
998 // missing property
999 return pRoot;
1000 }
1001 rProperty = m_aToken;
1002
1004
1005 // ':'
1006 if( CSS1_COLON != m_nToken )
1007 {
1008 // missing ':'
1009 return pRoot;
1010 }
1012
1013 // term [operator term]*
1014 // here we're pretty lax regarding the syntax, but this shouldn't
1015 // be a problem
1016 bool bDone = false;
1017 sal_Unicode cSign = 0, cOp = 0;
1018 CSS1Expression *pNew = nullptr;
1019
1021
1022 while( !bDone && IsParserWorking() )
1023 {
1024 LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1025
1026 switch( m_nToken )
1027 {
1028 case CSS1_MINUS:
1029 cSign = '-';
1030 break;
1031
1032 case CSS1_PLUS:
1033 cSign = '+';
1034 break;
1035
1036 case CSS1_NUMBER:
1037 case CSS1_LENGTH:
1038 case CSS1_PIXLENGTH:
1039 case CSS1_EMS:
1040 case CSS1_EMX:
1041 if( '-'==cSign )
1042 m_nValue = -m_nValue;
1043 [[fallthrough]];
1044 case CSS1_STRING:
1045 case CSS1_PERCENTAGE:
1046 case CSS1_IDENT:
1047 case CSS1_URL:
1048 case CSS1_RGB:
1049 case CSS1_HEXCOLOR:
1050 pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1051 m_nValue = 0; // otherwise this also is applied to next ident
1052 cSign = 0;
1053 cOp = 0;
1054 break;
1055
1056 case CSS1_SLASH:
1057 cOp = '/';
1058 cSign = 0;
1059 break;
1060
1061 case CSS1_COMMA:
1062 cOp = ',';
1063 cSign = 0;
1064 break;
1065
1066 default:
1067 bDone = true;
1068 break;
1069 }
1070
1071 // if created a new expression save it
1072 if( pNew )
1073 {
1074 OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1075 "Root-Selector, but no Last" );
1076 if( pLast )
1077 pLast->SetNext( pNew );
1078 else
1079 pRoot.reset(pNew);
1080
1081 pLast = pNew;
1082 pNew = nullptr;
1083 }
1084
1085 if( !bDone )
1087 }
1088
1089 if( !pRoot )
1090 {
1091 // missing term
1092 return pRoot;
1093 }
1094
1095 // prio?
1097 {
1098 // IMPORTANT_SYM
1100 }
1101
1102 return pRoot;
1103}
1104
1106 : m_bWhiteSpace(false)
1107 , m_bEOF(false)
1108 , m_cNextCh(0)
1109 , m_nInPos(0)
1110 , m_nlLineNr(0)
1111 , m_nlLinePos(0)
1112 , m_nValue(0)
1113 , m_eState(CSS1_PAR_ACCEPTED)
1114 , m_nToken(CSS1_NULL)
1115{
1116}
1117
1119{
1120}
1121
1122void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1123{
1124 OUString aTmp( rIn );
1125
1126 sal_Unicode c;
1127 while( !aTmp.isEmpty() &&
1128 ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1129 aTmp = aTmp.copy( 1 );
1130
1131 while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1132 || '\t'==c || '\r'==c || '\n'==c ) )
1133 aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1134
1135 // remove SGML comments
1136 if( aTmp.getLength() >= 4 &&
1137 aTmp.startsWith( "<!--" ) )
1138 aTmp = aTmp.copy( 4 );
1139
1140 if( aTmp.getLength() >=3 &&
1141 aTmp.endsWith("-->") )
1142 aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1143
1144 if( aTmp.isEmpty() )
1145 return;
1146
1147 InitRead( aTmp );
1148
1150}
1151
1152void CSS1Parser::ParseStyleOption( const OUString& rIn )
1153{
1154 if( rIn.isEmpty() )
1155 return;
1156
1157 InitRead( rIn );
1158
1159 // fdo#41796: skip over spurious semicolons
1160 while (CSS1_SEMICOLON == m_nToken)
1161 {
1163 }
1164
1165 OUString aProperty;
1166 std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1167 if( !pExpr )
1168 return;
1169
1170 // process expression
1171 DeclarationParsed( aProperty, std::move(pExpr) );
1172
1174
1175 // [ ';' declaration ]*
1177 {
1178 LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1179
1181 if( CSS1_IDENT==m_nToken )
1182 {
1183 std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1184 if( pExp )
1185 {
1186 // process expression
1187 DeclarationParsed( aProperty, std::move(pExp) );
1188 }
1189 }
1190 }
1191}
1192
1193void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1194{
1195}
1196
1197void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1198 std::unique_ptr<CSS1Expression> /* pExpr */ )
1199{
1200}
1201
1203{
1204 delete m_pNext;
1205}
1206
1208{
1209 delete pNext;
1210}
1211
1212void CSS1Expression::GetURL( OUString& rURL ) const
1213{
1214 OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1215
1216 OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1217 aValue.getLength() > 5 &&
1218 '(' == aValue[3] &&
1219 ')' == aValue[aValue.getLength()-1],
1220 "no valid URL(...)" );
1221
1222 if( aValue.getLength() <= 5 )
1223 return;
1224
1225 rURL = aValue.copy( 4, aValue.getLength() - 5 );
1226
1227 // tdf#94088 original stripped only spaces, but there may also be
1228 // double quotes in CSS style URLs, so be prepared to spaces followed
1229 // by a single quote followed by spaces
1230 const sal_Unicode aSpace(' ');
1231 const sal_Unicode aSingleQuote('\'');
1232
1233 rURL = comphelper::string::strip(rURL, aSpace);
1234 rURL = comphelper::string::strip(rURL, aSingleQuote);
1235 rURL = comphelper::string::strip(rURL, aSpace);
1236}
1237
1238bool CSS1Expression::GetColor( Color &rColor ) const
1239{
1240 OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1242 "CSS1-Expression cannot be colour" );
1243
1244 bool bRet = false;
1245 sal_uInt32 nColor = SAL_MAX_UINT32;
1246
1247 switch( eType )
1248 {
1249 case CSS1_RGB:
1250 {
1251 sal_uInt8 aColors[3] = { 0, 0, 0 };
1252
1253 if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1254 aValue[3] != '(' || aValue[aValue.getLength()-1] != ')')
1255 {
1256 break;
1257 }
1258
1259 sal_Int32 nPos = 4; // start after "rgb("
1260 for ( int nCol = 0; nCol < 3 && nPos > 0; ++nCol )
1261 {
1262 const std::u16string_view aNumber = o3tl::getToken(aValue, 0, ',', nPos);
1263
1264 sal_Int32 nNumber = o3tl::toInt32(aNumber);
1265 if( nNumber<0 )
1266 {
1267 nNumber = 0;
1268 }
1269 else if( aNumber.find('%') != std::u16string_view::npos )
1270 {
1271 if( nNumber > 100 )
1272 nNumber = 100;
1273 nNumber *= 255;
1274 nNumber /= 100;
1275 }
1276 else if( nNumber > 255 )
1277 nNumber = 255;
1278
1279 aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1280 }
1281
1282 rColor.SetRed( aColors[0] );
1283 rColor.SetGreen( aColors[1] );
1284 rColor.SetBlue( aColors[2] );
1285
1286 bRet = true; // something different than a colour isn't possible
1287 }
1288 break;
1289
1290 case CSS1_IDENT:
1291 case CSS1_STRING:
1292 {
1293 OUString aTmp( aValue.toAsciiUpperCase() );
1294 nColor = GetHTMLColor( aTmp );
1295 bRet = nColor != SAL_MAX_UINT32;
1296 }
1297 if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1298 aValue[0] != '#' )
1299 break;
1300 [[fallthrough]];
1301 case CSS1_HEXCOLOR:
1302 {
1303 // MS-IE hack: colour can also be a string
1304 sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1305 bool bDouble = aValue.getLength()-nOffset == 3;
1306 sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1307
1308 nColor = 0;
1309 for( ; i<nEnd; i++ )
1310 {
1311 sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1312 : '0' );
1313 if( c >= '0' && c <= '9' )
1314 c -= 48;
1315 else if( c >= 'A' && c <= 'F' )
1316 c -= 55;
1317 else if( c >= 'a' && c <= 'f' )
1318 c -= 87;
1319 else
1320 c = 16;
1321
1322 nColor *= 16;
1323 if( c<16 )
1324 nColor += c;
1325 if( bDouble )
1326 {
1327 nColor *= 16;
1328 if( c<16 )
1329 nColor += c;
1330 }
1331 }
1332 bRet = true;
1333 }
1334 break;
1335 default:
1336 ;
1337 }
1338
1339 if( bRet && nColor!=SAL_MAX_UINT32 )
1340 {
1341 rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1342 rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1343 rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1344 }
1345
1346 return bRet;
1347}
1348
1349/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool IsParserWorking() const
Is the parser still working?
Definition: parcss1.hxx:210
double m_nValue
Definition: parcss1.hxx:192
CSS1Token GetNextToken()
Definition: parcss1.cxx:103
virtual ~CSS1Parser()
Definition: parcss1.cxx:1118
bool m_bEOF
Definition: parcss1.hxx:183
bool IsEOF() const
Definition: parcss1.hxx:212
sal_uInt32 m_nlLineNr
Definition: parcss1.hxx:189
bool m_bWhiteSpace
Definition: parcss1.hxx:182
OUString m_aIn
Definition: parcss1.hxx:197
void InitRead(const OUString &rIn)
prepare parsing
Definition: parcss1.cxx:57
virtual void SelectorParsed(std::unique_ptr< CSS1Selector > pSelector, bool bFirst)
Called after a selector was parsed.
Definition: parcss1.cxx:1193
virtual void DeclarationParsed(const OUString &rProperty, std::unique_ptr< CSS1Expression > pExpr)
Called after a declaration or property was parsed.
Definition: parcss1.cxx:1197
sal_Unicode GetNextChar()
Definition: parcss1.cxx:73
std::unique_ptr< CSS1Expression > ParseDeclaration(OUString &rProperty)
Definition: parcss1.cxx:990
sal_Unicode m_cNextCh
Definition: parcss1.hxx:185
void ParseRule()
Definition: parcss1.cxx:731
OUString m_aToken
Definition: parcss1.hxx:198
void ParseStyleSheet()
Definition: parcss1.cxx:670
sal_uInt32 m_nlLinePos
Definition: parcss1.hxx:190
CSS1Token m_nToken
Definition: parcss1.hxx:195
CSS1ParserState m_eState
Definition: parcss1.hxx:194
void ParseStyleOption(const OUString &rIn)
parse the content of a HTML style option
Definition: parcss1.cxx:1152
sal_Int32 m_nInPos
Definition: parcss1.hxx:187
std::unique_ptr< CSS1Selector > ParseSelector()
Definition: parcss1.cxx:821
A simple selector.
Definition: parcss1.hxx:93
void SetNext(CSS1Selector *pNxt)
Definition: parcss1.hxx:108
CSS1Selector * m_pNext
Definition: parcss1.hxx:96
void SetGreen(sal_uInt8 nGreen)
void SetRed(sal_uInt8 nRed)
void SetBlue(sal_uInt8 nBlue)
DocumentType eType
SVT_DLLPUBLIC sal_uInt32 GetHTMLColor(const rtl::OUString &rName)
sal_uInt16 nPos
if(aStr !=aBuf) UpdateName_Impl(m_xFollowLb.get()
OString strip(const OString &rIn, char c)
int i
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
#define LOOP_CHECK_RESTART
Definition: parcss1.cxx:38
const sal_Int32 MAX_LEN
Definition: parcss1.cxx:55
#define LOOP_CHECK_DECL
Definition: parcss1.cxx:36
#define LOOP_CHECK_CHECK(where)
Definition: parcss1.cxx:40
CSS1SelectorType
Definition: parcss1.hxx:72
@ CSS1_SELTYPE_ELEM_CLASS
Definition: parcss1.hxx:74
@ CSS1_SELTYPE_CLASS
Definition: parcss1.hxx:75
@ CSS1_SELTYPE_PAGE
Definition: parcss1.hxx:78
@ CSS1_SELTYPE_PSEUDO
Definition: parcss1.hxx:77
@ CSS1_SELTYPE_ELEMENT
Definition: parcss1.hxx:73
@ CSS1_SELTYPE_ID
Definition: parcss1.hxx:76
@ CSS1_PAR_ACCEPTED
Definition: parcss1.hxx:67
@ CSS1_PAR_WORKING
Definition: parcss1.hxx:68
CSS1Token
Definition: parcss1.hxx:31
@ CSS1_PAGE_SYM
Definition: parcss1.hxx:57
@ CSS1_MINUS
Definition: parcss1.hxx:49
@ CSS1_SEMICOLON
Definition: parcss1.hxx:52
@ CSS1_DOT_W_WS
Definition: parcss1.hxx:44
@ CSS1_OBRACE
Definition: parcss1.hxx:50
@ CSS1_PIXLENGTH
Definition: parcss1.hxx:39
@ CSS1_HASH
Definition: parcss1.hxx:54
@ CSS1_COLON
Definition: parcss1.hxx:46
@ CSS1_DOT_WO_WS
Definition: parcss1.hxx:45
@ CSS1_NUMBER
Definition: parcss1.hxx:36
@ CSS1_EMX
Definition: parcss1.hxx:41
@ CSS1_IMPORTANT_SYM
Definition: parcss1.hxx:59
@ CSS1_SLASH
Definition: parcss1.hxx:47
@ CSS1_IMPORT_SYM
Definition: parcss1.hxx:56
@ CSS1_RGB
Definition: parcss1.hxx:62
@ CSS1_PERCENTAGE
Definition: parcss1.hxx:37
@ CSS1_URL
Definition: parcss1.hxx:61
@ CSS1_COMMA
Definition: parcss1.hxx:53
@ CSS1_STRING
Definition: parcss1.hxx:35
@ CSS1_LENGTH
Definition: parcss1.hxx:38
@ CSS1_CBRACE
Definition: parcss1.hxx:51
@ CSS1_NULL
Definition: parcss1.hxx:32
@ CSS1_EMS
Definition: parcss1.hxx:40
@ CSS1_HEXCOLOR
Definition: parcss1.hxx:42
@ CSS1_IDENT
Definition: parcss1.hxx:34
@ CSS1_PLUS
Definition: parcss1.hxx:48
sal_uIntPtr sal_uLong
a subexpression of a CSS1 declaration
Definition: parcss1.hxx:120
CSS1Expression * pNext
Definition: parcss1.hxx:125
void SetNext(CSS1Expression *pNxt)
Definition: parcss1.hxx:147
OUString aValue
Definition: parcss1.hxx:123
void GetURL(OUString &rURL) const
Definition: parcss1.cxx:1212
CSS1Token eType
Definition: parcss1.hxx:122
bool GetColor(Color &rRGB) const
Definition: parcss1.cxx:1238
unsigned char sal_uInt8
sal_uInt16 sal_Unicode
#define SAL_MAX_UINT32