LibreOffice Module sw (master)  1
parcss1.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <osl/diagnose.h>
21 #include <rtl/character.hxx>
22 #include <rtl/ustrbuf.hxx>
23 #include <tools/color.hxx>
24 #include <tools/solar.h>
25 #include <svtools/htmltokn.h>
26 #include <comphelper/string.hxx>
27 #include "parcss1.hxx"
28 
29 // Loop check: guards against infinite loops. After every loop iteration
30 // it is checked whether the input position has made progress.
31 #define LOOP_CHECK
32 
33 #ifdef LOOP_CHECK
34 
35 #define LOOP_CHECK_DECL \
36  sal_Int32 nOldInPos = SAL_MAX_INT32;
37 #define LOOP_CHECK_RESTART \
38  nOldInPos = SAL_MAX_INT32;
39 #define LOOP_CHECK_CHECK( where ) \
40  OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where ); \
41  if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) ) \
42  break; \
43  else \
44  nOldInPos = m_nInPos;
45 
46 #else
47 
48 #define LOOP_CHECK_DECL
49 #define LOOP_CHECK_RESTART
50 #define LOOP_CHECK_CHECK( where )
51 
52 #endif
53 
54 const sal_Int32 MAX_LEN = 1024;
55 
56 void CSS1Parser::InitRead( const OUString& rIn )
57 {
58  m_nlLineNr = 0;
59  m_nlLinePos = 0;
60 
61  m_bWhiteSpace = true; // if nothing was read it's like there was WS
62  m_bEOF = false;
64  m_nValue = 0.;
65 
66  m_aIn = rIn;
67  m_nInPos = 0;
70 }
71 
73 {
74  if( m_nInPos >= m_aIn.getLength() )
75  {
76  m_bEOF = true;
77  return sal_Unicode(EOF);
78  }
79 
81  m_nInPos++;
82 
83  if( c == '\n' )
84  {
85  ++m_nlLineNr;
86  m_nlLinePos = 1;
87  }
88  else
89  ++m_nlLinePos;
90 
91  return c;
92 }
93 
94 // This function implements the scanner described in
95 
96 // http://www.w3.org/pub/WWW/TR/WD-css1.html
97 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
98 
99 // for CSS1. It's a direct implementation of the
100 // described Lex grammar.
101 
103 {
104  CSS1Token nRet = CSS1_NULL;
105  m_aToken.clear();
106 
107  do {
108  // remember if white space was read
109  bool bPrevWhiteSpace = m_bWhiteSpace;
110  m_bWhiteSpace = false;
111 
112  bool bNextCh = true;
113  switch( m_cNextCh )
114  {
115  case '/': // COMMENT | '/'
116  {
118  if( '*' == m_cNextCh )
119  {
120  // COMMENT
122 
123  bool bAsterisk = false;
124  while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
125  {
126  bAsterisk = ('*'==m_cNextCh);
128  }
129  }
130  else
131  {
132  // '/'
133  bNextCh = false;
134  nRet = CSS1_SLASH;
135  }
136  }
137  break;
138 
139  case '@': // '@import' | '@XXX'
140  {
142  if (rtl::isAsciiAlpha(m_cNextCh))
143  {
144  // scan the next identifier
145  OUStringBuffer sTmpBuffer(32);
146  do {
147  sTmpBuffer.append( m_cNextCh );
149  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
150  '-' == m_cNextCh) && !IsEOF() );
151 
152  m_aToken += sTmpBuffer;
153 
154  // check if we know it
155  switch( m_aToken[0] )
156  {
157  case 'i':
158  case 'I':
159  if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
160  nRet = CSS1_IMPORT_SYM;
161  break;
162  case 'p':
163  case 'P':
164  if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
165  nRet = CSS1_PAGE_SYM;
166  break;
167  }
168 
169  // error handling: ignore the unknown '@ident' and everything up to
170  // the next semicolon outside a block or the end of the next block
171  if( CSS1_NULL==nRet )
172  {
173  m_aToken.clear();
174  int nBlockLvl = 0;
175  sal_Unicode cQuoteCh = 0;
176  bool bDone = false, bEscape = false;
177  while( !bDone && !IsEOF() )
178  {
179  bool bOldEscape = bEscape;
180  bEscape = false;
181  switch( m_cNextCh )
182  {
183  case '{':
184  if( !cQuoteCh && !bOldEscape )
185  nBlockLvl++;
186  break;
187  case ';':
188  if( !cQuoteCh && !bOldEscape )
189  bDone = nBlockLvl==0;
190  break;
191  case '}':
192  if( !cQuoteCh && !bOldEscape )
193  bDone = --nBlockLvl==0;
194  break;
195  case '\"':
196  case '\'':
197  if( !bOldEscape )
198  {
199  if( cQuoteCh )
200  {
201  if( cQuoteCh == m_cNextCh )
202  cQuoteCh = 0;
203  }
204  else
205  {
206  cQuoteCh = m_cNextCh;
207  }
208  }
209  break;
210  case '\\':
211  if( !bOldEscape )
212  bEscape = true;
213  break;
214  }
216  }
217  }
218 
219  bNextCh = false;
220  }
221  }
222  break;
223 
224  case '!': // '!' 'legal' | '!' 'important' | syntax error
225  {
226  // ignore white space
228  while( ( ' ' == m_cNextCh ||
229  (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
230  {
231  m_bWhiteSpace = true;
233  }
234 
235  if( 'i'==m_cNextCh || 'I'==m_cNextCh)
236  {
237  // scan next identifier
238  OUStringBuffer sTmpBuffer(32);
239  do {
240  sTmpBuffer.append( m_cNextCh );
242  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
243  '-' == m_cNextCh) && !IsEOF() );
244 
245  m_aToken += sTmpBuffer;
246 
247  if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
248  m_aToken.equalsIgnoreAsciiCase( "important" ) )
249  {
250  // '!' 'important'
251  nRet = CSS1_IMPORTANT_SYM;
252  }
253  else
254  {
255  // error handling: ignore '!', not IDENT
256  nRet = CSS1_IDENT;
257  }
258 
259  m_bWhiteSpace = false;
260  bNextCh = false;
261  }
262  else
263  {
264  // error handling: ignore '!'
265  bNextCh = false;
266  }
267  }
268  break;
269 
270  case '\"':
271  case '\'': // STRING
272  {
273  // \... isn't possible yet!!!
274  sal_Unicode cQuoteChar = m_cNextCh;
276 
277  OUStringBuffer sTmpBuffer( MAX_LEN );
278  do {
279  sTmpBuffer.append( m_cNextCh );
281  } while( cQuoteChar != m_cNextCh && !IsEOF() );
282 
283  m_aToken += sTmpBuffer;
284 
285  nRet = CSS1_STRING;
286  }
287  break;
288 
289  case '0':
290  case '1':
291  case '2':
292  case '3':
293  case '4':
294  case '5':
295  case '6':
296  case '7':
297  case '8':
298  case '9': // NUMBER | PERCENTAGE | LENGTH
299  {
300  // save current position
301  std::size_t nInPosSave = m_nInPos;
302  sal_Unicode cNextChSave = m_cNextCh;
303  sal_uInt32 nlLineNrSave = m_nlLineNr;
304  sal_uInt32 nlLinePosSave = m_nlLinePos;
305  bool bEOFSave = m_bEOF;
306 
307  // first try to parse a hex digit
308  OUStringBuffer sTmpBuffer( 16 );
309  do {
310  sTmpBuffer.append( m_cNextCh );
312  } while( sTmpBuffer.getLength() < 7 &&
313  ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
314  ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
315  ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
316  !IsEOF() );
317 
318  if( sTmpBuffer.getLength()==6 )
319  {
320  // we found a color in hex
321  m_aToken += sTmpBuffer;
322  nRet = CSS1_HEXCOLOR;
323  bNextCh = false;
324 
325  break;
326  }
327 
328  // otherwise we try a number
329  m_nInPos = nInPosSave;
330  m_cNextCh = cNextChSave;
331  m_nlLineNr = nlLineNrSave;
332  m_nlLinePos = nlLinePosSave;
333  m_bEOF = bEOFSave;
334 
335  // first parse the number
336  sTmpBuffer.setLength( 0 );
337  do {
338  sTmpBuffer.append( m_cNextCh );
340  } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
341  !IsEOF() );
342 
343  m_aToken += sTmpBuffer;
344  m_nValue = m_aToken.toDouble();
345 
346  // ignore white space
347  while( ( ' ' == m_cNextCh ||
348  (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
349  {
350  m_bWhiteSpace = true;
352  }
353 
354  // now check whether there is a unit
355  switch( m_cNextCh )
356  {
357  case '%': // PERCENTAGE
358  m_bWhiteSpace = false;
359  nRet = CSS1_PERCENTAGE;
360  break;
361 
362  case 'c':
363  case 'C': // LENGTH cm | LENGTH IDENT
364  case 'e':
365  case 'E': // LENGTH (em | ex) | LENGTH IDENT
366  case 'i':
367  case 'I': // LENGTH inch | LENGTH IDENT
368  case 'p':
369  case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
370  case 'm':
371  case 'M': // LENGTH mm | LENGTH IDENT
372  {
373  // save current position
374  sal_Int32 nInPosOld = m_nInPos;
375  sal_Unicode cNextChOld = m_cNextCh;
376  sal_uLong nlLineNrOld = m_nlLineNr;
377  sal_uLong nlLinePosOld = m_nlLinePos;
378  bool bEOFOld = m_bEOF;
379 
380  // parse the next identifier
381  OUString aIdent;
382  OUStringBuffer sTmpBuffer2(64);
383  do {
384  sTmpBuffer2.append( m_cNextCh );
386  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
387  '-' == m_cNextCh) && !IsEOF() );
388 
389  aIdent += sTmpBuffer2;
390 
391  // Is it a unit?
392  const char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
393  double nScale1 = 1., nScale2 = 1.;
394  CSS1Token nToken1 = CSS1_LENGTH,
395  nToken2 = CSS1_LENGTH,
396  nToken3 = CSS1_LENGTH;
397  switch( aIdent[0] )
398  {
399  case 'c':
400  case 'C':
401  pCmp1 = "cm";
402  nScale1 = (72.*20.)/2.54; // twip
403  break;
404  case 'e':
405  case 'E':
406  pCmp1 = "em";
407  nToken1 = CSS1_EMS;
408 
409  pCmp2 = "ex";
410  nToken2 = CSS1_EMX;
411  break;
412  case 'i':
413  case 'I':
414  pCmp1 = "in";
415  nScale1 = 72.*20.; // twip
416  break;
417  case 'm':
418  case 'M':
419  pCmp1 = "mm";
420  nScale1 = (72.*20.)/25.4; // twip
421  break;
422  case 'p':
423  case 'P':
424  pCmp1 = "pt";
425  nScale1 = 20.; // twip
426 
427  pCmp2 = "pc";
428  nScale2 = 12.*20.; // twip
429 
430  pCmp3 = "px";
431  nToken3 = CSS1_PIXLENGTH;
432  break;
433  }
434 
435  double nScale = 0.0;
436  OSL_ENSURE( pCmp1, "Where does the first digit come from?" );
437  if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
438  {
439  nScale = nScale1;
440  nRet = nToken1;
441  }
442  else if( pCmp2 &&
443  aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
444  {
445  nScale = nScale2;
446  nRet = nToken2;
447  }
448  else if( pCmp3 &&
449  aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
450  {
451  nScale = 1.; // nScale3
452  nRet = nToken3;
453  }
454  else
455  {
456  nRet = CSS1_NUMBER;
457  }
458 
459  if( CSS1_LENGTH==nRet && nScale!=1.0 )
460  m_nValue *= nScale;
461 
462  if( nRet == CSS1_NUMBER )
463  {
464  m_nInPos = nInPosOld;
465  m_cNextCh = cNextChOld;
466  m_nlLineNr = nlLineNrOld;
467  m_nlLinePos = nlLinePosOld;
468  m_bEOF = bEOFOld;
469  }
470  else
471  {
472  m_bWhiteSpace = false;
473  }
474  bNextCh = false;
475  }
476  break;
477  default: // NUMBER IDENT
478  bNextCh = false;
479  nRet = CSS1_NUMBER;
480  break;
481  }
482  }
483  break;
484 
485  case ':': // ':'
486  // catch link/visited/active !!!
487  nRet = CSS1_COLON;
488  break;
489 
490  case '.': // DOT_W_WS | DOT_WO_WS
491  nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
492  break;
493 
494  case '+': // '+'
495  nRet = CSS1_PLUS;
496  break;
497 
498  case '-': // '-'
499  nRet = CSS1_MINUS;
500  break;
501 
502  case '{': // '{'
503  nRet = CSS1_OBRACE;
504  break;
505 
506  case '}': // '}'
507  nRet = CSS1_CBRACE;
508  break;
509 
510  case ';': // ';'
511  nRet = CSS1_SEMICOLON;
512  break;
513 
514  case ',': // ','
515  nRet = CSS1_COMMA;
516  break;
517 
518  case '#': // '#'
520  if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
521  ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
522  ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
523  {
524  // save current position
525  sal_Int32 nInPosSave = m_nInPos;
526  sal_Unicode cNextChSave = m_cNextCh;
527  sal_uLong nlLineNrSave = m_nlLineNr;
528  sal_uLong nlLinePosSave = m_nlLinePos;
529  bool bEOFSave = m_bEOF;
530 
531  // first try to parse a hex digit
532  OUStringBuffer sTmpBuffer(6);
533  do {
534  sTmpBuffer.append( m_cNextCh );
536  } while( sTmpBuffer.getLength() < 7 &&
537  ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
538  ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
539  ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
540  !IsEOF() );
541 
542  if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
543  {
544  // we found a color in hex
545  m_aToken += sTmpBuffer;
546  nRet = CSS1_HEXCOLOR;
547  bNextCh = false;
548 
549  break;
550  }
551 
552  // otherwise we try a number
553  m_nInPos = nInPosSave;
554  m_cNextCh = cNextChSave;
555  m_nlLineNr = nlLineNrSave;
556  m_nlLinePos = nlLinePosSave;
557  m_bEOF = bEOFSave;
558  }
559 
560  nRet = CSS1_HASH;
561  bNextCh = false;
562  break;
563 
564  case ' ':
565  case '\t':
566  case '\r':
567  case '\n': // White-Space
568  m_bWhiteSpace = true;
569  break;
570 
571  case sal_Unicode(EOF):
572  if( IsEOF() )
573  {
575  bNextCh = false;
576  break;
577  }
578  [[fallthrough]];
579 
580  default: // IDENT | syntax error
581  if (rtl::isAsciiAlpha(m_cNextCh))
582  {
583  // IDENT
584 
585  bool bHexColor = true;
586 
587  // parse the next identifier
588  OUStringBuffer sTmpBuffer(64);
589  do {
590  sTmpBuffer.append( m_cNextCh );
591  if( bHexColor )
592  {
593  bHexColor = sTmpBuffer.getLength()<7 &&
594  ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
595  ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
596  ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
597  }
599  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
600  '-' == m_cNextCh) && !IsEOF() );
601 
602  m_aToken += sTmpBuffer;
603 
604  if( bHexColor && sTmpBuffer.getLength()==6 )
605  {
606  bNextCh = false;
607  nRet = CSS1_HEXCOLOR;
608 
609  break;
610  }
611  if( '('==m_cNextCh &&
612  ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
613  m_aToken.equalsIgnoreAsciiCase( "url" )) ||
614  (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
615  m_aToken.equalsIgnoreAsciiCase( "rgb" )) ) )
616  {
617  int nNestCnt = 0;
618  OUStringBuffer sTmpBuffer2(64);
619  do {
620  sTmpBuffer2.append( m_cNextCh );
621  switch( m_cNextCh )
622  {
623  case '(': nNestCnt++; break;
624  case ')': nNestCnt--; break;
625  }
627  } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
628  sTmpBuffer2.append( m_cNextCh );
629  m_aToken += sTmpBuffer2;
630  bNextCh = true;
631  nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
632  ? CSS1_URL
633  : CSS1_RGB;
634  }
635  else
636  {
637  bNextCh = false;
638  nRet = CSS1_IDENT;
639  }
640  }
641  // error handling: ignore digit
642  break;
643  }
644  if( bNextCh )
646 
647  } while( CSS1_NULL==nRet && IsParserWorking() );
648 
649  return nRet;
650 }
651 
652 // These functions implement the parser described in
653 
654 // http://www.w3.org/pub/WWW/TR/WD-css1.html
655 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
656 
657 // for CSS1. It's a direct implementation of the
658 // described Lex grammar.
659 
660 // stylesheet
661 // : import* rule*
662 
663 // import
664 // : IMPORT_SYM url
665 
666 // url
667 // : STRING
668 
670 {
672 
673  // import*
674  bool bDone = false;
675  while( !bDone && IsParserWorking() )
676  {
677  LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
678 
679  switch( m_nToken )
680  {
681  case CSS1_IMPORT_SYM:
682  // IMPORT_SYM url
683  // URL are skipped without checks
685  break;
686  case CSS1_IDENT: // Look-Aheads
687  case CSS1_DOT_W_WS:
688  case CSS1_HASH:
689  case CSS1_PAGE_SYM:
690  // rule
691  bDone = true;
692  break;
693  default:
694  // error handling: ignore
695  break;
696  }
697 
698  if( !bDone )
700  }
701 
703 
704  // rule *
705  while( IsParserWorking() )
706  {
707  LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
708 
709  switch( m_nToken )
710  {
711  case CSS1_IDENT: // Look-Aheads
712  case CSS1_DOT_W_WS:
713  case CSS1_HASH:
714  case CSS1_PAGE_SYM:
715  // rule
716  ParseRule();
717  break;
718  default:
719  // error handling: ignore
721  break;
722  }
723  }
724 }
725 
726 // rule
727 // : selector [ ',' selector ]*
728 // '{' declaration [ ';' declaration ]* '}'
729 
731 {
732  // selector
733  std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
734  if( !pSelector )
735  return;
736 
737  // process selector
738  SelectorParsed( std::move(pSelector), true );
739 
741 
742  // [ ',' selector ]*
743  while( CSS1_COMMA==m_nToken && IsParserWorking() )
744  {
745  LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
746 
747  // ignore ','
749 
750  // selector
751  pSelector = ParseSelector();
752  if( !pSelector )
753  return;
754 
755  // process selector
756  SelectorParsed( std::move(pSelector), false );
757  }
758 
759  // '{'
760  if( CSS1_OBRACE != m_nToken )
761  return;
763 
764  // declaration
765  OUString aProperty;
766  std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
767  if( !pExpr )
768  return;
769 
770  // process expression
771  DeclarationParsed( aProperty, std::move(pExpr) );
772 
774 
775  // [ ';' declaration ]*
777  {
778  LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
779 
780  // ';'
782 
783  // declaration
784  if( CSS1_IDENT == m_nToken )
785  {
786  std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
787  if( pExp )
788  {
789  // process expression
790  DeclarationParsed( aProperty, std::move(pExp));
791  }
792  }
793  }
794 
795  // '}'
796  if( CSS1_CBRACE == m_nToken )
798 }
799 
800 // selector
801 // : simple_selector+ [ ':' pseudo_element ]?
802 
803 // simple_selector
804 // : element_name [ DOT_WO_WS class ]?
805 // | DOT_W_WS class
806 // | id_selector
807 
808 // element_name
809 // : IDENT
810 
811 // class
812 // : IDENT
813 
814 // id_selector
815 // : '#' IDENT
816 
817 // pseudo_element
818 // : IDENT
819 
820 std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
821 {
822  std::unique_ptr<CSS1Selector> pRoot;
823  CSS1Selector *pLast = nullptr;
824 
825  bool bDone = false;
826  CSS1Selector *pNew = nullptr;
827 
829 
830  // simple_selector+
831  while( !bDone && IsParserWorking() )
832  {
833  LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
834 
835  bool bNextToken = true;
836 
837  switch( m_nToken )
838  {
839  case CSS1_IDENT:
840  {
841  // element_name [ DOT_WO_WS class ]?
842 
843  // element_name
844  OUString aElement = m_aToken;
847 
848  if( CSS1_DOT_WO_WS == m_nToken )
849  {
850  // DOT_WO_WS
852 
853  // class
854  if( CSS1_IDENT == m_nToken )
855  {
856  aElement += "." + m_aToken;
857  eType = CSS1_SELTYPE_ELEM_CLASS;
858  }
859  else
860  {
861  // missing class
862  return pRoot;
863  }
864  }
865  else
866  {
867  // that was a look-ahead
868  bNextToken = false;
869  }
870  pNew = new CSS1Selector( eType, aElement );
871  }
872  break;
873  case CSS1_DOT_W_WS:
874  // DOT_W_WS class
875 
876  // DOT_W_WS
878 
879  if( CSS1_IDENT==m_nToken )
880  {
881  // class
883  }
884  else
885  {
886  // missing class
887  return pRoot;
888  }
889  break;
890  case CSS1_HASH:
891  // '#' id_selector
892 
893  // '#'
895 
896  if( CSS1_IDENT==m_nToken )
897  {
898  // id_selector
899  pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken );
900  }
901  else
902  {
903  // missing id_selector
904  return pRoot;
905  }
906  break;
907 
908  case CSS1_PAGE_SYM:
909  {
910  // @page
911  pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken );
912  }
913  break;
914 
915  default:
916  // stop because we don't know what's next
917  bDone = true;
918  break;
919  }
920 
921  // if created a new selector then save it
922  if( pNew )
923  {
924  OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
925  "Root-Selector, but no Last" );
926  if( pLast )
927  pLast->SetNext( pNew );
928  else
929  pRoot.reset(pNew);
930 
931  pLast = pNew;
932  pNew = nullptr;
933  }
934 
935  if( bNextToken && !bDone )
937  }
938 
939  if( !pRoot )
940  {
941  // missing simple_selector
942  return pRoot;
943  }
944 
945  // [ ':' pseudo_element ]?
947  {
948  // ':' pseudo element
950  if( CSS1_IDENT==m_nToken )
951  {
952  if (pLast)
955  }
956  else
957  {
958  // missing pseudo_element
959  return pRoot;
960  }
961  }
962 
963  return pRoot;
964 }
965 
966 // declaration
967 // : property ':' expr prio?
968 // | /* empty */
969 
970 // expression
971 // : term [ operator term ]*
972 
973 // term
974 // : unary_operator?
975 // [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
976 // HEXCOLOR | URL | RGB ]
977 
978 // operator
979 // : '/' | ',' | /* empty */
980 
981 // unary_operator
982 // : '-' | '+'
983 
984 // property
985 // : ident
986 
987 // the sign is only used for numeric values (except PERCENTAGE)
988 // and it's applied on nValue!
989 std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
990 {
991  std::unique_ptr<CSS1Expression> pRoot;
992  CSS1Expression *pLast = nullptr;
993 
994  // property
995  if( CSS1_IDENT != m_nToken )
996  {
997  // missing property
998  return pRoot;
999  }
1000  rProperty = m_aToken;
1001 
1002  m_nToken = GetNextToken();
1003 
1004  // ':'
1005  if( CSS1_COLON != m_nToken )
1006  {
1007  // missing ':'
1008  return pRoot;
1009  }
1010  m_nToken = GetNextToken();
1011 
1012  // term [operator term]*
1013  // here we're pretty lax regarding the syntax, but this shouldn't
1014  // be a problem
1015  bool bDone = false;
1016  sal_Unicode cSign = 0, cOp = 0;
1017  CSS1Expression *pNew = nullptr;
1018 
1020 
1021  while( !bDone && IsParserWorking() )
1022  {
1023  LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1024 
1025  switch( m_nToken )
1026  {
1027  case CSS1_MINUS:
1028  cSign = '-';
1029  break;
1030 
1031  case CSS1_PLUS:
1032  cSign = '+';
1033  break;
1034 
1035  case CSS1_NUMBER:
1036  case CSS1_LENGTH:
1037  case CSS1_PIXLENGTH:
1038  case CSS1_EMS:
1039  case CSS1_EMX:
1040  if( '-'==cSign )
1041  m_nValue = -m_nValue;
1042  [[fallthrough]];
1043  case CSS1_STRING:
1044  case CSS1_PERCENTAGE:
1045  case CSS1_IDENT:
1046  case CSS1_URL:
1047  case CSS1_RGB:
1048  case CSS1_HEXCOLOR:
1049  pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1050  m_nValue = 0; // otherwise this also is applied to next ident
1051  cSign = 0;
1052  cOp = 0;
1053  break;
1054 
1055  case CSS1_SLASH:
1056  cOp = '/';
1057  cSign = 0;
1058  break;
1059 
1060  case CSS1_COMMA:
1061  cOp = ',';
1062  cSign = 0;
1063  break;
1064 
1065  default:
1066  bDone = true;
1067  break;
1068  }
1069 
1070  // if created a new expression save it
1071  if( pNew )
1072  {
1073  OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1074  "Root-Selector, but no Last" );
1075  if( pLast )
1076  pLast->SetNext( pNew );
1077  else
1078  pRoot.reset(pNew);
1079 
1080  pLast = pNew;
1081  pNew = nullptr;
1082  }
1083 
1084  if( !bDone )
1085  m_nToken = GetNextToken();
1086  }
1087 
1088  if( !pRoot )
1089  {
1090  // missing term
1091  return pRoot;
1092  }
1093 
1094  // prio?
1096  {
1097  // IMPORTANT_SYM
1098  m_nToken = GetNextToken();
1099  }
1100 
1101  return pRoot;
1102 }
1103 
1105  : m_bWhiteSpace(false)
1106  , m_bEOF(false)
1107  , m_cNextCh(0)
1108  , m_nInPos(0)
1109  , m_nlLineNr(0)
1110  , m_nlLinePos(0)
1111  , m_nValue(0)
1112  , m_eState(CSS1_PAR_ACCEPTED)
1113  , m_nToken(CSS1_NULL)
1114 {
1115 }
1116 
1118 {
1119 }
1120 
1121 void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1122 {
1123  OUString aTmp( rIn );
1124 
1125  sal_Unicode c;
1126  while( !aTmp.isEmpty() &&
1127  ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1128  aTmp = aTmp.copy( 1 );
1129 
1130  while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1131  || '\t'==c || '\r'==c || '\n'==c ) )
1132  aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1133 
1134  // remove SGML comments
1135  if( aTmp.getLength() >= 4 &&
1136  aTmp.startsWith( "<!--" ) )
1137  aTmp = aTmp.copy( 4 );
1138 
1139  if( aTmp.getLength() >=3 &&
1140  aTmp.endsWith("-->") )
1141  aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1142 
1143  if( aTmp.isEmpty() )
1144  return;
1145 
1146  InitRead( aTmp );
1147 
1148  ParseStyleSheet();
1149 }
1150 
1151 void CSS1Parser::ParseStyleOption( const OUString& rIn )
1152 {
1153  if( rIn.isEmpty() )
1154  return;
1155 
1156  InitRead( rIn );
1157 
1158  // fdo#41796: skip over spurious semicolons
1159  while (CSS1_SEMICOLON == m_nToken)
1160  {
1161  m_nToken = GetNextToken();
1162  }
1163 
1164  OUString aProperty;
1165  std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1166  if( !pExpr )
1167  return;
1168 
1169  // process expression
1170  DeclarationParsed( aProperty, std::move(pExpr) );
1171 
1173 
1174  // [ ';' declaration ]*
1175  while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
1176  {
1177  LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1178 
1179  m_nToken = GetNextToken();
1180  if( CSS1_IDENT==m_nToken )
1181  {
1182  std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1183  if( pExp )
1184  {
1185  // process expression
1186  DeclarationParsed( aProperty, std::move(pExp) );
1187  }
1188  }
1189  }
1190 }
1191 
1192 void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1193 {
1194 }
1195 
1196 void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1197  std::unique_ptr<CSS1Expression> /* pExpr */ )
1198 {
1199 }
1200 
1202 {
1203  delete pNext;
1204 }
1205 
1207 {
1208  delete pNext;
1209 }
1210 
1211 void CSS1Expression::GetURL( OUString& rURL ) const
1212 {
1213  OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1214 
1215  OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1216  aValue.getLength() > 5 &&
1217  '(' == aValue[3] &&
1218  ')' == aValue[aValue.getLength()-1],
1219  "no valid URL(...)" );
1220 
1221  if( aValue.getLength() <= 5 )
1222  return;
1223 
1224  rURL = aValue.copy( 4, aValue.getLength() - 5 );
1225 
1226  // tdf#94088 original stripped only spaces, but there may also be
1227  // double quotes in CSS style URLs, so be prepared to spaces followed
1228  // by a single quote followed by spaces
1229  const sal_Unicode aSpace(' ');
1230  const sal_Unicode aSingleQuote('\'');
1231 
1232  rURL = comphelper::string::strip(rURL, aSpace);
1233  rURL = comphelper::string::strip(rURL, aSingleQuote);
1234  rURL = comphelper::string::strip(rURL, aSpace);
1235 }
1236 
1237 bool CSS1Expression::GetColor( Color &rColor ) const
1238 {
1239  OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1241  "CSS1-Expression cannot be colour" );
1242 
1243  bool bRet = false;
1244  sal_uInt32 nColor = SAL_MAX_UINT32;
1245 
1246  switch( eType )
1247  {
1248  case CSS1_RGB:
1249  {
1250  sal_uInt8 aColors[3] = { 0, 0, 0 };
1251 
1252  if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1253  aValue[3] != '(' || aValue[aValue.getLength()-1] != ')')
1254  {
1255  break;
1256  }
1257 
1258  sal_Int32 nPos = 4; // start after "rgb("
1259  for ( int nCol = 0; nCol < 3 && nPos > 0; ++nCol )
1260  {
1261  const OUString aNumber = aValue.getToken(0, ',', nPos);
1262 
1263  sal_Int32 nNumber = aNumber.toInt32();
1264  if( nNumber<0 )
1265  {
1266  nNumber = 0;
1267  }
1268  else if( aNumber.indexOf('%') >= 0 )
1269  {
1270  if( nNumber > 100 )
1271  nNumber = 100;
1272  nNumber *= 255;
1273  nNumber /= 100;
1274  }
1275  else if( nNumber > 255 )
1276  nNumber = 255;
1277 
1278  aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1279  }
1280 
1281  rColor.SetRed( aColors[0] );
1282  rColor.SetGreen( aColors[1] );
1283  rColor.SetBlue( aColors[2] );
1284 
1285  bRet = true; // something different than a colour isn't possible
1286  }
1287  break;
1288 
1289  case CSS1_IDENT:
1290  case CSS1_STRING:
1291  {
1292  OUString aTmp( aValue.toAsciiUpperCase() );
1293  nColor = GetHTMLColor( aTmp );
1294  bRet = nColor != SAL_MAX_UINT32;
1295  }
1296  if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1297  aValue[0] != '#' )
1298  break;
1299  [[fallthrough]];
1300  case CSS1_HEXCOLOR:
1301  {
1302  // MS-IE hack: colour can also be a string
1303  sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
1304  bool bDouble = aValue.getLength()-nOffset == 3;
1305  sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1306 
1307  nColor = 0;
1308  for( ; i<nEnd; i++ )
1309  {
1310  sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1311  : '0' );
1312  if( c >= '0' && c <= '9' )
1313  c -= 48;
1314  else if( c >= 'A' && c <= 'F' )
1315  c -= 55;
1316  else if( c >= 'a' && c <= 'f' )
1317  c -= 87;
1318  else
1319  c = 16;
1320 
1321  nColor *= 16;
1322  if( c<16 )
1323  nColor += c;
1324  if( bDouble )
1325  {
1326  nColor *= 16;
1327  if( c<16 )
1328  nColor += c;
1329  }
1330  }
1331  bRet = true;
1332  }
1333  break;
1334  default:
1335  ;
1336  }
1337 
1338  if( bRet && nColor!=SAL_MAX_UINT32 )
1339  {
1340  rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1341  rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1342  rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1343  }
1344 
1345  return bRet;
1346 }
1347 
1348 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_Unicode m_cNextCh
Definition: parcss1.hxx:184
CSS1Token GetNextToken()
Definition: parcss1.cxx:102
void SetBlue(sal_uInt8 nBlue)
OUString m_aToken
Definition: parcss1.hxx:197
sal_uIntPtr sal_uLong
CSS1ParserState m_eState
Definition: parcss1.hxx:193
CSS1Token
Definition: parcss1.hxx:29
const sal_Int32 MAX_LEN
Definition: parcss1.cxx:54
bool m_bWhiteSpace
Definition: parcss1.hxx:181
bool IsParserWorking() const
Is the parser still working?
Definition: parcss1.hxx:209
sal_uInt32 GetHTMLColor(const OUString &rName)
#define LOOP_CHECK_RESTART
Definition: parcss1.cxx:37
sal_Unicode GetNextChar()
Definition: parcss1.cxx:72
a subexpression of a CSS1 declaration
Definition: parcss1.hxx:118
#define SAL_MAX_UINT32
sal_uInt16 sal_Unicode
CSS1SelectorType
Definition: parcss1.hxx:70
void ParseStyleSheet()
Definition: parcss1.cxx:669
sal_Int32 m_nInPos
Definition: parcss1.hxx:186
DocumentType eType
CSS1Token eType
Definition: parcss1.hxx:121
int i
double m_nValue
Definition: parcss1.hxx:191
std::unique_ptr< CSS1Selector > ParseSelector()
Definition: parcss1.cxx:820
void SetRed(sal_uInt8 nRed)
void SetNext(CSS1Expression *pNxt)
Definition: parcss1.hxx:146
#define LOOP_CHECK_CHECK(where)
Definition: parcss1.cxx:39
CSS1Selector * pNext
Definition: parcss1.hxx:95
void ParseStyleOption(const OUString &rIn)
parse the content of a HTML style option
Definition: parcss1.cxx:1151
CSS1Token m_nToken
Definition: parcss1.hxx:194
bool IsEOF() const
Definition: parcss1.hxx:211
std::unique_ptr< CSS1Expression > ParseDeclaration(OUString &rProperty)
Definition: parcss1.cxx:989
OUString aValue
Definition: parcss1.hxx:122
unsigned char sal_uInt8
virtual void SelectorParsed(std::unique_ptr< CSS1Selector > pSelector, bool bFirst)
Called after a selector was parsed.
Definition: parcss1.cxx:1192
#define LOOP_CHECK_DECL
Definition: parcss1.cxx:35
bool GetColor(Color &rRGB) const
Definition: parcss1.cxx:1237
sal_uInt32 m_nlLineNr
Definition: parcss1.hxx:188
void SetGreen(sal_uInt8 nGreen)
OString strip(const OString &rIn, char c)
void ParseRule()
Definition: parcss1.cxx:730
void InitRead(const OUString &rIn)
prepare parsing
Definition: parcss1.cxx:56
OUString m_aIn
Definition: parcss1.hxx:196
void GetURL(OUString &rURL) const
Definition: parcss1.cxx:1211
virtual void DeclarationParsed(const OUString &rProperty, std::unique_ptr< CSS1Expression > pExpr)
Called after a declaration or property was parsed.
Definition: parcss1.cxx:1196
virtual ~CSS1Parser()
Definition: parcss1.cxx:1117
A simple selector.
Definition: parcss1.hxx:91
CSS1Expression * pNext
Definition: parcss1.hxx:124
bool m_bEOF
Definition: parcss1.hxx:182
void SetNext(CSS1Selector *pNxt)
Definition: parcss1.hxx:107
sal_uInt16 nPos
sal_uInt32 m_nlLinePos
Definition: parcss1.hxx:189