LibreOffice Module sw (master)  1
parcss1.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <stdlib.h>
21 #include <limits.h>
22 #include <osl/diagnose.h>
23 #include <rtl/character.hxx>
24 #include <rtl/ustrbuf.hxx>
25 #include <tools/color.hxx>
26 #include <vcl/svapp.hxx>
27 #include <svtools/htmltokn.h>
28 #include <comphelper/string.hxx>
29 #include "css1kywd.hxx"
30 #include "parcss1.hxx"
31 
32 // Loop-Check: guards the parser loops against running forever. After every
33 // loop iteration it is checked whether the input position has advanced.
34 #define LOOP_CHECK
35 
36 #ifdef LOOP_CHECK
37 
// LOOP_CHECK_DECL declares the local variable nOldInPos, which holds the
// input position seen by the previous check (initialised to SAL_MAX_INT32).
38 #define LOOP_CHECK_DECL \
39  sal_Int32 nOldInPos = SAL_MAX_INT32;
// LOOP_CHECK_RESTART resets nOldInPos to its initial value.
40 #define LOOP_CHECK_RESTART \
41  nOldInPos = SAL_MAX_INT32;
// LOOP_CHECK_CHECK( where ) asserts (OSL_ENSURE with the message `where`)
// that m_nInPos differs from the remembered position or that the next
// character is EOF. If there was no progress and the input is not at EOF it
// executes `break`, so it must be expanded directly inside the loop it
// protects; otherwise it remembers the current position for the next check.
42 #define LOOP_CHECK_CHECK( where ) \
43  OSL_ENSURE( nOldInPos!=m_nInPos || m_cNextCh==sal_Unicode(EOF), where ); \
44  if( nOldInPos==m_nInPos && m_cNextCh!=sal_Unicode(EOF) ) \
45  break; \
46  else \
47  nOldInPos = m_nInPos;
48 
49 #else
50 
// Loop checking disabled: all three macros expand to nothing.
51 #define LOOP_CHECK_DECL
52 #define LOOP_CHECK_RESTART
53 #define LOOP_CHECK_CHECK( where )
54 
55 #endif
56 
// Size passed to the OUStringBuffer constructor when a quoted STRING token
// is scanned.
57 const sal_Int32 MAX_LEN = 1024;
58 
// Prepare the scanner for a new input text: reset the line and column
// counters, the white-space and EOF flags and the numeric value, store rIn
// as the text to scan and rewind the read position to its beginning.
//
// rIn: the text that subsequent reads are taken from.
//
// NOTE(review): listing lines 66, 71 and 72 are absent from this copy of the
// file, so further statements of this function may be missing here.
59 void CSS1Parser::InitRead( const OUString& rIn )
60 {
61  m_nlLineNr = 0;
62  m_nlLinePos = 0;
63 
64  m_bWhiteSpace = true; // if nothing was read it's like there was WS
65  m_bEOF = false;
67  m_nValue = 0.;
68 
69  m_aIn = rIn;
70  m_nInPos = 0;
73 }
74 
76 {
77  if( m_nInPos >= m_aIn.getLength() )
78  {
79  m_bEOF = true;
80  return sal_Unicode(EOF);
81  }
82 
84  m_nInPos++;
85 
86  if( c == '\n' )
87  {
88  ++m_nlLineNr;
89  m_nlLinePos = 1;
90  }
91  else
92  ++m_nlLinePos;
93 
94  return c;
95 }
96 
97 // This function implements the scanner described in
98 
99 // http://www.w3.org/pub/WWW/TR/WD-css1.html
100 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
101 
102 // for CSS1. It's a direct implementation of the
103 // described Lex grammar.
104 
106 {
107  CSS1Token nRet = CSS1_NULL;
108  m_aToken.clear();
109 
110  do {
111  // remember if white space was read
112  bool bPrevWhiteSpace = m_bWhiteSpace;
113  m_bWhiteSpace = false;
114 
115  bool bNextCh = true;
116  switch( m_cNextCh )
117  {
118  case '/': // COMMENT | '/'
119  {
121  if( '*' == m_cNextCh )
122  {
123  // COMMENT
125 
126  bool bAsterisk = false;
127  while( !(bAsterisk && '/'==m_cNextCh) && !IsEOF() )
128  {
129  bAsterisk = ('*'==m_cNextCh);
131  }
132  }
133  else
134  {
135  // '/'
136  bNextCh = false;
137  nRet = CSS1_SLASH;
138  }
139  }
140  break;
141 
142  case '@': // '@import' | '@XXX'
143  {
145  if (rtl::isAsciiAlpha(m_cNextCh))
146  {
147  // scan the next identifier
148  OUStringBuffer sTmpBuffer(32);
149  do {
150  sTmpBuffer.append( m_cNextCh );
152  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
153  '-' == m_cNextCh) && !IsEOF() );
154 
155  m_aToken += sTmpBuffer;
156 
157  // check if we know it
158  switch( m_aToken[0] )
159  {
160  case 'i':
161  case 'I':
162  if( m_aToken.equalsIgnoreAsciiCase( "import" ) )
163  nRet = CSS1_IMPORT_SYM;
164  break;
165  case 'p':
166  case 'P':
167  if( m_aToken.equalsIgnoreAsciiCase( "page" ) )
168  nRet = CSS1_PAGE_SYM;
169  break;
170  }
171 
172  // error handling: ignore '@indent' and the rest until
173  // semicolon at end of the next block
174  if( CSS1_NULL==nRet )
175  {
176  m_aToken.clear();
177  int nBlockLvl = 0;
178  sal_Unicode cQuoteCh = 0;
179  bool bDone = false, bEscape = false;
180  while( !bDone && !IsEOF() )
181  {
182  bool bOldEscape = bEscape;
183  bEscape = false;
184  switch( m_cNextCh )
185  {
186  case '{':
187  if( !cQuoteCh && !bOldEscape )
188  nBlockLvl++;
189  break;
190  case ';':
191  if( !cQuoteCh && !bOldEscape )
192  bDone = nBlockLvl==0;
193  break;
194  case '}':
195  if( !cQuoteCh && !bOldEscape )
196  bDone = --nBlockLvl==0;
197  break;
198  case '\"':
199  case '\'':
200  if( !bOldEscape )
201  {
202  if( cQuoteCh )
203  {
204  if( cQuoteCh == m_cNextCh )
205  cQuoteCh = 0;
206  }
207  else
208  {
209  cQuoteCh = m_cNextCh;
210  }
211  }
212  break;
213  case '\\':
214  if( !bOldEscape )
215  bEscape = true;
216  break;
217  }
219  }
220  }
221 
222  bNextCh = false;
223  }
224  }
225  break;
226 
227  case '!': // '!' 'legal' | '!' 'important' | syntax error
228  {
229  // ignore white space
231  while( ( ' ' == m_cNextCh ||
232  (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
233  {
234  m_bWhiteSpace = true;
236  }
237 
238  if( 'i'==m_cNextCh || 'I'==m_cNextCh)
239  {
240  // scan next identifier
241  OUStringBuffer sTmpBuffer(32);
242  do {
243  sTmpBuffer.append( m_cNextCh );
245  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
246  '-' == m_cNextCh) && !IsEOF() );
247 
248  m_aToken += sTmpBuffer;
249 
250  if( ( 'i'==m_aToken[0] || 'I'==m_aToken[0] ) &&
251  m_aToken.equalsIgnoreAsciiCase( "important" ) )
252  {
253  // '!' 'important'
254  nRet = CSS1_IMPORTANT_SYM;
255  }
256  else
257  {
258  // error handling: ignore '!', not IDENT
259  nRet = CSS1_IDENT;
260  }
261 
262  m_bWhiteSpace = false;
263  bNextCh = false;
264  }
265  else
266  {
267  // error handling: ignore '!'
268  bNextCh = false;
269  }
270  }
271  break;
272 
273  case '\"':
274  case '\'': // STRING
275  {
276  // \... isn't possible yet!!!
277  sal_Unicode cQuoteChar = m_cNextCh;
279 
280  OUStringBuffer sTmpBuffer( MAX_LEN );
281  do {
282  sTmpBuffer.append( m_cNextCh );
284  } while( cQuoteChar != m_cNextCh && !IsEOF() );
285 
286  m_aToken += sTmpBuffer;
287 
288  nRet = CSS1_STRING;
289  }
290  break;
291 
292  case '0':
293  case '1':
294  case '2':
295  case '3':
296  case '4':
297  case '5':
298  case '6':
299  case '7':
300  case '8':
301  case '9': // NUMBER | PERCENTAGE | LENGTH
302  {
303  // save current position
304  std::size_t nInPosSave = m_nInPos;
305  sal_Unicode cNextChSave = m_cNextCh;
306  sal_uInt32 nlLineNrSave = m_nlLineNr;
307  sal_uInt32 nlLinePosSave = m_nlLinePos;
308  bool bEOFSave = m_bEOF;
309 
310  // first try to parse a hex digit
311  OUStringBuffer sTmpBuffer( 16 );
312  do {
313  sTmpBuffer.append( m_cNextCh );
315  } while( sTmpBuffer.getLength() < 7 &&
316  ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
317  ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
318  ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
319  !IsEOF() );
320 
321  if( sTmpBuffer.getLength()==6 )
322  {
323  // we found a color in hex
324  m_aToken += sTmpBuffer;
325  nRet = CSS1_HEXCOLOR;
326  bNextCh = false;
327 
328  break;
329  }
330 
331  // otherwise we try a number
332  m_nInPos = nInPosSave;
333  m_cNextCh = cNextChSave;
334  m_nlLineNr = nlLineNrSave;
335  m_nlLinePos = nlLinePosSave;
336  m_bEOF = bEOFSave;
337 
338  // first parse the number
339  sTmpBuffer.setLength( 0 );
340  do {
341  sTmpBuffer.append( m_cNextCh );
343  } while( (('0'<=m_cNextCh && '9'>=m_cNextCh) || '.'==m_cNextCh) &&
344  !IsEOF() );
345 
346  m_aToken += sTmpBuffer;
347  m_nValue = m_aToken.toDouble();
348 
349  // ignore white space
350  while( ( ' ' == m_cNextCh ||
351  (m_cNextCh >= 0x09 && m_cNextCh <= 0x0d) ) && !IsEOF() )
352  {
353  m_bWhiteSpace = true;
355  }
356 
357  // check now if there is a unit
358  switch( m_cNextCh )
359  {
360  case '%': // PERCENTAGE
361  m_bWhiteSpace = false;
362  nRet = CSS1_PERCENTAGE;
363  break;
364 
365  case 'c':
366  case 'C': // LENGTH cm | LENGTH IDENT
367  case 'e':
368  case 'E': // LENGTH (em | ex) | LENGTH IDENT
369  case 'i':
370  case 'I': // LENGTH inch | LENGTH IDENT
371  case 'p':
372  case 'P': // LENGTH (pt | px | pc) | LENGTH IDENT
373  case 'm':
374  case 'M': // LENGTH mm | LENGTH IDENT
375  {
376  // save current position
377  sal_Int32 nInPosOld = m_nInPos;
378  sal_Unicode cNextChOld = m_cNextCh;
379  sal_uLong nlLineNrOld = m_nlLineNr;
380  sal_uLong nlLinePosOld = m_nlLinePos;
381  bool bEOFOld = m_bEOF;
382 
383  // parse the next identifier
384  OUString aIdent;
385  OUStringBuffer sTmpBuffer2(64);
386  do {
387  sTmpBuffer2.append( m_cNextCh );
389  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
390  '-' == m_cNextCh) && !IsEOF() );
391 
392  aIdent += sTmpBuffer2;
393 
394  // Is it a unit?
395  const sal_Char *pCmp1 = nullptr, *pCmp2 = nullptr, *pCmp3 = nullptr;
396  double nScale1 = 1., nScale2 = 1.;
397  CSS1Token nToken1 = CSS1_LENGTH,
398  nToken2 = CSS1_LENGTH,
399  nToken3 = CSS1_LENGTH;
400  switch( aIdent[0] )
401  {
402  case 'c':
403  case 'C':
404  pCmp1 = "cm";
405  nScale1 = (72.*20.)/2.54; // twip
406  break;
407  case 'e':
408  case 'E':
409  pCmp1 = "em";
410  nToken1 = CSS1_EMS;
411 
412  pCmp2 = "ex";
413  nToken2 = CSS1_EMX;
414  break;
415  case 'i':
416  case 'I':
417  pCmp1 = "in";
418  nScale1 = 72.*20.; // twip
419  break;
420  case 'm':
421  case 'M':
422  pCmp1 = "mm";
423  nScale1 = (72.*20.)/25.4; // twip
424  break;
425  case 'p':
426  case 'P':
427  pCmp1 = "pt";
428  nScale1 = 20.; // twip
429 
430  pCmp2 = "pc";
431  nScale2 = 12.*20.; // twip
432 
433  pCmp3 = "px";
434  nToken3 = CSS1_PIXLENGTH;
435  break;
436  }
437 
438  double nScale = 0.0;
439  OSL_ENSURE( pCmp1, "Where does the first digit come from?" );
440  if( aIdent.equalsIgnoreAsciiCaseAscii( pCmp1 ) )
441  {
442  nScale = nScale1;
443  nRet = nToken1;
444  }
445  else if( pCmp2 &&
446  aIdent.equalsIgnoreAsciiCaseAscii( pCmp2 ) )
447  {
448  nScale = nScale2;
449  nRet = nToken2;
450  }
451  else if( pCmp3 &&
452  aIdent.equalsIgnoreAsciiCaseAscii( pCmp3 ) )
453  {
454  nScale = 1.; // nScale3
455  nRet = nToken3;
456  }
457  else
458  {
459  nRet = CSS1_NUMBER;
460  }
461 
462  if( CSS1_LENGTH==nRet && nScale!=1.0 )
463  m_nValue *= nScale;
464 
465  if( nRet == CSS1_NUMBER )
466  {
467  m_nInPos = nInPosOld;
468  m_cNextCh = cNextChOld;
469  m_nlLineNr = nlLineNrOld;
470  m_nlLinePos = nlLinePosOld;
471  m_bEOF = bEOFOld;
472  }
473  else
474  {
475  m_bWhiteSpace = false;
476  }
477  bNextCh = false;
478  }
479  break;
480  default: // NUMBER IDENT
481  bNextCh = false;
482  nRet = CSS1_NUMBER;
483  break;
484  }
485  }
486  break;
487 
488  case ':': // ':'
489  // catch link/visited/active !!!
490  nRet = CSS1_COLON;
491  break;
492 
493  case '.': // DOT_W_WS | DOT_WO_WS
494  nRet = bPrevWhiteSpace ? CSS1_DOT_W_WS : CSS1_DOT_WO_WS;
495  break;
496 
497  case '+': // '+'
498  nRet = CSS1_PLUS;
499  break;
500 
501  case '-': // '-'
502  nRet = CSS1_MINUS;
503  break;
504 
505  case '{': // '{'
506  nRet = CSS1_OBRACE;
507  break;
508 
509  case '}': // '}'
510  nRet = CSS1_CBRACE;
511  break;
512 
513  case ';': // ';'
514  nRet = CSS1_SEMICOLON;
515  break;
516 
517  case ',': // ','
518  nRet = CSS1_COMMA;
519  break;
520 
521  case '#': // '#'
523  if( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
524  ('a'<=m_cNextCh && 'f'>=m_cNextCh) ||
525  ('A'<=m_cNextCh && 'F'>=m_cNextCh) )
526  {
527  // save current position
528  sal_Int32 nInPosSave = m_nInPos;
529  sal_Unicode cNextChSave = m_cNextCh;
530  sal_uLong nlLineNrSave = m_nlLineNr;
531  sal_uLong nlLinePosSave = m_nlLinePos;
532  bool bEOFSave = m_bEOF;
533 
534  // first try to parse a hex digit
535  OUStringBuffer sTmpBuffer(6);
536  do {
537  sTmpBuffer.append( m_cNextCh );
539  } while( sTmpBuffer.getLength() < 7 &&
540  ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
541  ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
542  ('a'<=m_cNextCh && 'f'>=m_cNextCh) ) &&
543  !IsEOF() );
544 
545  if( sTmpBuffer.getLength()==6 || sTmpBuffer.getLength()==3 )
546  {
547  // we found a color in hex
548  m_aToken += sTmpBuffer;
549  nRet = CSS1_HEXCOLOR;
550  bNextCh = false;
551 
552  break;
553  }
554 
555  // otherwise we try a number
556  m_nInPos = nInPosSave;
557  m_cNextCh = cNextChSave;
558  m_nlLineNr = nlLineNrSave;
559  m_nlLinePos = nlLinePosSave;
560  m_bEOF = bEOFSave;
561  }
562 
563  nRet = CSS1_HASH;
564  bNextCh = false;
565  break;
566 
567  case ' ':
568  case '\t':
569  case '\r':
570  case '\n': // White-Space
571  m_bWhiteSpace = true;
572  break;
573 
574  case sal_Unicode(EOF):
575  if( IsEOF() )
576  {
578  bNextCh = false;
579  break;
580  }
581  [[fallthrough]];
582 
583  default: // IDENT | syntax error
584  if (rtl::isAsciiAlpha(m_cNextCh))
585  {
586  // IDENT
587 
588  bool bHexColor = true;
589 
590  // parse the next identifier
591  OUStringBuffer sTmpBuffer(64);
592  do {
593  sTmpBuffer.append( m_cNextCh );
594  if( bHexColor )
595  {
596  bHexColor = sTmpBuffer.getLength()<7 &&
597  ( ('0'<=m_cNextCh && '9'>=m_cNextCh) ||
598  ('A'<=m_cNextCh && 'F'>=m_cNextCh) ||
599  ('a'<=m_cNextCh && 'f'>=m_cNextCh) );
600  }
602  } while( (rtl::isAsciiAlphanumeric(m_cNextCh) ||
603  '-' == m_cNextCh) && !IsEOF() );
604 
605  m_aToken += sTmpBuffer;
606 
607  if( bHexColor && sTmpBuffer.getLength()==6 )
608  {
609  bNextCh = false;
610  nRet = CSS1_HEXCOLOR;
611 
612  break;
613  }
614  if( '('==m_cNextCh &&
615  ( (('u'==m_aToken[0] || 'U'==m_aToken[0]) &&
616  m_aToken.equalsIgnoreAsciiCase( "url" )) ||
617  (('r'==m_aToken[0] || 'R'==m_aToken[0]) &&
618  m_aToken.equalsIgnoreAsciiCase( "rgb" )) ) )
619  {
620  int nNestCnt = 0;
621  OUStringBuffer sTmpBuffer2(64);
622  do {
623  sTmpBuffer2.append( m_cNextCh );
624  switch( m_cNextCh )
625  {
626  case '(': nNestCnt++; break;
627  case ')': nNestCnt--; break;
628  }
630  } while( (nNestCnt>1 || ')'!=m_cNextCh) && !IsEOF() );
631  sTmpBuffer2.append( m_cNextCh );
632  m_aToken += sTmpBuffer2;
633  bNextCh = true;
634  nRet = 'u'==m_aToken[0] || 'U'==m_aToken[0]
635  ? CSS1_URL
636  : CSS1_RGB;
637  }
638  else
639  {
640  bNextCh = false;
641  nRet = CSS1_IDENT;
642  }
643  }
644  // error handling: ignore digit
645  break;
646  }
647  if( bNextCh )
649 
650  } while( CSS1_NULL==nRet && IsParserWorking() );
651 
652  return nRet;
653 }
654 
655 // These functions implement the parser described in
656 
657 // http://www.w3.org/pub/WWW/TR/WD-css1.html
658 // resp. http://www.w3.org/pub/WWW/TR/WD-css1-960220.html
659 
660 // for CSS1. It's a direct implementation of the
661 // described Lex grammar.
662 
663 // stylesheet
664 // : import* rule*
665 
666 // import
667 // : IMPORT_SYM url
668 
669 // url
670 // : STRING
671 
673 {
675 
676  // import*
677  bool bDone = false;
678  while( !bDone && IsParserWorking() )
679  {
680  LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/import *" )
681 
682  switch( m_nToken )
683  {
684  case CSS1_IMPORT_SYM:
685  // IMPORT_SYM url
686  // URL are skipped without checks
688  break;
689  case CSS1_IDENT: // Look-Aheads
690  case CSS1_DOT_W_WS:
691  case CSS1_HASH:
692  case CSS1_PAGE_SYM:
693  // rule
694  bDone = true;
695  break;
696  default:
697  // error handling: ignore
698  break;
699  }
700 
701  if( !bDone )
703  }
704 
706 
707  // rule *
708  while( IsParserWorking() )
709  {
710  LOOP_CHECK_CHECK( "Infinite loop in ParseStyleSheet()/rule *" )
711 
712  switch( m_nToken )
713  {
714  case CSS1_IDENT: // Look-Aheads
715  case CSS1_DOT_W_WS:
716  case CSS1_HASH:
717  case CSS1_PAGE_SYM:
718  // rule
719  ParseRule();
720  break;
721  default:
722  // error handling: ignore
724  break;
725  }
726  }
727 }
728 
729 // rule
730 // : selector [ ',' selector ]*
731 // '{' declaration [ ';' declaration ]* '}'
732 
734 {
735  // selector
736  std::unique_ptr<CSS1Selector> pSelector = ParseSelector();
737  if( !pSelector )
738  return;
739 
740  // process selector
741  SelectorParsed( std::move(pSelector), true );
742 
744 
745  // [ ',' selector ]*
746  while( CSS1_COMMA==m_nToken && IsParserWorking() )
747  {
748  LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/selector *" )
749 
750  // ignore ','
752 
753  // selector
754  pSelector = ParseSelector();
755  if( !pSelector )
756  return;
757 
758  // process selector
759  SelectorParsed( std::move(pSelector), false );
760  }
761 
762  // '{'
763  if( CSS1_OBRACE != m_nToken )
764  return;
766 
767  // declaration
768  OUString aProperty;
769  std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
770  if( !pExpr )
771  return;
772 
773  // process expression
774  DeclarationParsed( aProperty, std::move(pExpr) );
775 
777 
778  // [ ';' declaration ]*
780  {
781  LOOP_CHECK_CHECK( "Infinite loop in ParseRule()/declaration *" )
782 
783  // ';'
785 
786  // declaration
787  if( CSS1_IDENT == m_nToken )
788  {
789  std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
790  if( pExp )
791  {
792  // process expression
793  DeclarationParsed( aProperty, std::move(pExp));
794  }
795  }
796  }
797 
798  // '}'
799  if( CSS1_CBRACE == m_nToken )
801 }
802 
803 // selector
804 // : simple_selector+ [ ':' pseudo_element ]?
805 
806 // simple_selector
807 // : element_name [ DOT_WO_WS class ]?
808 // | DOT_W_WS class
809 // | id_selector
810 
811 // element_name
812 // : IDENT
813 
814 // class
815 // : IDENT
816 
817 // id_selector
818 // : '#' IDENT
819 
820 // pseudo_element
821 // : IDENT
822 
// Parse one selector (a sequence of simple selectors, optionally followed by
// ':' pseudo_element) starting at the current token m_nToken.
//
// Each recognised simple selector becomes a CSS1Selector that is appended to
// a singly linked chain: the first one is owned by pRoot, every further one
// is attached to its predecessor with SetNext().
//
// Returns the head of the chain. The result is null if no simple selector
// was recognised. If an IDENT that must follow ('.' class, '#' id, ':'
// pseudo element) is missing, the chain built so far is returned as is.
//
// NOTE(review): several listing lines (831, 848-849, 854, 880, 885, 897,
// 939, 949, 952, 956-957) are absent from this copy of the file; statements
// of this function are therefore missing at those places.
823 std::unique_ptr<CSS1Selector> CSS1Parser::ParseSelector()
824 {
825  std::unique_ptr<CSS1Selector> pRoot;
826  CSS1Selector *pLast = nullptr;
827 
828  bool bDone = false;
829  CSS1Selector *pNew = nullptr;
830 
832 
833  // simple_selector+
834  while( !bDone && IsParserWorking() )
835  {
836  LOOP_CHECK_CHECK( "Infinite loop in ParseSelector()" )
837 
838  bool bNextToken = true;
839 
840  switch( m_nToken )
841  {
842  case CSS1_IDENT:
843  {
844  // element_name [ DOT_WO_WS class ]?
845 
846  // element_name
847  OUString aElement = m_aToken;
850 
851  if( CSS1_DOT_WO_WS == m_nToken )
852  {
853  // DOT_WO_WS
855 
856  // class
857  if( CSS1_IDENT == m_nToken )
858  {
// element and class are stored together as "element.class"
859  aElement += "." + m_aToken;
860  eType = CSS1_SELTYPE_ELEM_CLASS;
861  }
862  else
863  {
864  // missing class
865  return pRoot;
866  }
867  }
868  else
869  {
870  // that was a look-ahead
871  bNextToken = false;
872  }
873  pNew = new CSS1Selector( eType, aElement );
874  }
875  break;
876  case CSS1_DOT_W_WS:
877  // DOT_W_WS class
878 
879  // DOT_W_WS
881 
882  if( CSS1_IDENT==m_nToken )
883  {
884  // class
886  }
887  else
888  {
889  // missing class
890  return pRoot;
891  }
892  break;
893  case CSS1_HASH:
894  // '#' id_selector
895 
896  // '#'
898 
899  if( CSS1_IDENT==m_nToken )
900  {
901  // id_selector
902  pNew = new CSS1Selector( CSS1_SELTYPE_ID, m_aToken );
903  }
904  else
905  {
906  // missing id_selector
907  return pRoot;
908  }
909  break;
910 
911  case CSS1_PAGE_SYM:
912  {
913  // @page
914  pNew = new CSS1Selector( CSS1_SELTYPE_PAGE, m_aToken );
915  }
916  break;
917 
918  default:
919  // stop because we don't know what's next
920  bDone = true;
921  break;
922  }
923 
924  // if created a new selector then save it
925  if( pNew )
926  {
927  OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
928  "Root-Selector, but no Last" );
// append to the chain, or make it the head if the chain is still empty
929  if( pLast )
930  pLast->SetNext( pNew );
931  else
932  pRoot.reset(pNew);
933 
934  pLast = pNew;
935  pNew = nullptr;
936  }
937 
938  if( bNextToken && !bDone )
940  }
941 
942  if( !pRoot )
943  {
944  // missing simple_selector
945  return pRoot;
946  }
947 
948  // [ ':' pseudo_element ]?
950  {
951  // ':' pseudo element
953  if( CSS1_IDENT==m_nToken )
954  {
955  if (pLast)
958  }
959  else
960  {
961  // missing pseudo_element
962  return pRoot;
963  }
964  }
965 
966  return pRoot;
967 }
968 
969 // declaration
970 // : property ':' expr prio?
971 // | /* empty */
972 
973 // expression
974 // : term [ operator term ]*
975 
976 // term
977 // : unary_operator?
978 // [ NUMBER | STRING | PERCENTAGE | LENGTH | EMS | EXS | IDENT |
979 // HEXCOLOR | URL | RGB ]
980 
981 // operator
982 // : '/' | ',' | /* empty */
983 
984 // unary_operator
985 // : '-' | '+'
986 
987 // property
988 // : ident
989 
990 // A leading sign is only honoured for numeric terms (NUMBER, LENGTH,
991 // PIXLENGTH, EMS, EMX - not PERCENTAGE); a '-' negates m_nValue.
//
// Parse one declaration "property ':' expr prio?" starting at the current
// token m_nToken.
//
// rProperty: receives the property name (the IDENT token text) once the
//            current token has been accepted as a property.
//
// Returns the head of a singly linked chain of CSS1Expression objects, one
// per term. The first one is owned by the returned pointer, further terms
// are attached with SetNext(). Each term carries the operator ('/' or ',')
// that preceded it, or 0. The result is null if the property, the ':' or
// the first term is missing.
//
// NOTE(review): listing lines 1022 and 1098 are absent from this copy of the
// file; statements of this function are therefore missing at those places.
992 std::unique_ptr<CSS1Expression> CSS1Parser::ParseDeclaration( OUString& rProperty )
993 {
994  std::unique_ptr<CSS1Expression> pRoot;
995  CSS1Expression *pLast = nullptr;
996 
997  // property
998  if( CSS1_IDENT != m_nToken )
999  {
1000  // missing property
1001  return pRoot;
1002  }
1003  rProperty = m_aToken;
1004 
1005  m_nToken = GetNextToken();
1006 
1007  // ':'
1008  if( CSS1_COLON != m_nToken )
1009  {
1010  // missing ':'
1011  return pRoot;
1012  }
1013  m_nToken = GetNextToken();
1014 
1015  // term [operator term]*
1016  // here we're pretty lax regarding the syntax, but this shouldn't
1017  // be a problem
1018  bool bDone = false;
// cSign / cOp remember a sign or operator token until the next term is read
1019  sal_Unicode cSign = 0, cOp = 0;
1020  CSS1Expression *pNew = nullptr;
1021 
1023 
1024  while( !bDone && IsParserWorking() )
1025  {
1026  LOOP_CHECK_CHECK( "Infinite loop in ParseDeclaration()" )
1027 
1028  switch( m_nToken )
1029  {
1030  case CSS1_MINUS:
1031  cSign = '-';
1032  break;
1033 
1034  case CSS1_PLUS:
1035  cSign = '+';
1036  break;
1037 
1038  case CSS1_NUMBER:
1039  case CSS1_LENGTH:
1040  case CSS1_PIXLENGTH:
1041  case CSS1_EMS:
1042  case CSS1_EMX:
// numeric term: apply a pending '-' before the expression is created
1043  if( '-'==cSign )
1044  m_nValue = -m_nValue;
1045  [[fallthrough]];
1046  case CSS1_STRING:
1047  case CSS1_PERCENTAGE:
1048  case CSS1_IDENT:
1049  case CSS1_URL:
1050  case CSS1_RGB:
1051  case CSS1_HEXCOLOR:
1052  pNew = new CSS1Expression( m_nToken, m_aToken, m_nValue, cOp );
1053  m_nValue = 0; // otherwise this also is applied to next ident
1054  cSign = 0;
1055  cOp = 0;
1056  break;
1057 
1058  case CSS1_SLASH:
1059  cOp = '/';
1060  cSign = 0;
1061  break;
1062 
1063  case CSS1_COMMA:
1064  cOp = ',';
1065  cSign = 0;
1066  break;
1067 
1068  default:
// any other token ends the expression
1069  bDone = true;
1070  break;
1071  }
1072 
1073  // if created a new expression save it
1074  if( pNew )
1075  {
1076  OSL_ENSURE( (pRoot!=nullptr) == (pLast!=nullptr),
1077  "Root-Selector, but no Last" );
// append to the chain, or make it the head if the chain is still empty
1078  if( pLast )
1079  pLast->SetNext( pNew );
1080  else
1081  pRoot.reset(pNew);
1082 
1083  pLast = pNew;
1084  pNew = nullptr;
1085  }
1086 
1087  if( !bDone )
1088  m_nToken = GetNextToken();
1089  }
1090 
1091  if( !pRoot )
1092  {
1093  // missing term
1094  return pRoot;
1095  }
1096 
1097  // prio?
1099  {
1100  // IMPORTANT_SYM
1101  m_nToken = GetNextToken();
1102  }
1103 
1104  return pRoot;
1105 }
1106 
1108  : m_bWhiteSpace(false)
1109  , m_bEOF(false)
1110  , m_cNextCh(0)
1111  , m_nInPos(0)
1112  , m_nlLineNr(0)
1113  , m_nlLinePos(0)
1114  , m_nValue(0)
1115  , m_eState(CSS1_PAR_ACCEPTED)
1116  , m_nToken(CSS1_NULL)
1117 {
1118 }
1119 
1121 {
1122 }
1123 
// Parse a complete style sheet given as text.
//
// A working copy of rIn is first stripped of leading and trailing white
// space (space, tab, CR, LF). A leading "<!--" and a trailing "-->" are then
// removed; the text is not trimmed again afterwards. If nothing is left the
// function returns without parsing; otherwise the scanner is initialised
// with the remaining text and the parameterless ParseStyleSheet() is run.
//
// rIn: the raw style sheet text.
1124 void CSS1Parser::ParseStyleSheet( const OUString& rIn )
1125 {
1126  OUString aTmp( rIn );
1127 
1128  sal_Unicode c;
// drop leading white space, one character per iteration
1129  while( !aTmp.isEmpty() &&
1130  ( ' '==(c=aTmp[0]) || '\t'==c || '\r'==c || '\n'==c ) )
1131  aTmp = aTmp.copy( 1 );
1132 
// drop trailing white space, one character per iteration
1133  while( !aTmp.isEmpty() && ( ' '==(c=aTmp[aTmp.getLength()-1])
1134  || '\t'==c || '\r'==c || '\n'==c ) )
1135  aTmp = aTmp.copy( 0, aTmp.getLength()-1 );
1136 
1137  // remove SGML comments
1138  if( aTmp.getLength() >= 4 &&
1139  aTmp.startsWith( "<!--" ) )
1140  aTmp = aTmp.copy( 4 );
1141 
1142  if( aTmp.getLength() >=3 &&
1143  aTmp.endsWith("-->") )
1144  aTmp = aTmp.copy( 0, aTmp.getLength() - 3 );
1145 
1146  if( aTmp.isEmpty() )
1147  return;
1148 
1149  InitRead( aTmp );
1150 
1151  ParseStyleSheet();
1152 }
1153 
// Parse the content of an HTML style option, i.e. a list of declarations
// separated by ';' with no selector and no braces.
//
// Returns immediately if rIn is empty. Otherwise the scanner is initialised
// with rIn and leading semicolons are skipped. The first declaration is then
// parsed; if it yields no expression the function returns. Every parsed
// declaration is handed to DeclarationParsed(). After the first one, further
// declarations are read for as long as the current token is ';'. In that
// loop a ';' that is not followed by an IDENT is skipped, and a declaration
// that yields no expression is dropped without ending the loop.
//
// rIn: the text of the style option.
//
// NOTE(review): listing line 1175 is absent from this copy of the file; a
// statement of this function is therefore missing at that place.
1154 void CSS1Parser::ParseStyleOption( const OUString& rIn )
1155 {
1156  if( rIn.isEmpty() )
1157  return;
1158 
1159  InitRead( rIn );
1160 
1161  // fdo#41796: skip over spurious semicolons
1162  while (CSS1_SEMICOLON == m_nToken)
1163  {
1164  m_nToken = GetNextToken();
1165  }
1166 
1167  OUString aProperty;
1168  std::unique_ptr<CSS1Expression> pExpr = ParseDeclaration( aProperty );
1169  if( !pExpr )
1170  return;
1171 
1172  // process expression
1173  DeclarationParsed( aProperty, std::move(pExpr) );
1174 
1176 
1177  // [ ';' declaration ]*
1178  while( CSS1_SEMICOLON==m_nToken && IsParserWorking() )
1179  {
1180  LOOP_CHECK_CHECK( "Infinite loop in ParseStyleOption()" )
1181 
// consume the ';'
1182  m_nToken = GetNextToken();
1183  if( CSS1_IDENT==m_nToken )
1184  {
1185  std::unique_ptr<CSS1Expression> pExp = ParseDeclaration( aProperty );
1186  if( pExp )
1187  {
1188  // process expression
1189  DeclarationParsed( aProperty, std::move(pExp) );
1190  }
1191  }
1192  }
1193 }
1194 
// Called after a selector was parsed. This implementation does nothing with
// the selector it is given; the std::unique_ptr parameter is destroyed when
// the call returns.
1195 void CSS1Parser::SelectorParsed( std::unique_ptr<CSS1Selector> /* pSelector */, bool /*bFirst*/ )
1196 {
1197 }
1198 
// Called after a declaration was parsed. This implementation does nothing
// with the property name or the expression; the std::unique_ptr parameter is
// destroyed when the call returns.
1199 void CSS1Parser::DeclarationParsed( const OUString& /*rProperty*/,
1200  std::unique_ptr<CSS1Expression> /* pExpr */ )
1201 {
1202 }
1203 
1205 {
1206  delete pNext;
1207 }
1208 
1210 {
1211  delete pNext;
1212 }
1213 
// Extract the URL from an expression of the form "url(...)".
//
// rURL: receives the text between "url(" and the final character of aValue,
//       after it has been passed through comphelper::string::strip three
//       times (spaces, then single quotes, then spaces again). rURL is left
//       untouched if aValue has five characters or fewer.
//
// The expected shape of aValue (type CSS1_URL, "url" prefix, '(' at index 3,
// ')' as last character) is only asserted with OSL_ENSURE, not enforced.
1214 void CSS1Expression::GetURL( OUString& rURL ) const
1215 {
1216  OSL_ENSURE( CSS1_URL==eType, "CSS1-Expression is not URL" );
1217 
1218  OSL_ENSURE( aValue.startsWithIgnoreAsciiCase( "url" ) &&
1219  aValue.getLength() > 5 &&
1220  '(' == aValue[3] &&
1221  ')' == aValue[aValue.getLength()-1],
1222  "no valid URL(...)" );
1223 
1224  if( aValue.getLength() > 5 )
1225  {
// skip the 4 characters "url(" and leave out the last character
1226  rURL = aValue.copy( 4, aValue.getLength() - 5 );
1227 
1228  // tdf#94088: originally only spaces were stripped, but CSS style URLs
1229  // may also be quoted, so be prepared for spaces followed by a single
1230  // quote followed by spaces.
// NOTE(review): the earlier wording of this comment mentioned double
// quotes, but only the single quote character is stripped below - confirm
// whether double-quoted URLs need handling too.
1231  const sal_Unicode aSpace(' ');
1232  const sal_Unicode aSingleQuote('\'');
1233 
1234  rURL = comphelper::string::strip(rURL, aSpace);
1235  rURL = comphelper::string::strip(rURL, aSingleQuote);
1236  rURL = comphelper::string::strip(rURL, aSpace);
1237  }
1238 }
1239 
// Try to interpret this expression as a colour.
//
// rColor: receives the red, green and blue components when a colour was
//         recognised; it is not modified otherwise.
//
// Returns true if a colour was recognised. Handling depends on eType:
//  - CSS1_RGB: aValue must look like "rgb(...)"; up to three comma separated
//    components are read. Negative numbers become 0, components containing
//    '%' are limited to 100 and scaled to 0..255, other numbers are limited
//    to 255. The components are written to rColor directly.
//  - CSS1_IDENT / CSS1_STRING: the upper-cased value is looked up with
//    GetHTMLColor(). A STRING that is not a known name but starts with '#'
//    is then treated like a hex colour.
//  - CSS1_HEXCOLOR: three or six hex digits; with three digits every digit
//    is used twice. Missing digits count as '0' and characters that are not
//    hex digits contribute 0. This case always reports success.
//  - any other type: returns false.
//
// NOTE(review): listing line 1243 (part of the first OSL_ENSURE condition)
// is absent from this copy of the file.
1240 bool CSS1Expression::GetColor( Color &rColor ) const
1241 {
1242  OSL_ENSURE( CSS1_IDENT==eType || CSS1_RGB==eType ||
1244  "CSS1-Expression cannot be colour" );
1245 
1246  bool bRet = false;
// SAL_MAX_UINT32 marks "no packed colour value computed"
1247  sal_uInt32 nColor = SAL_MAX_UINT32;
1248 
1249  switch( eType )
1250  {
1251  case CSS1_RGB:
1252  {
1253  sal_uInt8 aColors[3] = { 0, 0, 0 };
1254 
1255  if (!aValue.startsWithIgnoreAsciiCase( "rgb" ) || aValue.getLength() < 6 ||
1256  aValue[3] != '(' || aValue[aValue.getLength()-1] != ')')
1257  {
1258  break;
1259  }
1260 
1261  sal_Int32 nPos = 4; // start after "rgb("
// components that are not present keep their initial value 0
1262  for ( int nCol = 0; nCol < 3 && nPos > 0; ++nCol )
1263  {
1264  const OUString aNumber = aValue.getToken(0, ',', nPos);
1265 
1266  sal_Int32 nNumber = aNumber.toInt32();
1267  if( nNumber<0 )
1268  {
1269  nNumber = 0;
1270  }
1271  else if( aNumber.indexOf('%') >= 0 )
1272  {
// percentage: limit to 100, then scale to the range 0..255
1273  if( nNumber > 100 )
1274  nNumber = 100;
1275  nNumber *= 255;
1276  nNumber /= 100;
1277  }
1278  else if( nNumber > 255 )
1279  nNumber = 255;
1280 
1281  aColors[nCol] = static_cast<sal_uInt8>(nNumber);
1282  }
1283 
1284  rColor.SetRed( aColors[0] );
1285  rColor.SetGreen( aColors[1] );
1286  rColor.SetBlue( aColors[2] );
1287 
// nColor stays SAL_MAX_UINT32 here, so the final block below is skipped
1288  bRet = true; // something different than a colour isn't possible
1289  }
1290  break;
1291 
1292  case CSS1_IDENT:
1293  case CSS1_STRING:
1294  {
1295  OUString aTmp( aValue.toAsciiUpperCase() );
1296  nColor = GetHTMLColor( aTmp );
1297  bRet = nColor != SAL_MAX_UINT32;
1298  }
// only an unrecognised STRING starting with '#' continues as a hex colour
1299  if( bRet || CSS1_STRING != eType || aValue.isEmpty() ||
1300  aValue[0] != '#' )
1301  break;
1302  [[fallthrough]];
1303  case CSS1_HEXCOLOR:
1304  {
1305  // MS-IE hack: colour can also be a string
// a STRING still carries its leading '#', which is skipped via nOffset
1306  sal_Int32 nOffset = CSS1_STRING==eType ? 1 : 0;
// exactly three digits: short form, every digit is used twice
1307  bool bDouble = aValue.getLength()-nOffset == 3;
1308  sal_Int32 i = nOffset, nEnd = (bDouble ? 3 : 6) + nOffset;
1309 
1310  nColor = 0;
1311  for( ; i<nEnd; i++ )
1312  {
// positions past the end of the value are read as '0'
1313  sal_Unicode c = (i<aValue.getLength() ? aValue[i]
1314  : '0' );
// convert the character to its hex digit value; 16 means "not a hex digit"
1315  if( c >= '0' && c <= '9' )
1316  c -= 48;
1317  else if( c >= 'A' && c <= 'F' )
1318  c -= 55;
1319  else if( c >= 'a' && c <= 'f' )
1320  c -= 87;
1321  else
1322  c = 16;
1323 
1324  nColor *= 16;
1325  if( c<16 )
1326  nColor += c;
1327  if( bDouble )
1328  {
1329  nColor *= 16;
1330  if( c<16 )
1331  nColor += c;
1332  }
1333  }
1334  bRet = true;
1335  }
1336  break;
1337  default:
1338  ;
1339  }
1340 
// unpack a 0xRRGGBB value produced by the name lookup or the hex parser
1341  if( bRet && nColor!=SAL_MAX_UINT32 )
1342  {
1343  rColor.SetRed( static_cast<sal_uInt8>((nColor & 0x00ff0000UL) >> 16) );
1344  rColor.SetGreen( static_cast<sal_uInt8>((nColor & 0x0000ff00UL) >> 8) );
1345  rColor.SetBlue( static_cast<sal_uInt8>(nColor & 0x000000ffUL) );
1346  }
1347 
1348  return bRet;
1349 }
1350 
1351 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
sal_Unicode m_cNextCh
Definition: parcss1.hxx:184
CSS1Token GetNextToken()
Definition: parcss1.cxx:105
void SetBlue(sal_uInt8 nBlue)
OUString m_aToken
Definition: parcss1.hxx:197
sal_uIntPtr sal_uLong
CSS1ParserState m_eState
Definition: parcss1.hxx:193
CSS1Token
Definition: parcss1.hxx:29
const sal_Int32 MAX_LEN
Definition: parcss1.cxx:57
bool m_bWhiteSpace
Definition: parcss1.hxx:181
bool IsParserWorking() const
Is the parser still working?
Definition: parcss1.hxx:209
sal_uInt32 GetHTMLColor(const OUString &rName)
#define LOOP_CHECK_RESTART
Definition: parcss1.cxx:40
sal_Unicode GetNextChar()
Definition: parcss1.cxx:75
a subexpression of a CSS1 declaration
Definition: parcss1.hxx:118
#define SAL_MAX_UINT32
sal_uInt16 sal_Unicode
char sal_Char
CSS1SelectorType
Definition: parcss1.hxx:70
OString strip(const OString &rIn, sal_Char c)
void ParseStyleSheet()
Definition: parcss1.cxx:672
sal_Int32 m_nInPos
Definition: parcss1.hxx:186
CSS1Token eType
Definition: parcss1.hxx:121
double m_nValue
Definition: parcss1.hxx:191
std::unique_ptr< CSS1Selector > ParseSelector()
Definition: parcss1.cxx:823
int i
void SetRed(sal_uInt8 nRed)
void SetNext(CSS1Expression *pNxt)
Definition: parcss1.hxx:146
#define LOOP_CHECK_CHECK(where)
Definition: parcss1.cxx:42
CSS1Selector * pNext
Definition: parcss1.hxx:95
DocumentType const eType
void ParseStyleOption(const OUString &rIn)
parse the content of a HTML style option
Definition: parcss1.cxx:1154
CSS1Token m_nToken
Definition: parcss1.hxx:194
bool IsEOF() const
Definition: parcss1.hxx:211
std::unique_ptr< CSS1Expression > ParseDeclaration(OUString &rProperty)
Definition: parcss1.cxx:992
OUString aValue
Definition: parcss1.hxx:122
unsigned char sal_uInt8
virtual void SelectorParsed(std::unique_ptr< CSS1Selector > pSelector, bool bFirst)
Called after a selector was parsed.
Definition: parcss1.cxx:1195
#define LOOP_CHECK_DECL
Definition: parcss1.cxx:38
bool GetColor(Color &rRGB) const
Definition: parcss1.cxx:1240
sal_uInt32 m_nlLineNr
Definition: parcss1.hxx:188
void SetGreen(sal_uInt8 nGreen)
void ParseRule()
Definition: parcss1.cxx:733
void InitRead(const OUString &rIn)
prepare parsing
Definition: parcss1.cxx:59
OUString m_aIn
Definition: parcss1.hxx:196
void GetURL(OUString &rURL) const
Definition: parcss1.cxx:1214
virtual void DeclarationParsed(const OUString &rProperty, std::unique_ptr< CSS1Expression > pExpr)
Called after a declaration or property was parsed.
Definition: parcss1.cxx:1199
virtual ~CSS1Parser()
Definition: parcss1.cxx:1120
A simple selector.
Definition: parcss1.hxx:91
CSS1Expression * pNext
Definition: parcss1.hxx:124
bool m_bEOF
Definition: parcss1.hxx:182
sal_Int32 nPos
void SetNext(CSS1Selector *pNxt)
Definition: parcss1.hxx:107
sal_uInt32 m_nlLinePos
Definition: parcss1.hxx:189