LibreOffice Module sc (master)  1
stringutil.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <stringutil.hxx>
21 #include <svl/numformat.hxx>
22 #include <svl/zforlist.hxx>
23 
24 #include <rtl/ustrbuf.hxx>
25 #include <rtl/strbuf.hxx>
26 #include <rtl/math.hxx>
27 
29  mpNumFormatter(nullptr),
30  mbDetectNumberFormat(true),
31  meSetTextNumFormat(Never),
32  mbHandleApostrophe(true),
33  meStartListening(sc::SingleCellListening),
34  mbCheckLinkFormula(false)
35 {
36 }
37 
39 {
40  mbDetectNumberFormat = false;
41  mbHandleApostrophe = false;
43 }
44 
46 {
47  mbDetectNumberFormat = true;
48  mbHandleApostrophe = true;
50 }
51 
53  const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal)
54 {
 // Purpose: check whether rStr is a simple decimal number and, if it is,
 // store its value in rVal and return true; return false otherwise.
 //   dsep  - decimal separator.
 //   gsep  - group (thousands) separator.
 //   dsepa - alternative decimal separator; not considered when it is 0.
 // Leading and trailing U+0020 / U+00A0 are ignored. The main loop validates
 // the placement of separators, signs and the exponent character while
 // copying the text into aBuf. Group separators are not copied, and an
 // alternative decimal separator is copied as dsep. aBuf is then converted
 // with rtl::math::stringToDouble().
55  // Actually almost the entire pre-check is unnecessary and we could call
56  // rtl::math::stringToDouble() just after having exchanged ascii space with
57  // non-breaking space, if it wasn't for check of grouped digits. The NaN
58  // and Inf cases that are accepted by stringToDouble() could be detected
59  // using std::isfinite() on the result.
60 
61  /* TODO: The grouped digits check isn't even valid for locales that do not
62  * group in thousands ... e.g. Indian locales. But that's something also
63  * the number scanner doesn't implement yet, only the formatter. */
64 
65  OUStringBuffer aBuf;
66 
67  sal_Int32 i = 0;
68  sal_Int32 n = rStr.getLength();
69  const sal_Unicode* p = rStr.getStr();
 // pLast is only dereferenced by the trailing-space loop below, which is
 // not reached for an empty or all-space string (i == n returns first).
70  const sal_Unicode* pLast = p + (n-1);
 // Positions are relative to the first non-space character; -1 means
 // "not seen (in the current part of the number)".
71  sal_Int32 nPosDSep = -1, nPosGSep = -1;
 // Digits seen since the last separator or exponent character.
72  sal_uInt32 nDigitCount = 0;
73  bool haveSeenDigit = false;
74  sal_Int32 nPosExponent = -1;
75 
76  // Skip preceding spaces.
77  for (i = 0; i < n; ++i, ++p)
78  {
79  sal_Unicode c = *p;
80  if (c != 0x0020 && c != 0x00A0)
81  // first non-space character. Exit.
82  break;
83  }
84 
85  if (i == n)
86  // the whole string is space. Fail.
87  return false;
88 
89  n -= i; // Subtract the length of the preceding spaces.
90 
91  // Determine the last non-space character.
 // n is decremented together with pLast, so afterwards n is the length of
 // the text with both leading and trailing spaces removed.
92  for (; p != pLast; --pLast, --n)
93  {
94  sal_Unicode c = *pLast;
95  if (c != 0x0020 && c != 0x00A0)
96  // Non space character. Exit.
97  break;
98  }
99 
 // i restarts at 0 here, so all positions recorded below count from the
 // first non-space character.
100  for (i = 0; i < n; ++i, ++p)
101  {
102  sal_Unicode c = *p;
103  if (c == 0x0020 && gsep == 0x00A0)
104  // ascii space to unicode space if that is group separator
105  c = 0x00A0;
106 
107  if ('0' <= c && c <= '9')
108  {
109  // this is a digit.
110  aBuf.append(c);
111  haveSeenDigit = true;
112  ++nDigitCount;
113  }
114  else if (c == dsep || (dsepa && c == dsepa))
115  {
116  // this is a decimal separator.
117 
118  if (nPosDSep >= 0)
119  // a second decimal separator -> not a valid number.
120  return false;
121 
 // A distance of 4 means exactly three digits lie between the last
 // group separator and this decimal separator.
122  if (nPosGSep >= 0 && i - nPosGSep != 4)
123  // the number has a group separator and the decimal sep is not
124  // positioned correctly.
125  return false;
126 
127  nPosDSep = i;
128  nPosGSep = -1;
129  aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
130  nDigitCount = 0;
131  }
132  else if (c == gsep)
133  {
134  // this is a group (thousand) separator.
 // Note that the separator itself is not appended to aBuf.
135 
136  if (!haveSeenDigit)
137  // not allowed before digits.
138  return false;
139 
140  if (nPosDSep >= 0)
141  // not allowed after the decimal separator.
142  return false;
143 
 // Only groups after the first separator are length-checked; the
 // digits before the first group separator are not counted here.
144  if (nPosGSep >= 0 && nDigitCount != 3)
145  // must be exactly 3 digits since the last group separator.
146  return false;
147 
148  if (nPosExponent >= 0)
149  // not allowed in exponent.
150  return false;
151 
152  nPosGSep = i;
153  nDigitCount = 0;
154  }
155  else if (c == '-' || c == '+')
156  {
157  // A sign must be the first character if it's given, or immediately
158  // follow the exponent character if present.
159  if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
160  aBuf.append(c);
161  else
162  return false;
163  }
164  else if (c == 'E' || c == 'e')
165  {
166  // this is an exponent designator.
167 
168  if (nPosExponent >= 0)
169  // Only one exponent allowed.
170  return false;
171 
172  if (nPosGSep >= 0 && nDigitCount != 3)
173  // must be exactly 3 digits since the last group separator.
174  return false;
175 
176  aBuf.append(c);
177  nPosExponent = i;
 // The separator bookkeeping of the mantissa is discarded once the
 // exponent starts.
178  nPosDSep = -1;
179  nPosGSep = -1;
180  nDigitCount = 0;
181  }
182  else
 // Any other character makes the string a non-number.
183  return false;
184  }
185 
186  // finished parsing the number.
187 
188  if (nPosGSep >= 0 && nDigitCount != 3)
189  // must be exactly 3 digits since the last group separator.
190  return false;
191 
192  rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
193  sal_Int32 nParseEnd = 0;
194  rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
 // Note that rVal has already been assigned at this point, also when the
 // check below makes the function return false.
195  if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
196  // Not a valid number or not entire string consumed.
197  return false;
198 
199  return true;
200 }
201 
203  const char* p, size_t n, char dsep, char gsep, double& rVal)
204 {
 // Purpose: narrow-character counterpart of the OUString based check above
 // (NOTE(review): the line carrying the function name is not visible here;
 // presumably this is an overload of parseSimpleNumber - confirm).
 // Checks whether the n characters starting at p form a simple decimal
 // number and, if so, stores the value in rVal and returns true.
 //   dsep - decimal separator.
 //   gsep - group (thousands) separator.
 // Only the ASCII space is treated as surrounding white space, and there
 // is no alternative decimal separator in this variant. Group separators
 // are validated but not copied into aBuf.
205  // Actually almost the entire pre-check is unnecessary and we could call
206  // rtl::math::stringToDouble() just after having exchanged ascii space with
207  // non-breaking space, if it wasn't for check of grouped digits. The NaN
208  // and Inf cases that are accepted by stringToDouble() could be detected
209  // using std::isfinite() on the result.
210 
211  /* TODO: The grouped digits check isn't even valid for locales that do not
212  * group in thousands ... e.g. Indian locales. But that's something also
213  * the number scanner doesn't implement yet, only the formatter. */
214 
215  OStringBuffer aBuf;
216 
217  size_t i = 0;
 // pLast is only dereferenced by the trailing-space loop below, which is
 // not reached when n is 0 or the input is all spaces (i == n returns first).
218  const char* pLast = p + (n-1);
 // Positions are relative to the first non-space character; -1 means
 // "not seen (in the current part of the number)".
219  sal_Int32 nPosDSep = -1, nPosGSep = -1;
 // Digits seen since the last separator or exponent character.
220  sal_uInt32 nDigitCount = 0;
221  bool haveSeenDigit = false;
222  sal_Int32 nPosExponent = -1;
223 
224  // Skip preceding spaces.
225  for (i = 0; i < n; ++i, ++p)
226  {
227  char c = *p;
228  if (c != ' ')
229  // first non-space character. Exit.
230  break;
231  }
232 
233  if (i == n)
234  // the whole string is space. Fail.
235  return false;
236 
237  n -= i; // Subtract the length of the preceding spaces.
238 
239  // Determine the last non-space character.
 // n is decremented together with pLast, so afterwards n is the length of
 // the text with both leading and trailing spaces removed.
240  for (; p != pLast; --pLast, --n)
241  {
242  char c = *pLast;
243  if (c != ' ')
244  // Non space character. Exit.
245  break;
246  }
247 
 // i restarts at 0 here, so all positions recorded below count from the
 // first non-space character.
248  for (i = 0; i < n; ++i, ++p)
249  {
250  char c = *p;
251 
252  if ('0' <= c && c <= '9')
253  {
254  // this is a digit.
255  aBuf.append(c);
256  haveSeenDigit = true;
257  ++nDigitCount;
258  }
259  else if (c == dsep)
260  {
261  // this is a decimal separator.
262 
263  if (nPosDSep >= 0)
264  // a second decimal separator -> not a valid number.
265  return false;
266 
 // A distance of 4 means exactly three digits lie between the last
 // group separator and this decimal separator.
267  if (nPosGSep >= 0 && i - nPosGSep != 4)
268  // the number has a group separator and the decimal sep is not
269  // positioned correctly.
270  return false;
271 
272  nPosDSep = i;
273  nPosGSep = -1;
274  aBuf.append(c);
275  nDigitCount = 0;
276  }
277  else if (c == gsep)
278  {
279  // this is a group (thousand) separator.
 // Note that the separator itself is not appended to aBuf.
280 
281  if (!haveSeenDigit)
282  // not allowed before digits.
283  return false;
284 
285  if (nPosDSep >= 0)
286  // not allowed after the decimal separator.
287  return false;
288 
 // Only groups after the first separator are length-checked; the
 // digits before the first group separator are not counted here.
289  if (nPosGSep >= 0 && nDigitCount != 3)
290  // must be exactly 3 digits since the last group separator.
291  return false;
292 
293  if (nPosExponent >= 0)
294  // not allowed in exponent.
295  return false;
296 
297  nPosGSep = i;
298  nDigitCount = 0;
299  }
300  else if (c == '-' || c == '+')
301  {
302  // A sign must be the first character if it's given, or immediately
303  // follow the exponent character if present.
 // The cast is needed because i is a size_t in this variant while
 // nPosExponent is a signed position.
304  if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
305  aBuf.append(c);
306  else
307  return false;
308  }
309  else if (c == 'E' || c == 'e')
310  {
311  // this is an exponent designator.
312 
313  if (nPosExponent >= 0)
314  // Only one exponent allowed.
315  return false;
316 
317  if (nPosGSep >= 0 && nDigitCount != 3)
318  // must be exactly 3 digits since the last group separator.
319  return false;
320 
321  aBuf.append(c);
322  nPosExponent = i;
 // The separator bookkeeping of the mantissa is discarded once the
 // exponent starts.
323  nPosDSep = -1;
324  nPosGSep = -1;
325  nDigitCount = 0;
326  }
327  else
 // Any other character makes the input a non-number.
328  return false;
329  }
330 
331  // finished parsing the number.
332 
333  if (nPosGSep >= 0 && nDigitCount != 3)
334  // must be exactly 3 digits since the last group separator.
335  return false;
336 
337  rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
338  sal_Int32 nParseEnd = 0;
339  rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
 // Note that rVal has already been assigned at this point, also when the
 // check below makes the function return false.
340  if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
341  // Not a valid number or not entire string consumed.
342  return false;
343 
344  return true;
345 }
346 
347 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
348  sal_Unicode cTok, sal_Int32& rIndex )
349 {
350  assert( !(rQuotedPairs.getLength()%2) );
351  assert( rQuotedPairs.indexOf(cTok) == -1 );
352 
353  const sal_Unicode* pStr = rIn.getStr();
354  const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
355  sal_Unicode cQuotedEndChar = 0;
356  sal_Int32 nQuotedLen = rQuotedPairs.getLength();
357  sal_Int32 nLen = rIn.getLength();
358  sal_Int32 nTok = 0;
359  sal_Int32 nFirstChar = rIndex;
360  sal_Int32 i = nFirstChar;
361 
362  // detect token position and length
363  pStr += i;
364  while ( i < nLen )
365  {
366  sal_Unicode c = *pStr;
367  if ( cQuotedEndChar )
368  {
369  // end of the quote reached ?
370  if ( c == cQuotedEndChar )
371  cQuotedEndChar = 0;
372  }
373  else
374  {
375  // Is the char a quote-begin char ?
376  sal_Int32 nQuoteIndex = 0;
377  while ( nQuoteIndex < nQuotedLen )
378  {
379  if ( pQuotedStr[nQuoteIndex] == c )
380  {
381  cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
382  break;
383  }
384  else
385  nQuoteIndex += 2;
386  }
387 
388  // If the token-char matches then increase TokCount
389  if ( c == cTok )
390  {
391  ++nTok;
392 
393  if ( nTok == nToken )
394  nFirstChar = i+1;
395  else
396  {
397  if ( nTok > nToken )
398  break;
399  }
400  }
401  }
402 
403  ++pStr;
404  ++i;
405  }
406 
407  if ( nTok >= nToken )
408  {
409  if ( i < nLen )
410  rIndex = i+1;
411  else
412  rIndex = -1;
413  return rIn.copy( nFirstChar, i-nFirstChar );
414  }
415  else
416  {
417  rIndex = -1;
418  return OUString();
419  }
420 }
421 
422 bool ScStringUtil::isMultiline( std::u16string_view rStr )
423 {
424  return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
425 }
426 
428  SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
429 {
430  ScInputStringType aRet;
431  aRet.mnFormatType = SvNumFormatType::ALL;
433  aRet.maText = rStr;
434  aRet.mfValue = 0.0;
435 
436  if (rStr.getLength() > 1 && rStr[0] == '=')
437  {
439  }
440  else if (rStr.getLength() > 1 && rStr[0] == '\'')
441  {
442  // for bEnglish, "'" at the beginning is always interpreted as text
443  // marker and stripped
444  aRet.maText = rStr.copy(1);
446  }
447  else // test for English number format (only)
448  {
449  sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
450 
451  if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
452  {
454  aRet.mnFormatType = rFormatter.GetType(nNumFormat);
455  }
456  else if (!rStr.isEmpty())
458 
459  // the (English) number format is not set
460  //TODO: find and replace with matching local format???
461  }
462 
463  return aRet;
464 }
465 
466 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static bool parseSimpleNumber(const OUString &rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double &rVal)
Check if a given string is a simple decimal number (e.g.
Definition: stringutil.cxx:52
bool mbHandleApostrophe
When true, treat input with a leading apostrophe as an escape character for a numeric value content...
Definition: stringutil.hxx:91
void setTextInput()
Call this whenever you need to unconditionally set input as text, no matter what the input is...
Definition: stringutil.cxx:38
sal_Int64 n
aBuf
SvNumFormatType GetType(sal_uInt32 nFIndex) const
TextFormatPolicy meSetTextNumFormat
Determine when to set the 'Text' number format to the cell where the input string is being set...
Definition: stringutil.hxx:83
sal_uInt16 sal_Unicode
sal_uInt32 GetStandardIndex(LanguageType eLnge=LANGUAGE_DONTKNOW)
static ScInputStringType parseInputString(SvNumberFormatter &rFormatter, const OUString &rStr, LanguageType eLang)
Definition: stringutil.cxx:427
StringType meType
Definition: stringutil.hxx:121
int i
CAUTION! The following defines must be in the same namespace as the respective type.
float u
bool IsNumberFormat(const OUString &sString, sal_uInt32 &F_Index, double &fOutNumber, SvNumInputOptions eInputOptions=SvNumInputOptions::NONE)
static bool SC_DLLPUBLIC isMultiline(std::u16string_view rStr)
Definition: stringutil.cxx:422
DefTokenId nToken
Definition: qproform.cxx:397
bool mbDetectNumberFormat
When true, we try to detect special number format (dates etc) from the input string, when false, we only try to detect a basic decimal number format.
Definition: stringutil.hxx:77
SvNumFormatType mnFormatType
Definition: stringutil.hxx:125
static OUString SC_DLLPUBLIC GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString &rQuotedPairs, sal_Unicode cTok, sal_Int32 &rIndex)
Definition: stringutil.cxx:347
void * p
Set Text number format if the input string can be parsed as a number or formula text.
Definition: stringutil.hxx:45
Never set Text number format.
Definition: stringutil.hxx:62
void setNumericInput()
Call this whenever you need to maximize the chance of input being detected as a numeric value (number...
Definition: stringutil.cxx:45
bool m_bDetectedRangeSegmentation false