LibreOffice Module sc (master)  1
stringutil.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <stringutil.hxx>
21 #include <svl/zforlist.hxx>
22 
23 #include <rtl/ustrbuf.hxx>
24 #include <rtl/strbuf.hxx>
25 #include <rtl/math.hxx>
26 
28  mpNumFormatter(nullptr),
29  mbDetectNumberFormat(true),
30  meSetTextNumFormat(Never),
31  mbHandleApostrophe(true),
32  meStartListening(sc::SingleCellListening),
33  mbCheckLinkFormula(false)
34 {
35 }
36 
38 {
39  mbDetectNumberFormat = false;
40  mbHandleApostrophe = false;
42 }
43 
45 {
46  mbDetectNumberFormat = true;
47  mbHandleApostrophe = true;
49 }
50 
52  const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal)
53 {
54  // Actually almost the entire pre-check is unnecessary and we could call
55  // rtl::math::stringToDouble() just after having exchanged ascii space with
56  // non-breaking space, if it wasn't for check of grouped digits. The NaN
57  // and Inf cases that are accepted by stringToDouble() could be detected
58  // using std::isfinite() on the result.
59 
60  /* TODO: The grouped digits check isn't even valid for locales that do not
61  * group in thousands ... e.g. Indian locales. But that's something also
62  * the number scanner doesn't implement yet, only the formatter. */
63 
64  OUStringBuffer aBuf;
65 
66  sal_Int32 i = 0;
67  sal_Int32 n = rStr.getLength();
68  const sal_Unicode* p = rStr.getStr();
69  const sal_Unicode* pLast = p + (n-1);
70  sal_Int32 nPosDSep = -1, nPosGSep = -1;
71  sal_uInt32 nDigitCount = 0;
72  bool haveSeenDigit = false;
73  sal_Int32 nPosExponent = -1;
74 
75  // Skip preceding spaces.
76  for (i = 0; i < n; ++i, ++p)
77  {
78  sal_Unicode c = *p;
79  if (c != 0x0020 && c != 0x00A0)
80  // first non-space character. Exit.
81  break;
82  }
83 
84  if (i == n)
85  // the whole string is space. Fail.
86  return false;
87 
88  n -= i; // Subtract the length of the preceding spaces.
89 
90  // Determine the last non-space character.
91  for (; p != pLast; --pLast, --n)
92  {
93  sal_Unicode c = *pLast;
94  if (c != 0x0020 && c != 0x00A0)
95  // Non space character. Exit.
96  break;
97  }
98 
99  for (i = 0; i < n; ++i, ++p)
100  {
101  sal_Unicode c = *p;
102  if (c == 0x0020 && gsep == 0x00A0)
103  // ascii space to unicode space if that is group separator
104  c = 0x00A0;
105 
106  if ('0' <= c && c <= '9')
107  {
108  // this is a digit.
109  aBuf.append(c);
110  haveSeenDigit = true;
111  ++nDigitCount;
112  }
113  else if (c == dsep || (dsepa && c == dsepa))
114  {
115  // this is a decimal separator.
116 
117  if (nPosDSep >= 0)
118  // a second decimal separator -> not a valid number.
119  return false;
120 
121  if (nPosGSep >= 0 && i - nPosGSep != 4)
122  // the number has a group separator and the decimal sep is not
123  // positioned correctly.
124  return false;
125 
126  nPosDSep = i;
127  nPosGSep = -1;
128  aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
129  nDigitCount = 0;
130  }
131  else if (c == gsep)
132  {
133  // this is a group (thousand) separator.
134 
135  if (!haveSeenDigit)
136  // not allowed before digits.
137  return false;
138 
139  if (nPosDSep >= 0)
140  // not allowed after the decimal separator.
141  return false;
142 
143  if (nPosGSep >= 0 && nDigitCount != 3)
144  // must be exactly 3 digits since the last group separator.
145  return false;
146 
147  if (nPosExponent >= 0)
148  // not allowed in exponent.
149  return false;
150 
151  nPosGSep = i;
152  nDigitCount = 0;
153  }
154  else if (c == '-' || c == '+')
155  {
156  // A sign must be the first character if it's given, or immediately
157  // follow the exponent character if present.
158  if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
159  aBuf.append(c);
160  else
161  return false;
162  }
163  else if (c == 'E' || c == 'e')
164  {
165  // this is an exponent designator.
166 
167  if (nPosExponent >= 0)
168  // Only one exponent allowed.
169  return false;
170 
171  if (nPosGSep >= 0 && nDigitCount != 3)
172  // must be exactly 3 digits since the last group separator.
173  return false;
174 
175  aBuf.append(c);
176  nPosExponent = i;
177  nPosDSep = -1;
178  nPosGSep = -1;
179  nDigitCount = 0;
180  }
181  else
182  return false;
183  }
184 
185  // finished parsing the number.
186 
187  if (nPosGSep >= 0 && nDigitCount != 3)
188  // must be exactly 3 digits since the last group separator.
189  return false;
190 
191  rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
192  sal_Int32 nParseEnd = 0;
193  OUString aString( aBuf.makeStringAndClear());
194  rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
195  if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
196  // Not a valid number or not entire string consumed.
197  return false;
198 
199  return true;
200 }
201 
203  const char* p, size_t n, char dsep, char gsep, double& rVal)
204 {
205  // Actually almost the entire pre-check is unnecessary and we could call
206  // rtl::math::stringToDouble() just after having exchanged ascii space with
207  // non-breaking space, if it wasn't for check of grouped digits. The NaN
208  // and Inf cases that are accepted by stringToDouble() could be detected
209  // using std::isfinite() on the result.
210 
211  /* TODO: The grouped digits check isn't even valid for locales that do not
212  * group in thousands ... e.g. Indian locales. But that's something also
213  * the number scanner doesn't implement yet, only the formatter. */
214 
215  OStringBuffer aBuf;
216 
217  size_t i = 0;
218  const char* pLast = p + (n-1);
219  sal_Int32 nPosDSep = -1, nPosGSep = -1;
220  sal_uInt32 nDigitCount = 0;
221  bool haveSeenDigit = false;
222  sal_Int32 nPosExponent = -1;
223 
224  // Skip preceding spaces.
225  for (i = 0; i < n; ++i, ++p)
226  {
227  char c = *p;
228  if (c != ' ')
229  // first non-space character. Exit.
230  break;
231  }
232 
233  if (i == n)
234  // the whole string is space. Fail.
235  return false;
236 
237  n -= i; // Subtract the length of the preceding spaces.
238 
239  // Determine the last non-space character.
240  for (; p != pLast; --pLast, --n)
241  {
242  char c = *pLast;
243  if (c != ' ')
244  // Non space character. Exit.
245  break;
246  }
247 
248  for (i = 0; i < n; ++i, ++p)
249  {
250  char c = *p;
251 
252  if ('0' <= c && c <= '9')
253  {
254  // this is a digit.
255  aBuf.append(c);
256  haveSeenDigit = true;
257  ++nDigitCount;
258  }
259  else if (c == dsep)
260  {
261  // this is a decimal separator.
262 
263  if (nPosDSep >= 0)
264  // a second decimal separator -> not a valid number.
265  return false;
266 
267  if (nPosGSep >= 0 && i - nPosGSep != 4)
268  // the number has a group separator and the decimal sep is not
269  // positioned correctly.
270  return false;
271 
272  nPosDSep = i;
273  nPosGSep = -1;
274  aBuf.append(c);
275  nDigitCount = 0;
276  }
277  else if (c == gsep)
278  {
279  // this is a group (thousand) separator.
280 
281  if (!haveSeenDigit)
282  // not allowed before digits.
283  return false;
284 
285  if (nPosDSep >= 0)
286  // not allowed after the decimal separator.
287  return false;
288 
289  if (nPosGSep >= 0 && nDigitCount != 3)
290  // must be exactly 3 digits since the last group separator.
291  return false;
292 
293  if (nPosExponent >= 0)
294  // not allowed in exponent.
295  return false;
296 
297  nPosGSep = i;
298  nDigitCount = 0;
299  }
300  else if (c == '-' || c == '+')
301  {
302  // A sign must be the first character if it's given, or immediately
303  // follow the exponent character if present.
304  if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
305  aBuf.append(c);
306  else
307  return false;
308  }
309  else if (c == 'E' || c == 'e')
310  {
311  // this is an exponent designator.
312 
313  if (nPosExponent >= 0)
314  // Only one exponent allowed.
315  return false;
316 
317  if (nPosGSep >= 0 && nDigitCount != 3)
318  // must be exactly 3 digits since the last group separator.
319  return false;
320 
321  aBuf.append(c);
322  nPosExponent = i;
323  nPosDSep = -1;
324  nPosGSep = -1;
325  nDigitCount = 0;
326  }
327  else
328  return false;
329  }
330 
331  // finished parsing the number.
332 
333  if (nPosGSep >= 0 && nDigitCount != 3)
334  // must be exactly 3 digits since the last group separator.
335  return false;
336 
337  rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
338  sal_Int32 nParseEnd = 0;
339  OString aString( aBuf.makeStringAndClear());
340  rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
341  if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
342  // Not a valid number or not entire string consumed.
343  return false;
344 
345  return true;
346 }
347 
348 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
349  sal_Unicode cTok, sal_Int32& rIndex )
350 {
351  assert( !(rQuotedPairs.getLength()%2) );
352  assert( rQuotedPairs.indexOf(cTok) == -1 );
353 
354  const sal_Unicode* pStr = rIn.getStr();
355  const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
356  sal_Unicode cQuotedEndChar = 0;
357  sal_Int32 nQuotedLen = rQuotedPairs.getLength();
358  sal_Int32 nLen = rIn.getLength();
359  sal_Int32 nTok = 0;
360  sal_Int32 nFirstChar = rIndex;
361  sal_Int32 i = nFirstChar;
362 
363  // detect token position and length
364  pStr += i;
365  while ( i < nLen )
366  {
367  sal_Unicode c = *pStr;
368  if ( cQuotedEndChar )
369  {
370  // end of the quote reached ?
371  if ( c == cQuotedEndChar )
372  cQuotedEndChar = 0;
373  }
374  else
375  {
376  // Is the char a quote-begin char ?
377  sal_Int32 nQuoteIndex = 0;
378  while ( nQuoteIndex < nQuotedLen )
379  {
380  if ( pQuotedStr[nQuoteIndex] == c )
381  {
382  cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
383  break;
384  }
385  else
386  nQuoteIndex += 2;
387  }
388 
389  // If the token-char matches then increase TokCount
390  if ( c == cTok )
391  {
392  ++nTok;
393 
394  if ( nTok == nToken )
395  nFirstChar = i+1;
396  else
397  {
398  if ( nTok > nToken )
399  break;
400  }
401  }
402  }
403 
404  ++pStr;
405  ++i;
406  }
407 
408  if ( nTok >= nToken )
409  {
410  if ( i < nLen )
411  rIndex = i+1;
412  else
413  rIndex = -1;
414  return rIn.copy( nFirstChar, i-nFirstChar );
415  }
416  else
417  {
418  rIndex = -1;
419  return OUString();
420  }
421 }
422 
423 bool ScStringUtil::isMultiline( const OUString& rStr )
424 {
425  if (rStr.indexOf('\n') != -1)
426  return true;
427 
428  if (rStr.indexOf('\r') != -1)
429  return true;
430 
431  return false;
432 }
433 
435  SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
436 {
437  ScInputStringType aRet;
438  aRet.mnFormatType = SvNumFormatType::ALL;
440  aRet.maText = rStr;
441  aRet.mfValue = 0.0;
442 
443  if (rStr.getLength() > 1 && rStr[0] == '=')
444  {
446  }
447  else if (rStr.getLength() > 1 && rStr[0] == '\'')
448  {
449  // for bEnglish, "'" at the beginning is always interpreted as text
450  // marker and stripped
451  aRet.maText = rStr.copy(1);
453  }
454  else // test for English number format (only)
455  {
456  sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
457 
458  if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
459  {
461  aRet.mnFormatType = rFormatter.GetType(nNumFormat);
462  }
463  else if (!rStr.isEmpty())
465 
466  // the (English) number format is not set
467  //TODO: find and replace with matching local format???
468  }
469 
470  return aRet;
471 }
472 
473 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static bool parseSimpleNumber(const OUString &rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double &rVal)
Check if a given string is a simple decimal number (e.g. 12.345).
Definition: stringutil.cxx:51
bool mbHandleApostrophe
When true, treat input with a leading apostrophe as an escape character for a numeric value content...
Definition: stringutil.hxx:91
void setTextInput()
Call this whenever you need to unconditionally set input as text, no matter what the input is...
Definition: stringutil.cxx:37
sal_Int64 n
aBuf
SvNumFormatType GetType(sal_uInt32 nFIndex) const
TextFormatPolicy meSetTextNumFormat
Determine when to set the 'Text' number format to the cell where the input string is being set...
Definition: stringutil.hxx:83
static bool SC_DLLPUBLIC isMultiline(const OUString &rStr)
Definition: stringutil.cxx:423
sal_uInt16 sal_Unicode
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
sal_uInt32 GetStandardIndex(LanguageType eLnge=LANGUAGE_DONTKNOW)
static ScInputStringType parseInputString(SvNumberFormatter &rFormatter, const OUString &rStr, LanguageType eLang)
Definition: stringutil.cxx:434
StringType meType
Definition: stringutil.hxx:121
int i
bool IsNumberFormat(const OUString &sString, sal_uInt32 &F_Index, double &fOutNumber, SvNumInputOptions eInputOptions=SvNumInputOptions::NONE)
DefTokenId nToken
Definition: qproform.cxx:399
bool mbDetectNumberFormat
When true, we try to detect special number format (dates etc) from the input string, when false, we only try to detect a basic decimal number format.
Definition: stringutil.hxx:77
SvNumFormatType mnFormatType
Definition: stringutil.hxx:125
static OUString SC_DLLPUBLIC GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString &rQuotedPairs, sal_Unicode cTok, sal_Int32 &rIndex)
Definition: stringutil.cxx:348
void * p
Set Text number format if the input string can be parsed as a number or formula text.
Definition: stringutil.hxx:45
Never set Text number format.
Definition: stringutil.hxx:62
void setNumericInput()
Call this whenever you need to maximize the chance of input being detected as a numeric value (number...
Definition: stringutil.cxx:44