LibreOffice Module sc (master)  1
stringutil.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <stringutil.hxx>
21 #include <global.hxx>
22 #include <svl/zforlist.hxx>
23 
24 #include <rtl/ustrbuf.hxx>
25 #include <rtl/strbuf.hxx>
26 #include <rtl/math.hxx>
27 
29  mpNumFormatter(nullptr),
30  mbDetectNumberFormat(true),
31  meSetTextNumFormat(Never),
32  mbHandleApostrophe(true),
33  meStartListening(sc::SingleCellListening),
34  mbCheckLinkFormula(false)
35 {
36 }
37 
39 {
40  mbDetectNumberFormat = false;
41  mbHandleApostrophe = false;
43 }
44 
46 {
47  mbDetectNumberFormat = true;
48  mbHandleApostrophe = true;
50 }
51 
53  const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal)
54 {
55  // Actually almost the entire pre-check is unnecessary and we could call
56  // rtl::math::stringToDouble() just after having exchanged ascii space with
57  // non-breaking space, if it wasn't for check of grouped digits. The NaN
58  // and Inf cases that are accepted by stringToDouble() could be detected
59  // using std::isfinite() on the result.
60 
61  /* TODO: The grouped digits check isn't even valid for locales that do not
62  * group in thousands ... e.g. Indian locales. But that's something also
63  * the number scanner doesn't implement yet, only the formatter. */
64 
65  OUStringBuffer aBuf;
66 
67  sal_Int32 i = 0;
68  sal_Int32 n = rStr.getLength();
69  const sal_Unicode* p = rStr.getStr();
70  const sal_Unicode* pLast = p + (n-1);
71  sal_Int32 nPosDSep = -1, nPosGSep = -1;
72  sal_uInt32 nDigitCount = 0;
73  bool haveSeenDigit = false;
74  sal_Int32 nPosExponent = -1;
75 
76  // Skip preceding spaces.
77  for (i = 0; i < n; ++i, ++p)
78  {
79  sal_Unicode c = *p;
80  if (c != 0x0020 && c != 0x00A0)
81  // first non-space character. Exit.
82  break;
83  }
84 
85  if (i == n)
86  // the whole string is space. Fail.
87  return false;
88 
89  n -= i; // Subtract the length of the preceding spaces.
90 
91  // Determine the last non-space character.
92  for (; p != pLast; --pLast, --n)
93  {
94  sal_Unicode c = *pLast;
95  if (c != 0x0020 && c != 0x00A0)
96  // Non space character. Exit.
97  break;
98  }
99 
100  for (i = 0; i < n; ++i, ++p)
101  {
102  sal_Unicode c = *p;
103  if (c == 0x0020 && gsep == 0x00A0)
104  // ascii space to unicode space if that is group separator
105  c = 0x00A0;
106 
107  if ('0' <= c && c <= '9')
108  {
109  // this is a digit.
110  aBuf.append(c);
111  haveSeenDigit = true;
112  ++nDigitCount;
113  }
114  else if (c == dsep || (dsepa && c == dsepa))
115  {
116  // this is a decimal separator.
117 
118  if (nPosDSep >= 0)
119  // a second decimal separator -> not a valid number.
120  return false;
121 
122  if (nPosGSep >= 0 && i - nPosGSep != 4)
123  // the number has a group separator and the decimal sep is not
124  // positioned correctly.
125  return false;
126 
127  nPosDSep = i;
128  nPosGSep = -1;
129  aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
130  nDigitCount = 0;
131  }
132  else if (c == gsep)
133  {
134  // this is a group (thousand) separator.
135 
136  if (!haveSeenDigit)
137  // not allowed before digits.
138  return false;
139 
140  if (nPosDSep >= 0)
141  // not allowed after the decimal separator.
142  return false;
143 
144  if (nPosGSep >= 0 && nDigitCount != 3)
145  // must be exactly 3 digits since the last group separator.
146  return false;
147 
148  if (nPosExponent >= 0)
149  // not allowed in exponent.
150  return false;
151 
152  nPosGSep = i;
153  nDigitCount = 0;
154  }
155  else if (c == '-' || c == '+')
156  {
157  // A sign must be the first character if it's given, or immediately
158  // follow the exponent character if present.
159  if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
160  aBuf.append(c);
161  else
162  return false;
163  }
164  else if (c == 'E' || c == 'e')
165  {
166  // this is an exponent designator.
167 
168  if (nPosExponent >= 0)
169  // Only one exponent allowed.
170  return false;
171 
172  if (nPosGSep >= 0 && nDigitCount != 3)
173  // must be exactly 3 digits since the last group separator.
174  return false;
175 
176  aBuf.append(c);
177  nPosExponent = i;
178  nPosDSep = -1;
179  nPosGSep = -1;
180  nDigitCount = 0;
181  }
182  else
183  return false;
184  }
185 
186  // finished parsing the number.
187 
188  if (nPosGSep >= 0 && nDigitCount != 3)
189  // must be exactly 3 digits since the last group separator.
190  return false;
191 
192  rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
193  sal_Int32 nParseEnd = 0;
194  OUString aString( aBuf.makeStringAndClear());
195  rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
196  if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
197  // Not a valid number or not entire string consumed.
198  return false;
199 
200  return true;
201 }
202 
204  const char* p, size_t n, char dsep, char gsep, double& rVal)
205 {
206  // Actually almost the entire pre-check is unnecessary and we could call
207  // rtl::math::stringToDouble() just after having exchanged ascii space with
208  // non-breaking space, if it wasn't for check of grouped digits. The NaN
209  // and Inf cases that are accepted by stringToDouble() could be detected
210  // using std::isfinite() on the result.
211 
212  /* TODO: The grouped digits check isn't even valid for locales that do not
213  * group in thousands ... e.g. Indian locales. But that's something also
214  * the number scanner doesn't implement yet, only the formatter. */
215 
216  OStringBuffer aBuf;
217 
218  size_t i = 0;
219  const char* pLast = p + (n-1);
220  sal_Int32 nPosDSep = -1, nPosGSep = -1;
221  sal_uInt32 nDigitCount = 0;
222  bool haveSeenDigit = false;
223  sal_Int32 nPosExponent = -1;
224 
225  // Skip preceding spaces.
226  for (i = 0; i < n; ++i, ++p)
227  {
228  char c = *p;
229  if (c != ' ')
230  // first non-space character. Exit.
231  break;
232  }
233 
234  if (i == n)
235  // the whole string is space. Fail.
236  return false;
237 
238  n -= i; // Subtract the length of the preceding spaces.
239 
240  // Determine the last non-space character.
241  for (; p != pLast; --pLast, --n)
242  {
243  char c = *pLast;
244  if (c != ' ')
245  // Non space character. Exit.
246  break;
247  }
248 
249  for (i = 0; i < n; ++i, ++p)
250  {
251  char c = *p;
252 
253  if ('0' <= c && c <= '9')
254  {
255  // this is a digit.
256  aBuf.append(c);
257  haveSeenDigit = true;
258  ++nDigitCount;
259  }
260  else if (c == dsep)
261  {
262  // this is a decimal separator.
263 
264  if (nPosDSep >= 0)
265  // a second decimal separator -> not a valid number.
266  return false;
267 
268  if (nPosGSep >= 0 && i - nPosGSep != 4)
269  // the number has a group separator and the decimal sep is not
270  // positioned correctly.
271  return false;
272 
273  nPosDSep = i;
274  nPosGSep = -1;
275  aBuf.append(c);
276  nDigitCount = 0;
277  }
278  else if (c == gsep)
279  {
280  // this is a group (thousand) separator.
281 
282  if (!haveSeenDigit)
283  // not allowed before digits.
284  return false;
285 
286  if (nPosDSep >= 0)
287  // not allowed after the decimal separator.
288  return false;
289 
290  if (nPosGSep >= 0 && nDigitCount != 3)
291  // must be exactly 3 digits since the last group separator.
292  return false;
293 
294  if (nPosExponent >= 0)
295  // not allowed in exponent.
296  return false;
297 
298  nPosGSep = i;
299  nDigitCount = 0;
300  }
301  else if (c == '-' || c == '+')
302  {
303  // A sign must be the first character if it's given, or immediately
304  // follow the exponent character if present.
305  if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
306  aBuf.append(c);
307  else
308  return false;
309  }
310  else if (c == 'E' || c == 'e')
311  {
312  // this is an exponent designator.
313 
314  if (nPosExponent >= 0)
315  // Only one exponent allowed.
316  return false;
317 
318  if (nPosGSep >= 0 && nDigitCount != 3)
319  // must be exactly 3 digits since the last group separator.
320  return false;
321 
322  aBuf.append(c);
323  nPosExponent = i;
324  nPosDSep = -1;
325  nPosGSep = -1;
326  nDigitCount = 0;
327  }
328  else
329  return false;
330  }
331 
332  // finished parsing the number.
333 
334  if (nPosGSep >= 0 && nDigitCount != 3)
335  // must be exactly 3 digits since the last group separator.
336  return false;
337 
338  rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
339  sal_Int32 nParseEnd = 0;
340  OString aString( aBuf.makeStringAndClear());
341  rVal = ::rtl::math::stringToDouble( aString, dsep, gsep, &eStatus, &nParseEnd);
342  if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aString.getLength())
343  // Not a valid number or not entire string consumed.
344  return false;
345 
346  return true;
347 }
348 
349 OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
350  sal_Unicode cTok, sal_Int32& rIndex )
351 {
352  assert( !(rQuotedPairs.getLength()%2) );
353  assert( rQuotedPairs.indexOf(cTok) == -1 );
354 
355  const sal_Unicode* pStr = rIn.getStr();
356  const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
357  sal_Unicode cQuotedEndChar = 0;
358  sal_Int32 nQuotedLen = rQuotedPairs.getLength();
359  sal_Int32 nLen = rIn.getLength();
360  sal_Int32 nTok = 0;
361  sal_Int32 nFirstChar = rIndex;
362  sal_Int32 i = nFirstChar;
363 
364  // detect token position and length
365  pStr += i;
366  while ( i < nLen )
367  {
368  sal_Unicode c = *pStr;
369  if ( cQuotedEndChar )
370  {
371  // end of the quote reached ?
372  if ( c == cQuotedEndChar )
373  cQuotedEndChar = 0;
374  }
375  else
376  {
377  // Is the char a quote-begin char ?
378  sal_Int32 nQuoteIndex = 0;
379  while ( nQuoteIndex < nQuotedLen )
380  {
381  if ( pQuotedStr[nQuoteIndex] == c )
382  {
383  cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
384  break;
385  }
386  else
387  nQuoteIndex += 2;
388  }
389 
390  // If the token-char matches then increase TokCount
391  if ( c == cTok )
392  {
393  ++nTok;
394 
395  if ( nTok == nToken )
396  nFirstChar = i+1;
397  else
398  {
399  if ( nTok > nToken )
400  break;
401  }
402  }
403  }
404 
405  ++pStr;
406  ++i;
407  }
408 
409  if ( nTok >= nToken )
410  {
411  if ( i < nLen )
412  rIndex = i+1;
413  else
414  rIndex = -1;
415  return rIn.copy( nFirstChar, i-nFirstChar );
416  }
417  else
418  {
419  rIndex = -1;
420  return OUString();
421  }
422 }
423 
424 bool ScStringUtil::isMultiline( const OUString& rStr )
425 {
426  if (rStr.indexOf('\n') != -1)
427  return true;
428 
429  if (rStr.indexOf('\r') != -1)
430  return true;
431 
432  return false;
433 }
434 
436  SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
437 {
438  ScInputStringType aRet;
439  aRet.mnFormatType = SvNumFormatType::ALL;
441  aRet.maText = rStr;
442  aRet.mfValue = 0.0;
443 
444  if (rStr.getLength() > 1 && rStr[0] == '=')
445  {
447  }
448  else if (rStr.getLength() > 1 && rStr[0] == '\'')
449  {
450  // for bEnglish, "'" at the beginning is always interpreted as text
451  // marker and stripped
452  aRet.maText = rStr.copy(1);
454  }
455  else // test for English number format (only)
456  {
457  sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
458 
459  if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
460  {
462  aRet.mnFormatType = rFormatter.GetType(nNumFormat);
463  }
464  else if (!rStr.isEmpty())
466 
467  // the (English) number format is not set
468  //TODO: find and replace with matching local format???
469  }
470 
471  return aRet;
472 }
473 
474 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static bool parseSimpleNumber(const OUString &rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double &rVal)
Check if a given string is a simple decimal number (e.g.
Definition: stringutil.cxx:52
bool mbHandleApostrophe
When true, treat input with a leading apostrophe as an escape character for a numeric value content...
Definition: stringutil.hxx:92
void setTextInput()
Call this whenever you need to unconditionally set input as text, no matter what the input is...
Definition: stringutil.cxx:38
sal_Int64 n
aBuf
SvNumFormatType GetType(sal_uInt32 nFIndex) const
TextFormatPolicy meSetTextNumFormat
Determine when to set the 'Text' number format to the cell where the input string is being set...
Definition: stringutil.hxx:84
static bool SC_DLLPUBLIC isMultiline(const OUString &rStr)
Definition: stringutil.cxx:424
sal_uInt16 sal_Unicode
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
sal_uInt32 GetStandardIndex(LanguageType eLnge=LANGUAGE_DONTKNOW)
static ScInputStringType parseInputString(SvNumberFormatter &rFormatter, const OUString &rStr, LanguageType eLang)
Definition: stringutil.cxx:435
StringType meType
Definition: stringutil.hxx:122
int i
bool IsNumberFormat(const OUString &sString, sal_uInt32 &F_Index, double &fOutNumber, SvNumInputOptions eInputOptions=SvNumInputOptions::NONE)
DefTokenId nToken
Definition: qproform.cxx:400
bool mbDetectNumberFormat
When true, we try to detect special number format (dates etc) from the input string, when false, we only try to detect a basic decimal number format.
Definition: stringutil.hxx:78
SvNumFormatType mnFormatType
Definition: stringutil.hxx:126
static OUString SC_DLLPUBLIC GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString &rQuotedPairs, sal_Unicode cTok, sal_Int32 &rIndex)
Definition: stringutil.cxx:349
void * p
Set Text number format if the input string can be parsed as a number or formula text.
Definition: stringutil.hxx:46
Never set Text number format.
Definition: stringutil.hxx:63
void setNumericInput()
Call this whenever you need to maximize the chance of input being detected as a numeric value (number...
Definition: stringutil.cxx:45