LibreOffice Module sc (master) 1
stringutil.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <stringutil.hxx>
21#include <svl/numformat.hxx>
22#include <svl/zforlist.hxx>
23
24#include <rtl/ustrbuf.hxx>
25#include <rtl/strbuf.hxx>
26#include <rtl/math.hxx>
27
29 mpNumFormatter(nullptr),
30 mbDetectNumberFormat(true),
31 mbDetectScientificNumberFormat(true),
32 meSetTextNumFormat(Never),
33 mbHandleApostrophe(true),
34 meStartListening(sc::SingleCellListening),
35 mbCheckLinkFormula(false)
36{
37}
38
40{
43 mbHandleApostrophe = false;
45}
46
48{
51 mbHandleApostrophe = true;
53}
54
56 const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal, bool bDetectScientificNumber)
57{
58 // Actually almost the entire pre-check is unnecessary and we could call
59 // rtl::math::stringToDouble() just after having exchanged ascii space with
60 // non-breaking space, if it wasn't for check of grouped digits. The NaN
61 // and Inf cases that are accepted by stringToDouble() could be detected
62 // using std::isfinite() on the result.
63
64 /* TODO: The grouped digits check isn't even valid for locales that do not
65 * group in thousands ... e.g. Indian locales. But that's something also
66 * the number scanner doesn't implement yet, only the formatter. */
67
68 OUStringBuffer aBuf;
69
70 sal_Int32 i = 0;
71 sal_Int32 n = rStr.getLength();
72 const sal_Unicode* p = rStr.getStr();
73 const sal_Unicode* pLast = p + (n-1);
74 sal_Int32 nPosDSep = -1, nPosGSep = -1;
75 sal_uInt32 nDigitCount = 0;
76 bool haveSeenDigit = false;
77 sal_Int32 nPosExponent = -1;
78
79 // Skip preceding spaces.
80 for (i = 0; i < n; ++i, ++p)
81 {
82 sal_Unicode c = *p;
83 if (c != 0x0020 && c != 0x00A0)
84 // first non-space character. Exit.
85 break;
86 }
87
88 if (i == n)
89 // the whole string is space. Fail.
90 return false;
91
92 n -= i; // Subtract the length of the preceding spaces.
93
94 // Determine the last non-space character.
95 for (; p != pLast; --pLast, --n)
96 {
97 sal_Unicode c = *pLast;
98 if (c != 0x0020 && c != 0x00A0)
99 // Non space character. Exit.
100 break;
101 }
102
103 for (i = 0; i < n; ++i, ++p)
104 {
105 sal_Unicode c = *p;
106 if (c == 0x0020 && gsep == 0x00A0)
107 // ascii space to unicode space if that is group separator
108 c = 0x00A0;
109
110 if ('0' <= c && c <= '9')
111 {
112 // this is a digit.
113 aBuf.append(c);
114 haveSeenDigit = true;
115 ++nDigitCount;
116 }
117 else if (c == dsep || (dsepa && c == dsepa))
118 {
119 // this is a decimal separator.
120
121 if (nPosDSep >= 0)
122 // a second decimal separator -> not a valid number.
123 return false;
124
125 if (nPosGSep >= 0 && i - nPosGSep != 4)
126 // the number has a group separator and the decimal sep is not
127 // positioned correctly.
128 return false;
129
130 nPosDSep = i;
131 nPosGSep = -1;
132 aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
133 nDigitCount = 0;
134 }
135 else if (c == gsep)
136 {
137 // this is a group (thousand) separator.
138
139 if (!haveSeenDigit)
140 // not allowed before digits.
141 return false;
142
143 if (nPosDSep >= 0)
144 // not allowed after the decimal separator.
145 return false;
146
147 if (nPosGSep >= 0 && nDigitCount != 3)
148 // must be exactly 3 digits since the last group separator.
149 return false;
150
151 if (nPosExponent >= 0)
152 // not allowed in exponent.
153 return false;
154
155 nPosGSep = i;
156 nDigitCount = 0;
157 }
158 else if (c == '-' || c == '+')
159 {
160 // A sign must be the first character if it's given, or immediately
161 // follow the exponent character if present.
162 if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
163 aBuf.append(c);
164 else
165 return false;
166 }
167 else if (c == 'E' || c == 'e')
168 {
169 // this is an exponent designator.
170
171 if (nPosExponent >= 0 || !bDetectScientificNumber)
172 // Only one exponent allowed.
173 return false;
174
175 if (nPosGSep >= 0 && nDigitCount != 3)
176 // must be exactly 3 digits since the last group separator.
177 return false;
178
179 aBuf.append(c);
180 nPosExponent = i;
181 nPosDSep = -1;
182 nPosGSep = -1;
183 nDigitCount = 0;
184 }
185 else
186 return false;
187 }
188
189 // finished parsing the number.
190
191 if (nPosGSep >= 0 && nDigitCount != 3)
192 // must be exactly 3 digits since the last group separator.
193 return false;
194
195 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
196 sal_Int32 nParseEnd = 0;
197 rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
198 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
199 // Not a valid number or not entire string consumed.
200 return false;
201
202 return true;
203}
204
206 const char* p, size_t n, char dsep, char gsep, double& rVal)
207{
208 // Actually almost the entire pre-check is unnecessary and we could call
209 // rtl::math::stringToDouble() just after having exchanged ascii space with
210 // non-breaking space, if it wasn't for check of grouped digits. The NaN
211 // and Inf cases that are accepted by stringToDouble() could be detected
212 // using std::isfinite() on the result.
213
214 /* TODO: The grouped digits check isn't even valid for locales that do not
215 * group in thousands ... e.g. Indian locales. But that's something also
216 * the number scanner doesn't implement yet, only the formatter. */
217
218 OStringBuffer aBuf;
219
220 size_t i = 0;
221 const char* pLast = p + (n-1);
222 sal_Int32 nPosDSep = -1, nPosGSep = -1;
223 sal_uInt32 nDigitCount = 0;
224 bool haveSeenDigit = false;
225 sal_Int32 nPosExponent = -1;
226
227 // Skip preceding spaces.
228 for (i = 0; i < n; ++i, ++p)
229 {
230 char c = *p;
231 if (c != ' ')
232 // first non-space character. Exit.
233 break;
234 }
235
236 if (i == n)
237 // the whole string is space. Fail.
238 return false;
239
240 n -= i; // Subtract the length of the preceding spaces.
241
242 // Determine the last non-space character.
243 for (; p != pLast; --pLast, --n)
244 {
245 char c = *pLast;
246 if (c != ' ')
247 // Non space character. Exit.
248 break;
249 }
250
251 for (i = 0; i < n; ++i, ++p)
252 {
253 char c = *p;
254
255 if ('0' <= c && c <= '9')
256 {
257 // this is a digit.
258 aBuf.append(c);
259 haveSeenDigit = true;
260 ++nDigitCount;
261 }
262 else if (c == dsep)
263 {
264 // this is a decimal separator.
265
266 if (nPosDSep >= 0)
267 // a second decimal separator -> not a valid number.
268 return false;
269
270 if (nPosGSep >= 0 && i - nPosGSep != 4)
271 // the number has a group separator and the decimal sep is not
272 // positioned correctly.
273 return false;
274
275 nPosDSep = i;
276 nPosGSep = -1;
277 aBuf.append(c);
278 nDigitCount = 0;
279 }
280 else if (c == gsep)
281 {
282 // this is a group (thousand) separator.
283
284 if (!haveSeenDigit)
285 // not allowed before digits.
286 return false;
287
288 if (nPosDSep >= 0)
289 // not allowed after the decimal separator.
290 return false;
291
292 if (nPosGSep >= 0 && nDigitCount != 3)
293 // must be exactly 3 digits since the last group separator.
294 return false;
295
296 if (nPosExponent >= 0)
297 // not allowed in exponent.
298 return false;
299
300 nPosGSep = i;
301 nDigitCount = 0;
302 }
303 else if (c == '-' || c == '+')
304 {
305 // A sign must be the first character if it's given, or immediately
306 // follow the exponent character if present.
307 if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
308 aBuf.append(c);
309 else
310 return false;
311 }
312 else if (c == 'E' || c == 'e')
313 {
314 // this is an exponent designator.
315
316 if (nPosExponent >= 0)
317 // Only one exponent allowed.
318 return false;
319
320 if (nPosGSep >= 0 && nDigitCount != 3)
321 // must be exactly 3 digits since the last group separator.
322 return false;
323
324 aBuf.append(c);
325 nPosExponent = i;
326 nPosDSep = -1;
327 nPosGSep = -1;
328 nDigitCount = 0;
329 }
330 else
331 return false;
332 }
333
334 // finished parsing the number.
335
336 if (nPosGSep >= 0 && nDigitCount != 3)
337 // must be exactly 3 digits since the last group separator.
338 return false;
339
340 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
341 sal_Int32 nParseEnd = 0;
342 rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
343 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
344 // Not a valid number or not entire string consumed.
345 return false;
346
347 return true;
348}
349
350OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
351 sal_Unicode cTok, sal_Int32& rIndex )
352{
353 assert( !(rQuotedPairs.getLength()%2) );
354 assert( rQuotedPairs.indexOf(cTok) == -1 );
355
356 const sal_Unicode* pStr = rIn.getStr();
357 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
358 sal_Unicode cQuotedEndChar = 0;
359 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
360 sal_Int32 nLen = rIn.getLength();
361 sal_Int32 nTok = 0;
362 sal_Int32 nFirstChar = rIndex;
363 sal_Int32 i = nFirstChar;
364
365 // detect token position and length
366 pStr += i;
367 while ( i < nLen )
368 {
369 sal_Unicode c = *pStr;
370 if ( cQuotedEndChar )
371 {
372 // end of the quote reached ?
373 if ( c == cQuotedEndChar )
374 cQuotedEndChar = 0;
375 }
376 else
377 {
378 // Is the char a quote-begin char ?
379 sal_Int32 nQuoteIndex = 0;
380 while ( nQuoteIndex < nQuotedLen )
381 {
382 if ( pQuotedStr[nQuoteIndex] == c )
383 {
384 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
385 break;
386 }
387 else
388 nQuoteIndex += 2;
389 }
390
391 // If the token-char matches then increase TokCount
392 if ( c == cTok )
393 {
394 ++nTok;
395
396 if ( nTok == nToken )
397 nFirstChar = i+1;
398 else
399 {
400 if ( nTok > nToken )
401 break;
402 }
403 }
404 }
405
406 ++pStr;
407 ++i;
408 }
409
410 if ( nTok >= nToken )
411 {
412 if ( i < nLen )
413 rIndex = i+1;
414 else
415 rIndex = -1;
416 return rIn.copy( nFirstChar, i-nFirstChar );
417 }
418 else
419 {
420 rIndex = -1;
421 return OUString();
422 }
423}
424
425bool ScStringUtil::isMultiline( std::u16string_view rStr )
426{
427 return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
428}
429
431 SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
432{
434 aRet.mnFormatType = SvNumFormatType::ALL;
436 aRet.maText = rStr;
437 aRet.mfValue = 0.0;
438
439 if (rStr.getLength() > 1 && rStr[0] == '=')
440 {
442 }
443 else if (rStr.getLength() > 1 && rStr[0] == '\'')
444 {
445 // for bEnglish, "'" at the beginning is always interpreted as text
446 // marker and stripped
447 aRet.maText = rStr.copy(1);
449 }
450 else // test for English number format (only)
451 {
452 sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
453
454 if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
455 {
457 aRet.mnFormatType = rFormatter.GetType(nNumFormat);
458 }
459 else if (!rStr.isEmpty())
461
462 // the (English) number format is not set
463 //TODO: find and replace with matching local format???
464 }
465
466 return aRet;
467}
468
469/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static OUString SC_DLLPUBLIC GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString &rQuotedPairs, sal_Unicode cTok, sal_Int32 &rIndex)
Definition: stringutil.cxx:350
static ScInputStringType parseInputString(SvNumberFormatter &rFormatter, const OUString &rStr, LanguageType eLang)
Definition: stringutil.cxx:430
static bool SC_DLLPUBLIC isMultiline(std::u16string_view rStr)
Definition: stringutil.cxx:425
static bool parseSimpleNumber(const OUString &rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double &rVal, bool bDetectScientificNumber=true)
Check if a given string is a simple decimal number (e.g.
Definition: stringutil.cxx:55
sal_uInt32 GetStandardIndex(LanguageType eLnge=LANGUAGE_DONTKNOW)
SvNumFormatType GetType(sal_uInt32 nFIndex) const
bool IsNumberFormat(const OUString &sString, sal_uInt32 &F_Index, double &fOutNumber, SvNumInputOptions eInputOptions=SvNumInputOptions::NONE)
float u
void * p
sal_Int64 n
aBuf
int i
CAUTION! The following defines must be in the same namespace as the respective type.
Definition: broadcast.cxx:15
@ SingleCellListening
Definition: types.hxx:126
DefTokenId nToken
Definition: qproform.cxx:397
SvNumFormatType mnFormatType
Definition: stringutil.hxx:128
StringType meType
Definition: stringutil.hxx:124
bool mbHandleApostrophe
When true, treat input with a leading apostrophe as an escape character for all content,...
Definition: stringutil.hxx:94
void setTextInput()
Call this whenever you need to unconditionally set input as text, no matter what the input is.
Definition: stringutil.cxx:39
bool mbDetectScientificNumberFormat
Definition: stringutil.hxx:80
void setNumericInput()
Call this whenever you need to maximize the chance of input being detected as a numeric value (number...
Definition: stringutil.cxx:47
bool mbDetectNumberFormat
Specify which number formats are detected: mbDetectNumberFormat=true && mbDetectScientificNumberForma...
Definition: stringutil.hxx:79
@ Always
Set Text number format if the input string can be parsed as a number or formula text.
Definition: stringutil.hxx:45
@ Never
Never set Text number format.
Definition: stringutil.hxx:62
TextFormatPolicy meSetTextNumFormat
Determine when to set the 'Text' number format to the cell where the input string is being set.
Definition: stringutil.hxx:86
sal_uInt16 sal_Unicode