LibreOffice Module sc (master) 1
stringutil.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <stringutil.hxx>
21#include <svl/numformat.hxx>
22#include <svl/zforlist.hxx>
23
24#include <rtl/ustrbuf.hxx>
25#include <rtl/strbuf.hxx>
26#include <rtl/math.hxx>
27
29 mpNumFormatter(nullptr),
30 mbDetectNumberFormat(true),
31 meSetTextNumFormat(Never),
32 mbHandleApostrophe(true),
33 meStartListening(sc::SingleCellListening),
34 mbCheckLinkFormula(false)
35{
36}
37
39{
41 mbHandleApostrophe = false;
43}
44
46{
48 mbHandleApostrophe = true;
50}
51
53 const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal)
54{
55 // Actually almost the entire pre-check is unnecessary and we could call
56 // rtl::math::stringToDouble() just after having exchanged ascii space with
57 // non-breaking space, if it wasn't for check of grouped digits. The NaN
58 // and Inf cases that are accepted by stringToDouble() could be detected
59 // using std::isfinite() on the result.
60
61 /* TODO: The grouped digits check isn't even valid for locales that do not
62 * group in thousands ... e.g. Indian locales. But that's something also
63 * the number scanner doesn't implement yet, only the formatter. */
64
65 OUStringBuffer aBuf;
66
67 sal_Int32 i = 0;
68 sal_Int32 n = rStr.getLength();
69 const sal_Unicode* p = rStr.getStr();
70 const sal_Unicode* pLast = p + (n-1);
71 sal_Int32 nPosDSep = -1, nPosGSep = -1;
72 sal_uInt32 nDigitCount = 0;
73 bool haveSeenDigit = false;
74 sal_Int32 nPosExponent = -1;
75
76 // Skip preceding spaces.
77 for (i = 0; i < n; ++i, ++p)
78 {
79 sal_Unicode c = *p;
80 if (c != 0x0020 && c != 0x00A0)
81 // first non-space character. Exit.
82 break;
83 }
84
85 if (i == n)
86 // the whole string is space. Fail.
87 return false;
88
89 n -= i; // Subtract the length of the preceding spaces.
90
91 // Determine the last non-space character.
92 for (; p != pLast; --pLast, --n)
93 {
94 sal_Unicode c = *pLast;
95 if (c != 0x0020 && c != 0x00A0)
96 // Non space character. Exit.
97 break;
98 }
99
100 for (i = 0; i < n; ++i, ++p)
101 {
102 sal_Unicode c = *p;
103 if (c == 0x0020 && gsep == 0x00A0)
104 // ascii space to unicode space if that is group separator
105 c = 0x00A0;
106
107 if ('0' <= c && c <= '9')
108 {
109 // this is a digit.
110 aBuf.append(c);
111 haveSeenDigit = true;
112 ++nDigitCount;
113 }
114 else if (c == dsep || (dsepa && c == dsepa))
115 {
116 // this is a decimal separator.
117
118 if (nPosDSep >= 0)
119 // a second decimal separator -> not a valid number.
120 return false;
121
122 if (nPosGSep >= 0 && i - nPosGSep != 4)
123 // the number has a group separator and the decimal sep is not
124 // positioned correctly.
125 return false;
126
127 nPosDSep = i;
128 nPosGSep = -1;
129 aBuf.append(dsep); // append the separator that is parsed in stringToDouble() below
130 nDigitCount = 0;
131 }
132 else if (c == gsep)
133 {
134 // this is a group (thousand) separator.
135
136 if (!haveSeenDigit)
137 // not allowed before digits.
138 return false;
139
140 if (nPosDSep >= 0)
141 // not allowed after the decimal separator.
142 return false;
143
144 if (nPosGSep >= 0 && nDigitCount != 3)
145 // must be exactly 3 digits since the last group separator.
146 return false;
147
148 if (nPosExponent >= 0)
149 // not allowed in exponent.
150 return false;
151
152 nPosGSep = i;
153 nDigitCount = 0;
154 }
155 else if (c == '-' || c == '+')
156 {
157 // A sign must be the first character if it's given, or immediately
158 // follow the exponent character if present.
159 if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
160 aBuf.append(c);
161 else
162 return false;
163 }
164 else if (c == 'E' || c == 'e')
165 {
166 // this is an exponent designator.
167
168 if (nPosExponent >= 0)
169 // Only one exponent allowed.
170 return false;
171
172 if (nPosGSep >= 0 && nDigitCount != 3)
173 // must be exactly 3 digits since the last group separator.
174 return false;
175
176 aBuf.append(c);
177 nPosExponent = i;
178 nPosDSep = -1;
179 nPosGSep = -1;
180 nDigitCount = 0;
181 }
182 else
183 return false;
184 }
185
186 // finished parsing the number.
187
188 if (nPosGSep >= 0 && nDigitCount != 3)
189 // must be exactly 3 digits since the last group separator.
190 return false;
191
192 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
193 sal_Int32 nParseEnd = 0;
194 rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
195 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
196 // Not a valid number or not entire string consumed.
197 return false;
198
199 return true;
200}
201
203 const char* p, size_t n, char dsep, char gsep, double& rVal)
204{
205 // Actually almost the entire pre-check is unnecessary and we could call
206 // rtl::math::stringToDouble() just after having exchanged ascii space with
207 // non-breaking space, if it wasn't for check of grouped digits. The NaN
208 // and Inf cases that are accepted by stringToDouble() could be detected
209 // using std::isfinite() on the result.
210
211 /* TODO: The grouped digits check isn't even valid for locales that do not
212 * group in thousands ... e.g. Indian locales. But that's something also
213 * the number scanner doesn't implement yet, only the formatter. */
214
215 OStringBuffer aBuf;
216
217 size_t i = 0;
218 const char* pLast = p + (n-1);
219 sal_Int32 nPosDSep = -1, nPosGSep = -1;
220 sal_uInt32 nDigitCount = 0;
221 bool haveSeenDigit = false;
222 sal_Int32 nPosExponent = -1;
223
224 // Skip preceding spaces.
225 for (i = 0; i < n; ++i, ++p)
226 {
227 char c = *p;
228 if (c != ' ')
229 // first non-space character. Exit.
230 break;
231 }
232
233 if (i == n)
234 // the whole string is space. Fail.
235 return false;
236
237 n -= i; // Subtract the length of the preceding spaces.
238
239 // Determine the last non-space character.
240 for (; p != pLast; --pLast, --n)
241 {
242 char c = *pLast;
243 if (c != ' ')
244 // Non space character. Exit.
245 break;
246 }
247
248 for (i = 0; i < n; ++i, ++p)
249 {
250 char c = *p;
251
252 if ('0' <= c && c <= '9')
253 {
254 // this is a digit.
255 aBuf.append(c);
256 haveSeenDigit = true;
257 ++nDigitCount;
258 }
259 else if (c == dsep)
260 {
261 // this is a decimal separator.
262
263 if (nPosDSep >= 0)
264 // a second decimal separator -> not a valid number.
265 return false;
266
267 if (nPosGSep >= 0 && i - nPosGSep != 4)
268 // the number has a group separator and the decimal sep is not
269 // positioned correctly.
270 return false;
271
272 nPosDSep = i;
273 nPosGSep = -1;
274 aBuf.append(c);
275 nDigitCount = 0;
276 }
277 else if (c == gsep)
278 {
279 // this is a group (thousand) separator.
280
281 if (!haveSeenDigit)
282 // not allowed before digits.
283 return false;
284
285 if (nPosDSep >= 0)
286 // not allowed after the decimal separator.
287 return false;
288
289 if (nPosGSep >= 0 && nDigitCount != 3)
290 // must be exactly 3 digits since the last group separator.
291 return false;
292
293 if (nPosExponent >= 0)
294 // not allowed in exponent.
295 return false;
296
297 nPosGSep = i;
298 nDigitCount = 0;
299 }
300 else if (c == '-' || c == '+')
301 {
302 // A sign must be the first character if it's given, or immediately
303 // follow the exponent character if present.
304 if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))
305 aBuf.append(c);
306 else
307 return false;
308 }
309 else if (c == 'E' || c == 'e')
310 {
311 // this is an exponent designator.
312
313 if (nPosExponent >= 0)
314 // Only one exponent allowed.
315 return false;
316
317 if (nPosGSep >= 0 && nDigitCount != 3)
318 // must be exactly 3 digits since the last group separator.
319 return false;
320
321 aBuf.append(c);
322 nPosExponent = i;
323 nPosDSep = -1;
324 nPosGSep = -1;
325 nDigitCount = 0;
326 }
327 else
328 return false;
329 }
330
331 // finished parsing the number.
332
333 if (nPosGSep >= 0 && nDigitCount != 3)
334 // must be exactly 3 digits since the last group separator.
335 return false;
336
337 rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
338 sal_Int32 nParseEnd = 0;
339 rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
340 if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
341 // Not a valid number or not entire string consumed.
342 return false;
343
344 return true;
345}
346
347OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
348 sal_Unicode cTok, sal_Int32& rIndex )
349{
350 assert( !(rQuotedPairs.getLength()%2) );
351 assert( rQuotedPairs.indexOf(cTok) == -1 );
352
353 const sal_Unicode* pStr = rIn.getStr();
354 const sal_Unicode* pQuotedStr = rQuotedPairs.getStr();
355 sal_Unicode cQuotedEndChar = 0;
356 sal_Int32 nQuotedLen = rQuotedPairs.getLength();
357 sal_Int32 nLen = rIn.getLength();
358 sal_Int32 nTok = 0;
359 sal_Int32 nFirstChar = rIndex;
360 sal_Int32 i = nFirstChar;
361
362 // detect token position and length
363 pStr += i;
364 while ( i < nLen )
365 {
366 sal_Unicode c = *pStr;
367 if ( cQuotedEndChar )
368 {
369 // end of the quote reached ?
370 if ( c == cQuotedEndChar )
371 cQuotedEndChar = 0;
372 }
373 else
374 {
375 // Is the char a quote-begin char ?
376 sal_Int32 nQuoteIndex = 0;
377 while ( nQuoteIndex < nQuotedLen )
378 {
379 if ( pQuotedStr[nQuoteIndex] == c )
380 {
381 cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
382 break;
383 }
384 else
385 nQuoteIndex += 2;
386 }
387
388 // If the token-char matches then increase TokCount
389 if ( c == cTok )
390 {
391 ++nTok;
392
393 if ( nTok == nToken )
394 nFirstChar = i+1;
395 else
396 {
397 if ( nTok > nToken )
398 break;
399 }
400 }
401 }
402
403 ++pStr;
404 ++i;
405 }
406
407 if ( nTok >= nToken )
408 {
409 if ( i < nLen )
410 rIndex = i+1;
411 else
412 rIndex = -1;
413 return rIn.copy( nFirstChar, i-nFirstChar );
414 }
415 else
416 {
417 rIndex = -1;
418 return OUString();
419 }
420}
421
422bool ScStringUtil::isMultiline( std::u16string_view rStr )
423{
424 return rStr.find_first_of(u"\n\r") != std::u16string_view::npos;
425}
426
428 SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
429{
431 aRet.mnFormatType = SvNumFormatType::ALL;
433 aRet.maText = rStr;
434 aRet.mfValue = 0.0;
435
436 if (rStr.getLength() > 1 && rStr[0] == '=')
437 {
439 }
440 else if (rStr.getLength() > 1 && rStr[0] == '\'')
441 {
442 // for bEnglish, "'" at the beginning is always interpreted as text
443 // marker and stripped
444 aRet.maText = rStr.copy(1);
446 }
447 else // test for English number format (only)
448 {
449 sal_uInt32 nNumFormat = rFormatter.GetStandardIndex(eLang);
450
451 if (rFormatter.IsNumberFormat(rStr, nNumFormat, aRet.mfValue))
452 {
454 aRet.mnFormatType = rFormatter.GetType(nNumFormat);
455 }
456 else if (!rStr.isEmpty())
458
459 // the (English) number format is not set
460 //TODO: find and replace with matching local format???
461 }
462
463 return aRet;
464}
465
466/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
static bool parseSimpleNumber(const OUString &rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double &rVal)
Check if a given string is a simple decimal number (e.g.
Definition: stringutil.cxx:52
static OUString SC_DLLPUBLIC GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString &rQuotedPairs, sal_Unicode cTok, sal_Int32 &rIndex)
Definition: stringutil.cxx:347
static ScInputStringType parseInputString(SvNumberFormatter &rFormatter, const OUString &rStr, LanguageType eLang)
Definition: stringutil.cxx:427
static bool SC_DLLPUBLIC isMultiline(std::u16string_view rStr)
Definition: stringutil.cxx:422
sal_uInt32 GetStandardIndex(LanguageType eLnge=LANGUAGE_DONTKNOW)
SvNumFormatType GetType(sal_uInt32 nFIndex) const
bool IsNumberFormat(const OUString &sString, sal_uInt32 &F_Index, double &fOutNumber, SvNumInputOptions eInputOptions=SvNumInputOptions::NONE)
float u
void * p
sal_Int64 n
aBuf
int i
CAUTION! The following defines must be in the same namespace as the respective type.
@ SingleCellListening
Definition: types.hxx:126
DefTokenId nToken
Definition: qproform.cxx:397
SvNumFormatType mnFormatType
Definition: stringutil.hxx:125
StringType meType
Definition: stringutil.hxx:121
bool mbHandleApostrophe
When true, treat input with a leading apostrophe as an escape character for all content,...
Definition: stringutil.hxx:91
void setTextInput()
Call this whenever you need to unconditionally set input as text, no matter what the input is.
Definition: stringutil.cxx:38
void setNumericInput()
Call this whenever you need to maximize the chance of input being detected as a numeric value (number...
Definition: stringutil.cxx:45
bool mbDetectNumberFormat
When true, we try to detect special number format (dates etc) from the input string,...
Definition: stringutil.hxx:77
@ Always
Set Text number format if the input string can be parsed as a number or formula text.
Definition: stringutil.hxx:45
@ Never
Never set Text number format.
Definition: stringutil.hxx:62
TextFormatPolicy meSetTextNumFormat
Determine when to set the 'Text' number format to the cell where the input string is being set.
Definition: stringutil.hxx:83
sal_uInt16 sal_Unicode