sc/html/stringutil_8cxx_source.html

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */

/*

 * This file is part of the LibreOffice project.

 *

 * This Source Code Form is subject to the terms of the Mozilla Public

 * License, v. 2.0. If a copy of the MPL was not distributed with this

 * file, You can obtain one at http://mozilla.org/MPL/2.0/.

 *

 * This file incorporates work covered by the following license notice:

 *

 *   Licensed to the Apache Software Foundation (ASF) under one or more

 *   contributor license agreements. See the NOTICE file distributed

 *   with this work for additional information regarding copyright

 *   ownership. The ASF licenses this file to you under the Apache

 *   License, Version 2.0 (the "License"); you may not use this file

 *   except in compliance with the License. You may obtain a copy of

 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .

 */


#include <stringutil.hxx>

#include <svl/numformat.hxx>

#include <svl/zforlist.hxx>


#include <rtl/ustrbuf.hxx>

#include <rtl/strbuf.hxx>

#include <rtl/math.hxx>


/**
 * Construct the parameter set with its defaults: no number formatter
 * attached, number-format and scientific-number detection switched on, the
 * 'Text' number format never applied, leading apostrophes handled,
 * single-cell listening, and link-formula checking switched off.
 */
ScSetStringParam::ScSetStringParam()
    : mpNumFormatter(nullptr)
    , mbDetectNumberFormat(true)
    , mbDetectScientificNumberFormat(true)
    , meSetTextNumFormat(Never)
    , mbHandleApostrophe(true)
    , meStartListening(sc::SingleCellListening)
    , mbCheckLinkFormula(false)
{
}


/**
 * Configure the parameters so that input is unconditionally set as text,
 * no matter what the input is.
 */
void ScSetStringParam::setTextInput()
{
    // Switch off every kind of content detection ...
    mbDetectNumberFormat = mbDetectScientificNumberFormat = mbHandleApostrophe = false;

    // ... and always apply the 'Text' number format.
    meSetTextNumFormat = Always;
}


/**
 * Configure the parameters to maximize the chance of input being detected
 * as a numeric value.
 */
void ScSetStringParam::setNumericInput()
{
    // Switch on every kind of content detection ...
    mbDetectNumberFormat = mbDetectScientificNumberFormat = mbHandleApostrophe = true;

    // ... and never apply the 'Text' number format.
    meSetTextNumFormat = Never;
}


/**
 * Check whether a string is a simple decimal number and, if so, convert it.
 *
 * Leading and trailing spaces (U+0020) and no-break spaces (U+00A0) are
 * ignored.  What remains may consist of ASCII digits, one decimal separator
 * per mantissa, group separators, a sign and an exponent designator.  Group
 * separators are validated and dropped; everything else is collected and
 * handed to rtl::math::stringToDouble().
 *
 * @param rStr   text to examine.
 * @param dsep   decimal separator.
 * @param gsep   group (thousands) separator.
 * @param dsepa  alternative decimal separator; 0 disables it.
 * @param rVal   receives the value produced by stringToDouble().  It is not
 *               written when the pre-check rejects the input, but it is
 *               overwritten even if the conversion itself reports failure.
 * @param bDetectScientificNumber  when false, an exponent designator ('E' or
 *               'e') makes the input invalid.
 * @return true if the whole string was accepted as a number.
 */
bool ScStringUtil::parseSimpleNumber(
    const OUString& rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double& rVal, bool bDetectScientificNumber)
{
    // Actually almost the entire pre-check is unnecessary and we could call
    // rtl::math::stringToDouble() just after having exchanged ascii space with
    // non-breaking space, if it wasn't for check of grouped digits. The NaN
    // and Inf cases that are accepted by stringToDouble() could be detected
    // using std::isfinite() on the result.

    /* TODO: The grouped digits check isn't even valid for locales that do not
     * group in thousands ... e.g. Indian locales. But that's something also
     * the number scanner doesn't implement yet, only the formatter. */

    OUStringBuffer aBuf;

    sal_Int32 i = 0;
    sal_Int32 n = rStr.getLength();
    const sal_Unicode* p = rStr.getStr();
    sal_Int32 nPosDSep = -1, nPosGSep = -1;
    sal_uInt32 nDigitCount = 0;
    bool haveSeenDigit = false;
    sal_Int32 nPosExponent = -1;

    // Skip preceding spaces.
    for (i = 0; i < n; ++i, ++p)
    {
        sal_Unicode c = *p;
        if (c != 0x0020 && c != 0x00A0)
            // first non-space character.  Exit.
            break;
    }

    if (i == n)
        // the string is empty or consists of spaces only.  Fail.
        return false;

    n -= i; // Subtract the length of the preceding spaces.

    // p now points at a non-space character and n >= 1, so the pointer to
    // the last character is guaranteed to lie inside the buffer.  (Computing
    // it before the check above would step outside the buffer for an empty
    // string.)
    const sal_Unicode* pLast = p + (n-1);

    // Determine the last non-space character.
    for (; p != pLast; --pLast, --n)
    {
        sal_Unicode c = *pLast;
        if (c != 0x0020 && c != 0x00A0)
            // Non space character. Exit.
            break;
    }

    // From here on i is the offset relative to the first non-space character.
    for (i = 0; i < n; ++i, ++p)
    {
        sal_Unicode c = *p;
        if (c == 0x0020 && gsep == 0x00A0)
            // ascii space to unicode space if that is group separator
            c = 0x00A0;

        if ('0' <= c && c <= '9')
        {
            // this is a digit.
            aBuf.append(c);
            haveSeenDigit = true;
            ++nDigitCount;
        }
        else if (c == dsep || (dsepa && c == dsepa))
        {
            // this is a decimal separator.

            if (nPosDSep >= 0)
                // a second decimal separator -> not a valid number.
                return false;

            if (nPosGSep >= 0 && i - nPosGSep != 4)
                // the number has a group separator and the decimal sep is not
                // positioned correctly.
                return false;

            nPosDSep = i;
            nPosGSep = -1;
            aBuf.append(dsep);  // append the separator that is parsed in stringToDouble() below
            nDigitCount = 0;
        }
        else if (c == gsep)
        {
            // this is a group (thousand) separator.  It is only validated,
            // not copied into the buffer.

            if (!haveSeenDigit)
                // not allowed before digits.
                return false;

            if (nPosDSep >= 0)
                // not allowed after the decimal separator.
                return false;

            if (nPosGSep >= 0 && nDigitCount != 3)
                // must be exactly 3 digits since the last group separator.
                return false;

            if (nPosExponent >= 0)
                // not allowed in exponent.
                return false;

            nPosGSep = i;
            nDigitCount = 0;
        }
        else if (c == '-' || c == '+')
        {
            // A sign must be the first character if it's given, or immediately
            // follow the exponent character if present.
            if (i == 0 || (nPosExponent >= 0 && i == nPosExponent + 1))
                aBuf.append(c);
            else
                return false;
        }
        else if (c == 'E' || c == 'e')
        {
            // this is an exponent designator.

            if (nPosExponent >= 0 || !bDetectScientificNumber)
                // Only one exponent allowed, and none at all when scientific
                // notation detection is switched off.
                return false;

            if (nPosGSep >= 0 && nDigitCount != 3)
                // must be exactly 3 digits since the last group separator.
                return false;

            aBuf.append(c);
            nPosExponent = i;
            nPosDSep = -1;
            nPosGSep = -1;
            nDigitCount = 0;
        }
        else
            return false;
    }

    // finished parsing the number.

    if (nPosGSep >= 0 && nDigitCount != 3)
        // must be exactly 3 digits since the last group separator.
        return false;

    rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;
    sal_Int32 nParseEnd = 0;
    rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);
    if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())
        // Not a valid number or not entire string consumed.
        return false;

    return true;
}


bool ScStringUtil::parseSimpleNumber(

    const char* p, size_t n, char dsep, char gsep, double& rVal)

{

    // Actually almost the entire pre-check is unnecessary and we could call

    // rtl::math::stringToDouble() just after having exchanged ascii space with

    // non-breaking space, if it wasn't for check of grouped digits. The NaN

    // and Inf cases that are accepted by stringToDouble() could be detected

    // using std::isfinite() on the result.


    /* TODO: The grouped digits check isn't even valid for locales that do not

     * group in thousands ... e.g. Indian locales. But that's something also

     * the number scanner doesn't implement yet, only the formatter. */


    OStringBuffer aBuf;


    size_t i = 0;

    const char* pLast = p + (n-1);

    sal_Int32 nPosDSep = -1, nPosGSep = -1;

    sal_uInt32 nDigitCount = 0;

    bool haveSeenDigit = false;

    sal_Int32 nPosExponent = -1;


    // Skip preceding spaces.

    for (i = 0; i < n; ++i, ++p)

    {

        char c = *p;

        if (c != ' ')

            // first non-space character.  Exit.

            break;

    }


    if (i == n)

        // the whole string is space.  Fail.

        return false;


    n -= i; // Subtract the length of the preceding spaces.


    // Determine the last non-space character.

    for (; p != pLast; --pLast, --n)

    {

        char c = *pLast;

        if (c != ' ')

            // Non space character. Exit.

            break;

    }


    for (i = 0; i < n; ++i, ++p)

    {

        char c = *p;


        if ('0' <= c && c <= '9')

        {

            // this is a digit.

            aBuf.append(c);

            haveSeenDigit = true;

            ++nDigitCount;

        }

        else if (c == dsep)

        {

            // this is a decimal separator.


            if (nPosDSep >= 0)

                // a second decimal separator -> not a valid number.

                return false;


            if (nPosGSep >= 0 && i - nPosGSep != 4)

                // the number has a group separator and the decimal sep is not

                // positioned correctly.

                return false;


            nPosDSep = i;

            nPosGSep = -1;

            aBuf.append(c);

            nDigitCount = 0;

        }

        else if (c == gsep)

        {

            // this is a group (thousand) separator.


            if (!haveSeenDigit)

                // not allowed before digits.

                return false;


            if (nPosDSep >= 0)

                // not allowed after the decimal separator.

                return false;


            if (nPosGSep >= 0 && nDigitCount != 3)

                // must be exactly 3 digits since the last group separator.

                return false;


            if (nPosExponent >= 0)

                // not allowed in exponent.

                return false;


            nPosGSep = i;

            nDigitCount = 0;

        }

        else if (c == '-' || c == '+')

        {

            // A sign must be the first character if it's given, or immediately

            // follow the exponent character if present.

            if (i == 0 || (nPosExponent >= 0 && i == static_cast<size_t>(nPosExponent+1)))

                aBuf.append(c);

            else

                return false;

        }

        else if (c == 'E' || c == 'e')

        {

            // this is an exponent designator.


            if (nPosExponent >= 0)

                // Only one exponent allowed.

                return false;


            if (nPosGSep >= 0 && nDigitCount != 3)

                // must be exactly 3 digits since the last group separator.

                return false;


            aBuf.append(c);

            nPosExponent = i;

            nPosDSep = -1;

            nPosGSep = -1;

            nDigitCount = 0;

        }

        else

            return false;

    }


    // finished parsing the number.


    if (nPosGSep >= 0 && nDigitCount != 3)

        // must be exactly 3 digits since the last group separator.

        return false;


    rtl_math_ConversionStatus eStatus = rtl_math_ConversionStatus_Ok;

    sal_Int32 nParseEnd = 0;

    rVal = ::rtl::math::stringToDouble( aBuf, dsep, gsep, &eStatus, &nParseEnd);

    if (eStatus != rtl_math_ConversionStatus_Ok || nParseEnd < aBuf.getLength())

        // Not a valid number or not entire string consumed.

        return false;


    return true;

}


/**
 * Extract a token from a string, ignoring separators inside quoted sections.
 *
 * Scanning starts at rIndex.  Tokens are delimited by cTok; a cTok that
 * appears between a quote-begin character and its matching quote-end
 * character does not count as a delimiter.
 *
 * @param rIn           string to scan.
 * @param nToken        zero-based number of the wanted token, counted from
 *                      the position given by rIndex.
 * @param rQuotedPairs  quote characters as consecutive (begin, end) pairs; its
 *                      length must be even and it must not contain cTok.
 * @param cTok          token separator.
 * @param rIndex        in: position at which scanning starts; out: position
 *                      just behind the separator that ended the returned
 *                      token, or -1 if the end of the string was reached or
 *                      the token does not exist.  A start position that is
 *                      negative or beyond the end of the string yields an
 *                      empty result and -1.
 * @return the token, or an empty string if there is no such token.
 */
OUString ScStringUtil::GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString& rQuotedPairs,
                               sal_Unicode cTok, sal_Int32& rIndex )
{
    assert( !(rQuotedPairs.getLength()%2) );
    assert( rQuotedPairs.indexOf(cTok) == -1 );

    const sal_Int32     nLen            = rIn.getLength();

    // A start position outside the string (in particular the -1 this
    // function itself hands back for "no more tokens") must not be used to
    // address the buffer.
    if ( rIndex < 0 || rIndex > nLen )
    {
        rIndex = -1;
        return OUString();
    }

    const sal_Unicode*  pStr            = rIn.getStr();
    const sal_Unicode*  pQuotedStr      = rQuotedPairs.getStr();
    sal_Unicode         cQuotedEndChar  = 0;    // non-zero while inside a quoted section
    sal_Int32           nQuotedLen      = rQuotedPairs.getLength();
    sal_Int32           nTok            = 0;    // separators seen so far
    sal_Int32           nFirstChar      = rIndex;
    sal_Int32           i               = nFirstChar;

    // detect token position and length
    pStr += i;
    while ( i < nLen )
    {
        sal_Unicode c = *pStr;
        if ( cQuotedEndChar )
        {
            // end of the quote reached ?
            if ( c == cQuotedEndChar )
                cQuotedEndChar = 0;
        }
        else
        {
            // Is the char a quote-begin char ?  Begin characters sit at the
            // even positions, their end characters right behind them.
            sal_Int32 nQuoteIndex = 0;
            while ( nQuoteIndex < nQuotedLen )
            {
                if ( pQuotedStr[nQuoteIndex] == c )
                {
                    cQuotedEndChar = pQuotedStr[nQuoteIndex+1];
                    break;
                }
                else
                    nQuoteIndex += 2;
            }

            // If the token-char matches then increase TokCount
            if ( c == cTok )
            {
                ++nTok;

                if ( nTok == nToken )
                    // the wanted token starts behind this separator.
                    nFirstChar = i+1;
                else
                {
                    if ( nTok > nToken )
                        // this separator terminates the wanted token.
                        break;
                }
            }
        }

        ++pStr;
        ++i;
    }

    if ( nTok >= nToken )
    {
        if ( i < nLen )
            rIndex = i+1;
        else
            rIndex = -1;
        return rIn.copy( nFirstChar, i-nFirstChar );
    }
    else
    {
        rIndex = -1;
        return OUString();
    }
}


/**
 * Tell whether a string contains a line break.
 *
 * @param rStr  text to inspect.
 * @return true if rStr contains at least one line feed or carriage return
 *         character.
 */
bool ScStringUtil::isMultiline( std::u16string_view rStr )
{
    for (const char16_t cChar : rStr)
    {
        if (cChar == u'\n' || cChar == u'\r')
            return true;
    }
    return false;
}


/**
 * Classify an input string as formula, text or number.
 *
 * - More than one character and a leading '=': Formula, text unchanged.
 * - More than one character and a leading apostrophe: Text, with the
 *   apostrophe stripped from the stored text.
 * - Otherwise the formatter is asked whether the string is a number in the
 *   standard format of eLang; if so the result is a Number carrying the
 *   parsed value and the type of the format index the formatter reported.
 * - Any other non-empty string is Text; an empty string stays Unknown.
 *
 * @param rFormatter  number formatter used for number recognition.
 * @param rStr        the input string.
 * @param eLang       language whose standard number format is tried.
 * @return the classification together with text, value and format type.
 */
ScInputStringType ScStringUtil::parseInputString(
    SvNumberFormatter& rFormatter, const OUString& rStr, LanguageType eLang )
{
    ScInputStringType aResult;
    aResult.mnFormatType = SvNumFormatType::ALL;
    aResult.meType = ScInputStringType::Unknown;
    aResult.maText = rStr;
    aResult.mfValue = 0.0;

    // A marker character only counts when something follows it.
    const bool bHasContentAfterFirstChar = rStr.getLength() > 1;

    if (bHasContentAfterFirstChar && rStr[0] == '=')
    {
        aResult.meType = ScInputStringType::Formula;
        return aResult;
    }

    if (bHasContentAfterFirstChar && rStr[0] == '\'')
    {
        // a leading "'" is always interpreted as text marker and stripped
        aResult.maText = rStr.copy(1);
        aResult.meType = ScInputStringType::Text;
        return aResult;
    }

    // Test against the standard number format of the given language (only).
    // IsNumberFormat() takes the index by reference and may update it.
    sal_uInt32 nFormatIndex = rFormatter.GetStandardIndex(eLang);
    if (rFormatter.IsNumberFormat(rStr, nFormatIndex, aResult.mfValue))
    {
        aResult.meType = ScInputStringType::Number;
        aResult.mnFormatType = rFormatter.GetType(nFormatIndex);
    }
    else if (!rStr.isEmpty())
    {
        aResult.meType = ScInputStringType::Text;
    }

    // the number format itself is not set
    //TODO: find and replace with matching local format???

    return aResult;
}


/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

ScStringUtil::GetQuotedToken
static OUString SC_DLLPUBLIC GetQuotedToken(const OUString &rIn, sal_Int32 nToken, const OUString &rQuotedPairs, sal_Unicode cTok, sal_Int32 &rIndex)
Definition: stringutil.cxx:350

ScStringUtil::parseInputString
static ScInputStringType parseInputString(SvNumberFormatter &rFormatter, const OUString &rStr, LanguageType eLang)
Definition: stringutil.cxx:430

ScStringUtil::isMultiline
static bool SC_DLLPUBLIC isMultiline(std::u16string_view rStr)
Definition: stringutil.cxx:425

ScStringUtil::parseSimpleNumber
static bool parseSimpleNumber(const OUString &rStr, sal_Unicode dsep, sal_Unicode gsep, sal_Unicode dsepa, double &rVal, bool bDetectScientificNumber=true)
Check if a given string is a simple decimal number (e.g.
Definition: stringutil.cxx:55

SvNumberFormatter::GetStandardIndex
sal_uInt32 GetStandardIndex(LanguageType eLnge=LANGUAGE_DONTKNOW)

SvNumberFormatter::GetType
SvNumFormatType GetType(sal_uInt32 nFIndex) const

SvNumberFormatter::IsNumberFormat
bool IsNumberFormat(const OUString &sString, sal_uInt32 &F_Index, double &fOutNumber, SvNumInputOptions eInputOptions=SvNumInputOptions::NONE)

u
float u

p
void * p

n
sal_Int64 n

aBuf
aBuf

i
int i

sc
CAUTION! The following defines must be in the same namespace as the respective type.
Definition: broadcast.cxx:15

sc::SingleCellListening
@ SingleCellListening
Definition: types.hxx:126

numformat.hxx

nToken
DefTokenId nToken
Definition: qproform.cxx:397

stringutil.hxx

ScInputStringType
Definition: stringutil.hxx:121

ScInputStringType::mnFormatType
SvNumFormatType mnFormatType
Definition: stringutil.hxx:128

ScInputStringType::meType
StringType meType
Definition: stringutil.hxx:124

ScInputStringType::Text
@ Text
Definition: stringutil.hxx:122

ScInputStringType::Number
@ Number
Definition: stringutil.hxx:122

ScInputStringType::Unknown
@ Unknown
Definition: stringutil.hxx:122

ScInputStringType::Formula
@ Formula
Definition: stringutil.hxx:122

ScInputStringType::mfValue
double mfValue
Definition: stringutil.hxx:127

ScInputStringType::maText
OUString maText
Definition: stringutil.hxx:126

ScSetStringParam::mbHandleApostrophe
bool mbHandleApostrophe
When true, treat input with a leading apostrophe as an escape character for all content,...
Definition: stringutil.hxx:94

ScSetStringParam::setTextInput
void setTextInput()
Call this whenever you need to unconditionally set input as text, no matter what the input is.
Definition: stringutil.cxx:39

ScSetStringParam::mbDetectScientificNumberFormat
bool mbDetectScientificNumberFormat
Definition: stringutil.hxx:80

ScSetStringParam::setNumericInput
void setNumericInput()
Call this whenever you need to maximize the chance of input being detected as a numeric value (number...
Definition: stringutil.cxx:47

ScSetStringParam::mbDetectNumberFormat
bool mbDetectNumberFormat
Specify which number formats are detected: mbDetectNumberFormat=true && mbDetectScientificNumberForma...
Definition: stringutil.hxx:79

ScSetStringParam::ScSetStringParam
ScSetStringParam()
Definition: stringutil.cxx:28

ScSetStringParam::Always
@ Always
Set Text number format if the input string can be parsed as a number or formula text.
Definition: stringutil.hxx:45

ScSetStringParam::Never
@ Never
Never set Text number format.
Definition: stringutil.hxx:62

ScSetStringParam::meSetTextNumFormat
TextFormatPolicy meSetTextNumFormat
Determine when to set the 'Text' number format to the cell where the input string is being set.
Definition: stringutil.hxx:86

SvNumberFormatter

o3tl::strong_int< sal_uInt16, struct LanguageTypeTag >

sal_Unicode
sal_uInt16 sal_Unicode

zforlist.hxx