LibreOffice Module sc (master)  1
Typedefs | Enumerations | Functions | Variables
impex.cxx File Reference
#include <comphelper/processfactory.hxx>
#include <i18nlangtag/languagetag.hxx>
#include <sot/formats.hxx>
#include <sfx2/mieclip.hxx>
#include <com/sun/star/i18n/CalendarFieldIndex.hpp>
#include <sal/log.hxx>
#include <unotools/charclass.hxx>
#include <osl/module.hxx>
#include <global.hxx>
#include <docsh.hxx>
#include <undoblk.hxx>
#include <rangenam.hxx>
#include <tabvwsh.hxx>
#include <filter.hxx>
#include <asciiopt.hxx>
#include <formulacell.hxx>
#include <cellform.hxx>
#include <progress.hxx>
#include <scitems.hxx>
#include <editable.hxx>
#include <compiler.hxx>
#include <warnbox.hxx>
#include <clipparam.hxx>
#include <impex.hxx>
#include <editutil.hxx>
#include <patattr.hxx>
#include <docpool.hxx>
#include <stringutil.hxx>
#include <cellvalue.hxx>
#include <tokenarray.hxx>
#include <documentimport.hxx>
#include <refundo.hxx>
#include <mtvelements.hxx>
#include <globstr.hrc>
#include <scresid.hxx>
#include <o3tl/safeint.hxx>
#include <tools/svlibrary.h>
#include <unotools/configmgr.hxx>
#include <vcl/svapp.hxx>
#include <vcl/weld.hxx>
#include <editeng/editobj.hxx>
#include <memory>
#include <osl/endian.h>
Include dependency graph for impex.cxx:

Go to the source code of this file.

Typedefs

typedef ScFormatFilterPlugin *(* FilterFn) ()
 

Enumerations

enum  SylkVersion
 
enum  QuoteType
 
enum  DoubledQuoteMode
 

Functions

static QuoteType lcl_isFieldEndQuote (const sal_Unicode *p, const sal_Unicode *pSeps, sal_Unicode &rcDetectSep)
 Determine if *p is a quote that ends a quoted field. More...
 
static QuoteType lcl_isEscapedOrFieldEndQuote (sal_Int32 nQuotes, const sal_Unicode *p, const sal_Unicode *pSeps, sal_Unicode cStr, sal_Unicode &rcDetectSep)
 Determine if *p is a quote that is escaped by being doubled or ends a quoted field. More...
 
static bool lcl_appendLineData (OUString &rField, const sal_Unicode *p1, const sal_Unicode *p2)
 Append characters of [p1,p2) to rField. More...
 
static const sal_Unicode * lcl_ScanString (const sal_Unicode *p, OUString &rString, const sal_Unicode *pSeps, sal_Unicode cStr, DoubledQuoteMode eMode, bool &rbOverflowCell)
 
static void lcl_UnescapeSylk (OUString &rString, SylkVersion eVersion)
 
static const sal_Unicode * lcl_ScanSylkString (const sal_Unicode *p, OUString &rString, SylkVersion eVersion)
 
static const sal_Unicode * lcl_ScanSylkFormula (const sal_Unicode *p, OUString &rString, SylkVersion eVersion)
 
static void lcl_DoubleEscapeChar (OUString &rString, sal_Unicode cStr)
 
static void lcl_WriteString (SvStream &rStrm, OUString &rString, sal_Unicode cQuote, sal_Unicode cEsc)
 
static void lcl_WriteSimpleString (SvStream &rStrm, const OUString &rString)
 
static bool lcl_PutString (ScDocumentImport &rDocImport, bool bUseDocImport, SCCOL nCol, SCROW nRow, SCTAB nTab, const OUString &rStr, sal_uInt8 nColFormat, SvNumberFormatter *pFormatter, bool bDetectNumFormat, bool bSkipEmptyCells, const ::utl::TransliterationWrapper &rTransliteration, CalendarWrapper &rCalendar, const ::utl::TransliterationWrapper *pSecondTransliteration, CalendarWrapper *pSecondCalendar)
 
static OUString lcl_GetFixed (const OUString &rLine, sal_Int32 nStart, sal_Int32 nNext, bool &rbIsQuoted, bool &rbOverflowCell)
 
static void thisModule ()
 
static const sal_Unicode * lcl_UnicodeStrChr (const sal_Unicode *pStr, sal_Unicode c)
 
OUString ReadCsvLine (SvStream &rStream, bool bEmbeddedLineBreak, OUString &rFieldSeparators, sal_Unicode cFieldQuote, sal_Unicode &rcDetectSep)
 Read a CSV (comma separated values) data line using ReadUniOrByteStringLine(). More...
 

Variables

constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16
 
constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit
 

Typedef Documentation

typedef ScFormatFilterPlugin*(* FilterFn) ()

Definition at line 2358 of file impex.cxx.

Enumeration Type Documentation

enum DoubledQuoteMode
strong

Definition at line 651 of file impex.cxx.

enum QuoteType

Definition at line 553 of file impex.cxx.

enum SylkVersion
strong

Definition at line 91 of file impex.cxx.

Function Documentation

static bool lcl_appendLineData ( OUString &  rField,
const sal_Unicode *  p1,
const sal_Unicode *  p2
)
static

Append characters of [p1,p2) to rField.

Returns
TRUE if ok; FALSE if data overflow, truncated

Definition at line 634 of file impex.cxx.

References nArbitraryCellLengthLimit, and SAL_WARN.

Referenced by lcl_ScanString(), ScImportExport::ScanNextFieldFromString(), and ScImportExport::Text2Doc().

static void lcl_DoubleEscapeChar ( OUString &  rString,
sal_Unicode  cStr 
)
static

Definition at line 834 of file impex.cxx.

References n.

Referenced by lcl_WriteString().

static OUString lcl_GetFixed ( const OUString &  rLine,
sal_Int32  nStart,
sal_Int32  nNext,
bool &  rbIsQuoted,
bool &  rbOverflowCell 
)
static

Definition at line 1259 of file impex.cxx.

References EMPTY_OUSTRING, nArbitraryCellLengthLimit, and SAL_WARN.

Referenced by ScImportExport::ExtText2Doc().

static QuoteType lcl_isEscapedOrFieldEndQuote ( sal_Int32  nQuotes,
const sal_Unicode *  p,
const sal_Unicode *  pSeps,
sal_Unicode  cStr,
sal_Unicode &  rcDetectSep
)
static

Determine if *p is a quote that is escaped by being doubled or ends a quoted field.

Precondition: *p is a quote.

Parameters
nQuotes: Quote characters encountered so far. Odd (after opening quote) means either no embedded quotes or only quote pairs so far. Even means either not in a quoted field or already one quote encountered, the first of a pair.
Returns
FIELDSTART_QUOTE if first quote in a field, either starting content or embedded, so the caller should check beforehand.
FIRST_QUOTE if first of a doubled quote.
SECOND_QUOTE if second of a doubled quote.
FIELDEND_QUOTE if end of field quote.
DONTKNOW_QUOTE if an unescaped quote we don't consider as end of field; do not increment nQuotes in the caller then!

Definition at line 612 of file impex.cxx.

References lcl_isFieldEndQuote(), and SAL_WARN.

Referenced by ReadCsvLine().

static QuoteType lcl_isFieldEndQuote ( const sal_Unicode *  p,
const sal_Unicode *  pSeps,
sal_Unicode &  rcDetectSep
)
static

Determine if *p is a quote that ends a quoted field.

Precondition: we are parsing a quoted field already and *p is a quote.

Returns
FIELDEND_QUOTE if end of field quote.
DONTKNOW_QUOTE for anything else.

Definition at line 572 of file impex.cxx.

References ScGlobal::UnicodeStrChr().

Referenced by lcl_isEscapedOrFieldEndQuote(), and lcl_ScanString().

static bool lcl_PutString ( ScDocumentImport &  rDocImport,
bool  bUseDocImport,
SCCOL  nCol,
SCROW  nRow,
SCTAB  nTab,
const OUString &  rStr,
sal_uInt8  nColFormat,
SvNumberFormatter *  pFormatter,
bool  bDetectNumFormat,
bool  bSkipEmptyCells,
const ::utl::TransliterationWrapper &  rTransliteration,
CalendarWrapper &  rCalendar,
const ::utl::TransliterationWrapper *  pSecondTransliteration,
CalendarWrapper *  pSecondCalendar
)
static

SetString with Extra-Flag ???

which language for date formats?

Definition at line 961 of file impex.cxx.

References ScDocument::ApplyPattern(), ATTR_VALUE_FORMAT(), EditEngine::CreateTextObject(), SvNumberFormatter::ExpandTwoDigitYear(), ScGlobal::getCharClassPtr(), ScDocumentImport::getDoc(), ScDocument::GetEditEngine(), CalendarWrapper::getEpochStart(), ScDocument::GetFormatTable(), ScDocument::GetLanguage(), CalendarWrapper::getLocalDateTime(), CalendarWrapper::getMonths(), SvNumberFormatter::GetNullDate(), CalendarWrapper::getNumberOfMonthsInYear(), ScDocument::GetPool(), SvNumberFormatter::GetStandardFormat(), SvNumberFormatter::GetStandardIndex(), i, CharClass::isDigit(), ScStringUtil::isMultiline(), SvNumberFormatter::IsNumberFormat(), CharClass::isNumeric(), CalendarWrapper::isValid(), LANGUAGE_ENGLISH_US, ScSetStringParam::mbCheckLinkFormula, ScSetStringParam::mbDetectNumberFormat, ScSetStringParam::mbHandleApostrophe, ScSetStringParam::meSetTextNumFormat, ScSetStringParam::mpNumFormatter, nIndex, nPos, nType, SC_COL_DMY, SC_COL_ENGLISH, SC_COL_MDY, SC_COL_SKIP, SC_COL_STANDARD, SC_COL_TEXT, SC_COL_YMD, ScDocumentImport::setAutoInput(), ScDocumentImport::setEditCell(), ScDocument::SetEditText(), ScDocument::SetNumberFormat(), ScDocumentImport::setNumericCell(), ScDocument::SetString(), ScDocumentImport::setStringCell(), ScDocument::SetTextCell(), ScEditEngineDefaulter::SetTextCurrentDefaults(), CalendarWrapper::setValue(), ScDocument::SetValue(), ScSetStringParam::SpecialNumberOnly, ScDocument::ValidCol(), and ScDocument::ValidRow().

Referenced by ScImportExport::ExtText2Doc().

static const sal_Unicode* lcl_ScanString ( const sal_Unicode *  p,
OUString &  rString,
const sal_Unicode *  pSeps,
sal_Unicode  cStr,
DoubledQuoteMode  eMode,
bool &  rbOverflowCell 
)
static

jump over opening quote

Definition at line 659 of file impex.cxx.

References lcl_appendLineData(), and lcl_isFieldEndQuote().

Referenced by ScImportExport::ScanNextFieldFromString(), and ScImportExport::Text2Doc().

static const sal_Unicode* lcl_ScanSylkFormula ( const sal_Unicode *  p,
OUString &  rString,
SylkVersion  eVersion 
)
static

Definition at line 770 of file impex.cxx.

References lcl_ScanSylkString(), and lcl_UnescapeSylk().

Referenced by ScImportExport::Sylk2Doc().

static const sal_Unicode* lcl_ScanSylkString ( const sal_Unicode *  p,
OUString &  rString,
SylkVersion  eVersion 
)
static

Definition at line 728 of file impex.cxx.

References lcl_UnescapeSylk().

Referenced by lcl_ScanSylkFormula(), and ScImportExport::Sylk2Doc().

static void lcl_UnescapeSylk ( OUString &  rString,
SylkVersion  eVersion 
)
static

Definition at line 715 of file impex.cxx.

Referenced by lcl_ScanSylkFormula(), and lcl_ScanSylkString().

static const sal_Unicode* lcl_UnicodeStrChr ( const sal_Unicode *  pStr,
sal_Unicode  c 
)
static

Definition at line 2389 of file impex.cxx.

Referenced by ReadCsvLine().

static void lcl_WriteSimpleString ( SvStream &  rStrm,
const OUString &  rString 
)
static
static void lcl_WriteString ( SvStream &  rStrm,
OUString &  rString,
sal_Unicode  cQuote,
sal_Unicode  cEsc 
)
static
OUString ReadCsvLine ( SvStream &  rStream,
bool  bEmbeddedLineBreak,
OUString &  rFieldSeparators,
sal_Unicode  cFieldQuote,
sal_Unicode &  rcDetectSep
)

Read a CSV (comma separated values) data line using ReadUniOrByteStringLine().

Parameters
bEmbeddedLineBreak: If TRUE and a line-break occurs inside a field of data, a line feed LF '\n' and the next line are appended. Repeats until a line-break is not in a field. A field is determined by delimiting rFieldSeparators and optionally surrounded by a pair of cFieldQuote characters. For a line-break to be within a field, the field content MUST be surrounded by cFieldQuote characters, and the opening cFieldQuote MUST be at the very start of a line or follow right behind a field separator with no extra characters in between, with the exception of blanks contradictory to RFC 4180. Anything, including field separators and escaped quotes (by doubling them) may appear in a quoted field.

If bEmbeddedLineBreak==FALSE, nothing is parsed and the string returned is simply one ReadUniOrByteStringLine().

Parameters
rFieldSeparators: A list of characters that each may act as a field separator. If rcDetectSep was 0 and a separator is detected then it is appended to rFieldSeparators.
cFieldQuote: The quote character used.
rcDetectSep: If 0 then attempt to detect a possible space (blank) separator if rFieldSeparators doesn't include it already. This can be necessary because of the "accept broken misquoted CSV fields" feature that tries to ignore trailing blanks after a quoted field and if no separator follows continues to add content to the field assuming the single double quote was in error. If this blank separator is detected it is added to rFieldSeparators and the line is reread with the new separators.

Check Stream::good() to detect I/O problems during read.

Note that the string returned may be truncated even inside a quoted field if some (arbitrary) maximum length was reached. There currently is no way to exactly determine the conditions, whether this was at a line end, or whether open quotes would have closed the field before the line end, as even a ReadUniOrByteStringLine() may return prematurely but the stream was positioned ahead until the real end of line. Additionally, due to character encoding conversions, string length and bytes read don't necessarily match, and resyncing to a previous position matching the string's length isn't always possible. As a result, a logical line with embedded line breaks and more than the maximum length characters will be spoiled, and a subsequent ReadCsvLine() may start under false preconditions.

Definition at line 2413 of file impex.cxx.

References aStr, SvStream::eof(), SvStream::GetStreamCharSet(), lcl_isEscapedOrFieldEndQuote(), lcl_UnicodeStrChr(), nArbitraryLineLengthLimit, SvStream::ReadUniOrByteStringLine(), SvStream::Seek(), and SvStream::Tell().

Referenced by ScImportExport::ExtText2Doc(), and ScImportAsciiDlg::GetLine().

static void thisModule ( )
static

Definition at line 2348 of file impex.cxx.

Referenced by ScFormatFilter::Get().

Variable Documentation

constexpr sal_Int32 nArbitraryCellLengthLimit = SAL_MAX_UINT16

Definition at line 72 of file impex.cxx.

Referenced by lcl_appendLineData(), and lcl_GetFixed().

constexpr sal_Int32 nArbitraryLineLengthLimit = 2 * MAXCOLCOUNT * nArbitraryCellLengthLimit

Definition at line 73 of file impex.cxx.

Referenced by ReadCsvLine(), and ScImportExport::Text2Doc().