24 #include <rtl/tencinfo.h>
42 #include <strings.hrc>
48 #include <osl/diagnose.h>
50 #define ASC_BUFFLEN 4096
57 std::unique_ptr<SwPaM> pPam;
59 std::unique_ptr<sal_Char[]> pArr;
61 std::unique_ptr<SfxItemSet> pItemSet;
69 SwASCIIParser(
const SwASCIIParser&) =
delete;
70 SwASCIIParser& operator=(
const SwASCIIParser&) =
delete;
86 OSL_ENSURE(
false,
"ASCII read without a stream" );
90 std::unique_ptr<SwASCIIParser> pParser(
new SwASCIIParser( &rDoc, rPam, *
m_pStream,
92 ErrCode nRet = pParser->CallParser();
103 , bNewDoc(bReadNewDoc)
108 pItemSet = std::make_unique<SfxItemSet>( pDoc->GetAttrPool(),
114 if( rOpt.GetLanguage() )
117 pItemSet->Put( aLang );
118 aLang.SetWhich(RES_CHRATR_CJK_LANGUAGE);
119 pItemSet->Put( aLang );
121 pItemSet->Put( aLang );
123 if( !rOpt.GetFontName().isEmpty() )
126 if( pDoc->getIDocumentDeviceAccess().getPrinter(
false ) )
127 aTextFont = pDoc->getIDocumentDeviceAccess().getPrinter(
false )->GetFontMetric( aTextFont );
128 SvxFontItem aFont( aTextFont.GetFamilyType(), aTextFont.GetFamilyName(),
129 OUString(), aTextFont.GetPitch(), aTextFont.GetCharSet(),
RES_CHRATR_FONT );
130 pItemSet->Put( aFont );
131 aFont.SetWhich(RES_CHRATR_CJK_FONT);
132 pItemSet->Put( aFont );
133 aFont.SetWhich(RES_CHRATR_CTL_FONT);
134 pItemSet->Put( aFont );
139 ErrCode SwASCIIParser::CallParser()
142 nFileSize = rInput.TellEnd();
143 rInput.Seek(STREAM_SEEK_TO_BEGIN);
146 ::StartProgress( STR_STATSTR_W4WREAD, 0, nFileSize, pDoc->GetDocShell() );
148 std::unique_ptr<SwPaM> pInsPam;
149 sal_Int32 nSttContent = 0;
153 pInsPam.reset(
new SwPaM( rTmp, rTmp, 0, -1 ));
154 nSttContent = pPam->GetPoint()->nContent.
GetIndex();
165 pDoc->SetTextFormatColl(*pPam, pColl);
173 if( !( SvtScriptType::LATIN & nScript ))
175 pItemSet->ClearItem( RES_CHRATR_FONT );
176 pItemSet->ClearItem( RES_CHRATR_LANGUAGE );
178 if( !( SvtScriptType::ASIAN & nScript ))
180 pItemSet->ClearItem( RES_CHRATR_CJK_FONT );
181 pItemSet->ClearItem( RES_CHRATR_CJK_LANGUAGE );
183 if( !( SvtScriptType::COMPLEX & nScript ))
185 pItemSet->ClearItem( RES_CHRATR_CTL_FONT );
188 if( pItemSet->Count() )
205 sal_uInt16 aWhichIds[4] =
210 sal_uInt16 *pWhichIds = aWhichIds;
214 if (SfxItemState::SET == pItemSet->GetItemState(*pWhichIds,
218 pItemSet->ClearItem( *pWhichIds );
223 if (pItemSet->Count())
224 pDoc->SetDefault(*pItemSet);
229 *pInsPam->GetMark() = *pPam->GetPoint();
230 ++pInsPam->GetPoint()->nNode;
231 pInsPam->GetPoint()->nContent.Assign(
232 pInsPam->GetContentNode(), nSttContent );
235 OSL_ENSURE(
false,
"Have to change - hard attr. to para. style" );
236 pDoc->getIDocumentContentOperations().InsertItemSet( *pInsPam, *pItemSet );
248 ErrCode SwASCIIParser::ReadChars()
250 sal_Unicode *pStt =
nullptr, *pEnd =
nullptr, *pLastStt =
nullptr;
251 long nReadCnt = 0, nLineLen = 0;
253 bool bSwapUnicode =
false;
257 if (nFileSize >= 2 &&
264 nOrig = nLen = rInput.ReadBytes(pArr.get(),
ASC_BUFFLEN);
265 rtl_TextEncoding eCharSet;
268 OSL_ENSURE(bRet,
"Autodetect of text import without nag dialog must have failed");
269 if (bRet && eCharSet != RTL_TEXTENCODING_DONTKNOW)
273 rInput.SeekRel(-(
long(nLen)));
276 rInput.SeekRel(-(
long(nOrig)));
280 rtl_TextToUnicodeConverter hConverter=
nullptr;
281 rtl_TextToUnicodeContext hContext=
nullptr;
282 rtl_TextEncoding currentCharSet = pUseMe->
GetCharSet();
283 if (RTL_TEXTENCODING_UCS2 != currentCharSet)
285 if( currentCharSet == RTL_TEXTENCODING_DONTKNOW )
286 currentCharSet = RTL_TEXTENCODING_ASCII_US;
287 hConverter = rtl_createTextToUnicodeConverter( currentCharSet );
288 OSL_ENSURE( hConverter,
"no string convert available" );
290 return ErrCode(ErrCodeArea::Sw, ErrCodeClass::Read, 0);
291 bSwapUnicode =
false;
292 hContext = rtl_createTextToUnicodeContext( hConverter );
294 else if (pUseMe != &aEmpty)
296 rInput.StartReadingUnicodeText( currentCharSet );
297 bSwapUnicode = rInput.IsEndianSwap();
300 std::unique_ptr<sal_Unicode[]> aWork;
306 if( pLastStt != pStt )
311 if( ERRCODE_NONE != rInput.GetError() || 0 == (lGCount =
312 rInput.ReadBytes( pArr.get() + nArrOffset,
327 sal_Size nNewLen = lGCount, nCntBytes;
332 nNewLen = rtl_convertTextToUnicode( hConverter, hContext,
333 pArr.get(), lGCount, pBuf, nNewLen,
335 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
336 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
337 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT |
338 RTL_TEXTTOUNICODE_FLAGS_GLOBAL_SIGNATURE
342 if( 0 != ( nArrOffset = lGCount - nCntBytes ) )
343 memmove( pArr.get(), pArr.get() + nCntBytes, nArrOffset );
345 pStt = pLastStt = aWork.get();
346 pEnd = pStt + nNewLen;
350 pStt = pLastStt =
reinterpret_cast<sal_Unicode*
>(pArr.get());
351 pEnd =
reinterpret_cast<sal_Unicode*
>(pArr.get() + lGCount);
355 sal_Char* pF = pArr.get(), *pN = pArr.get() + 1;
356 for(
sal_uLong n = 0;
n < lGCount;
n += 2, pF += 2, pN += 2 )
372 if( 0x0a == *pStt && 0x0d == cLastCR )
377 if( !rInput.eof() || !(pEnd == pStt ||
378 ( !*pEnd && pEnd == pStt+1 ) ) )
379 pDoc->getIDocumentContentOperations().SplitNode( *pPam->GetPoint(), false );
383 bool bIns =
true, bSplitNode =
false;
394 if( !rInput.eof() || pEnd != pStt )
405 bool bChkSplit =
false;
410 else if( 0x0a == *pStt )
420 if( bChkSplit && ( !rInput.eof() || pEnd != pStt ))
433 pDoc->getIDocumentContentOperations().SplitNode( *pPam->GetPoint(), false );
434 pDoc->getIDocumentContentOperations().InsertPoolItem(
443 if( nReadCnt == nFileSize && pStt+1 == pEnd )
466 pDoc->getIDocumentContentOperations().SplitNode( *pPam->GetPoint(), false );
474 else if( bSplitNode )
479 pDoc->getIDocumentContentOperations().AppendTextNode( *pPam->GetPoint() );
481 pDoc->getIDocumentContentOperations().SplitNode( *pPam->GetPoint(), false );
489 rtl_destroyTextToUnicodeContext( hConverter, hContext );
490 rtl_destroyTextToUnicodeConverter( hConverter );
495 void SwASCIIParser::InsertText(
const OUString& rStr )
497 pDoc->getIDocumentContentOperations().InsertString( *pPam, rStr );
499 if( pItemSet &&
g_pBreakIt && nScript != ( SvtScriptType::LATIN |
500 SvtScriptType::ASIAN |
501 SvtScriptType::COMPLEX ) )
rtl_TextEncoding GetCharSet() const
Represents the style of a paragraph.
#define RES_CHRATR_CJK_LANGUAGE
#define RES_CHRATR_LANGUAGE
const OUString & GetFontName() const
LanguageType GetLanguage() const
SvtScriptType GetAllScriptsOfText(const OUString &rText) const
void EndProgress(SwDocShell const *pDocShell)
#define RES_CHRATR_CJK_FONT
void SetParaFlags(LineEnd eVal)
virtual bool SetFormatAttr(const SfxPoolItem &rAttr) override
Override to recognize changes on the <SwNumRuleItem> and register/unregister the paragraph style at t...
void StartProgress(const char *pMessResId, long nStartValue, long nEndValue, SwDocShell *pDocShell)
static bool IsDetectableText(const sal_Char *pBuf, sal_uLong &rLen, rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd)
sal_uLong GetIndex() const
const SwAsciiOptions & GetASCIIOpts() const
void SetProgressState(long nPosition, SwDocShell const *pDocShell)
void SetCharSet(rtl_TextEncoding nVal)
PaM is Point and Mark: a selection of the document model.
#define RES_CHRATR_CTL_FONT
const SwPosition * GetPoint() const
virtual ErrCode Read(SwDoc &, const OUString &rBaseURL, SwPaM &, const OUString &) override
Marks a node in the document model.
SwgReaderOption m_aOption
#define RES_CHRATR_CTL_LANGUAGE
#define ERR_SWG_READ_ERROR
LineEnd GetParaFlags() const