LibreOffice Module io (master) 1
TextInputStream.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <string.h>
21
25
26#include <rtl/textenc.h>
27#include <rtl/tencinfo.h>
28
29#include <com/sun/star/io/BufferSizeExceededException.hpp>
30#include <com/sun/star/io/IOException.hpp>
31#include <com/sun/star/io/NotConnectedException.hpp>
32#include <com/sun/star/io/XTextInputStream2.hpp>
33#include <com/sun/star/lang/XServiceInfo.hpp>
34
35#include <vector>
36
37namespace com::sun::star::uno { class XComponentContext; }
38
39using namespace ::osl;
40using namespace ::cppu;
41using namespace ::com::sun::star::uno;
42using namespace ::com::sun::star::lang;
43using namespace ::com::sun::star::io;
44
45
46// Implementation XTextInputStream
47
48#define INITIAL_UNICODE_BUFFER_CAPACITY 0x100
49#define READ_BYTE_COUNT 0x100
50
51namespace {
52
53class OTextInputStream : public WeakImplHelper< XTextInputStream2, XServiceInfo >
54{
55 Reference< XInputStream > mxStream;
56
57 // Encoding
58 bool mbEncodingInitialized;
59 rtl_TextToUnicodeConverter mConvText2Unicode;
60 rtl_TextToUnicodeContext mContextText2Unicode;
61 Sequence<sal_Int8> mSeqSource;
62
63 // Internal buffer for characters that are already converted successfully
64 std::vector<sal_Unicode> mvBuffer;
65 sal_Int32 mnCharsInBuffer;
66 bool mbReachedEOF;
67
70 OUString implReadString( const Sequence< sal_Unicode >& Delimiters,
71 bool bRemoveDelimiter, bool bFindLineEnd );
74 sal_Int32 implReadNext();
76 void checkNull();
77
78public:
79 OTextInputStream();
80 virtual ~OTextInputStream() override;
81
82 // Methods XTextInputStream
83 virtual OUString SAL_CALL readLine( ) override;
84 virtual OUString SAL_CALL readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter ) override;
85 virtual sal_Bool SAL_CALL isEOF( ) override;
86 virtual void SAL_CALL setEncoding( const OUString& Encoding ) override;
87
88 // Methods XInputStream
89 virtual sal_Int32 SAL_CALL readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead ) override;
90 virtual sal_Int32 SAL_CALL readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead ) override;
91 virtual void SAL_CALL skipBytes( sal_Int32 nBytesToSkip ) override;
92 virtual sal_Int32 SAL_CALL available( ) override;
93 virtual void SAL_CALL closeInput( ) override;
94
95 // Methods XActiveDataSink
96 virtual void SAL_CALL setInputStream( const Reference< XInputStream >& aStream ) override;
97 virtual Reference< XInputStream > SAL_CALL getInputStream() override;
98
99 // Methods XServiceInfo
100 virtual OUString SAL_CALL getImplementationName() override;
101 virtual Sequence< OUString > SAL_CALL getSupportedServiceNames() override;
102 virtual sal_Bool SAL_CALL supportsService(const OUString& ServiceName) override;
103};
104
105}
106
107OTextInputStream::OTextInputStream()
108 : mbEncodingInitialized(false)
109 , mConvText2Unicode(nullptr)
110 , mContextText2Unicode(nullptr)
111 , mSeqSource(READ_BYTE_COUNT)
113 , mnCharsInBuffer(0)
114 , mbReachedEOF(false)
115{
116}
117
118OTextInputStream::~OTextInputStream()
119{
120 if( mbEncodingInitialized )
121 {
122 rtl_destroyTextToUnicodeContext( mConvText2Unicode, mContextText2Unicode );
123 rtl_destroyTextToUnicodeConverter( mConvText2Unicode );
124 }
125}
126
127// Check uninitialized object
128
129void OTextInputStream::checkNull()
130{
131 if (mxStream==nullptr){
132 throw RuntimeException("Uninitialized object");
133 }
134}
135
136// XTextInputStream
137
138OUString OTextInputStream::readLine( )
139{
140 checkNull();
141 static Sequence< sal_Unicode > aDummySeq;
142 return implReadString( aDummySeq, true, true );
143}
144
145OUString OTextInputStream::readString( const Sequence< sal_Unicode >& Delimiters, sal_Bool bRemoveDelimiter )
146{
147 checkNull();
148 return implReadString( Delimiters, bRemoveDelimiter, false );
149}
150
151sal_Bool OTextInputStream::isEOF()
152{
153 checkNull();
154 bool bRet = false;
155 if( mnCharsInBuffer == 0 && mbReachedEOF )
156 bRet = true;
157 return bRet;
158}
159
160
161OUString OTextInputStream::implReadString( const Sequence< sal_Unicode >& Delimiters,
162 bool bRemoveDelimiter, bool bFindLineEnd )
163{
164 OUString aRetStr;
165 if( !mbEncodingInitialized )
166 {
167 setEncoding( "utf8" );
168 }
169 if( !mbEncodingInitialized )
170 return aRetStr;
171
172 // Only for bFindLineEnd
173 sal_Unicode cLineEndChar1 = 0x0D;
174 sal_Unicode cLineEndChar2 = 0x0A;
175
176 sal_Int32 nBufferReadPos = 0;
177 sal_Int32 nCopyLen = 0;
178 bool bFound = false;
179 bool bFoundFirstLineEndChar = false;
180 sal_Unicode cFirstLineEndChar = 0;
181 while( !bFound )
182 {
183 // Still characters available?
184 if( nBufferReadPos == mnCharsInBuffer )
185 {
186 // Already reached EOF? Then we can't read any more
187 if( mbReachedEOF )
188 break;
189
190 // No, so read new characters
191 if( !implReadNext() )
192 break;
193 }
194
195 // Now there should be characters available
196 // (otherwise the loop should have been broken before)
197 sal_Unicode c = mvBuffer[ nBufferReadPos++ ];
198
199 if( bFindLineEnd )
200 {
201 if( bFoundFirstLineEndChar )
202 {
203 bFound = true;
204 nCopyLen = nBufferReadPos - 2;
205 if( c == cLineEndChar1 || c == cLineEndChar2 )
206 {
207 // Same line end char -> new line break
208 if( c == cFirstLineEndChar )
209 {
210 nBufferReadPos--;
211 }
212 }
213 else
214 {
215 // No second line end char
216 nBufferReadPos--;
217 }
218 }
219 else if( c == cLineEndChar1 || c == cLineEndChar2 )
220 {
221 bFoundFirstLineEndChar = true;
222 cFirstLineEndChar = c;
223 }
224 }
225 else if( comphelper::findValue(Delimiters, c) != -1 )
226 {
227 bFound = true;
228 nCopyLen = nBufferReadPos;
229 if( bRemoveDelimiter )
230 nCopyLen--;
231 }
232 }
233
234 // Nothing found? Return all
235 if( !nCopyLen && !bFound && mbReachedEOF )
236 nCopyLen = nBufferReadPos;
237
238 // Create string
239 if( nCopyLen )
240 aRetStr = OUString( mvBuffer.data(), nCopyLen );
241
242 // Copy rest of buffer
243 memmove( mvBuffer.data(), mvBuffer.data() + nBufferReadPos,
244 (mnCharsInBuffer - nBufferReadPos) * sizeof( sal_Unicode ) );
245 mnCharsInBuffer -= nBufferReadPos;
246
247 return aRetStr;
248}
249
250
251sal_Int32 OTextInputStream::implReadNext()
252{
253 sal_Int32 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
254 if( nFreeBufferSize < READ_BYTE_COUNT )
255 mvBuffer.resize(mvBuffer.size() * 2);
256 nFreeBufferSize = mvBuffer.size() - mnCharsInBuffer;
257
258 try
259 {
260 sal_Int32 nRead = mxStream->readSomeBytes( mSeqSource, READ_BYTE_COUNT );
261 sal_Int32 nTotalRead = nRead;
262 if( nRead == 0 )
263 mbReachedEOF = true;
264
265 // Try to convert
266 sal_uInt32 uiInfo;
267 sal_Size nSrcCvtBytes = 0;
268 sal_Size nTargetCount = 0;
269 sal_Size nSourceCount = 0;
270 while( true )
271 {
272 const sal_Int8 *pbSource = mSeqSource.getConstArray();
273
274 // All invalid characters are transformed to the unicode undefined char
275 nTargetCount += rtl_convertTextToUnicode(
276 mConvText2Unicode,
277 mContextText2Unicode,
278 reinterpret_cast<const char*>(&( pbSource[nSourceCount] )),
279 nTotalRead - nSourceCount,
280 mvBuffer.data() + mnCharsInBuffer + nTargetCount,
281 nFreeBufferSize - nTargetCount,
282 RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
283 RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
284 RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT,
285 &uiInfo,
286 &nSrcCvtBytes );
287 nSourceCount += nSrcCvtBytes;
288
289 bool bCont = false;
290 if( uiInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL )
291 {
292 mvBuffer.resize(mvBuffer.size() * 2);
293 bCont = true;
294 }
295
296 if( uiInfo & RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL )
297 {
298 // read next byte
299 static Sequence< sal_Int8 > aOneByteSeq( 1 );
300 nRead = mxStream->readSomeBytes( aOneByteSeq, 1 );
301 if( nRead == 0 )
302 {
303 mbReachedEOF = true;
304 break;
305 }
306
307 sal_Int32 nOldLen = mSeqSource.getLength();
308 nTotalRead++;
309 if( nTotalRead > nOldLen )
310 {
311 mSeqSource.realloc( nTotalRead );
312 }
313 mSeqSource.getArray()[ nOldLen ] = aOneByteSeq.getConstArray()[ 0 ];
314 bCont = true;
315 }
316
317 if( bCont )
318 continue;
319 break;
320 }
321
322 mnCharsInBuffer += nTargetCount;
323 return nTargetCount;
324 }
325 catch( NotConnectedException& )
326 {
327 throw IOException("Not connected");
328 //throw IOException( L"OTextInputStream::implReadString failed" );
329 }
330 catch( BufferSizeExceededException& )
331 {
332 throw IOException("Buffer size exceeded");
333 }
334}
335
336void OTextInputStream::setEncoding( const OUString& Encoding )
337{
338 OString aOEncodingStr = OUStringToOString( Encoding, RTL_TEXTENCODING_ASCII_US );
339 rtl_TextEncoding encoding = rtl_getTextEncodingFromMimeCharset( aOEncodingStr.getStr() );
340 if( RTL_TEXTENCODING_DONTKNOW == encoding )
341 return;
342
343 mbEncodingInitialized = true;
344 mConvText2Unicode = rtl_createTextToUnicodeConverter( encoding );
345 mContextText2Unicode = rtl_createTextToUnicodeContext( mConvText2Unicode );
346}
347
348
349// XInputStream
350
351sal_Int32 OTextInputStream::readBytes( Sequence< sal_Int8 >& aData, sal_Int32 nBytesToRead )
352{
353 checkNull();
354 return mxStream->readBytes( aData, nBytesToRead );
355}
356
357sal_Int32 OTextInputStream::readSomeBytes( Sequence< sal_Int8 >& aData, sal_Int32 nMaxBytesToRead )
358{
359 checkNull();
360 return mxStream->readSomeBytes( aData, nMaxBytesToRead );
361}
362
363void OTextInputStream::skipBytes( sal_Int32 nBytesToSkip )
364{
365 checkNull();
366 mxStream->skipBytes( nBytesToSkip );
367}
368
369sal_Int32 OTextInputStream::available( )
370{
371 checkNull();
372 return mxStream->available();
373}
374
375void OTextInputStream::closeInput( )
376{
377 checkNull();
378 mxStream->closeInput();
379}
380
381
382// XActiveDataSink
383
384void OTextInputStream::setInputStream( const Reference< XInputStream >& aStream )
385{
386 mxStream = aStream;
387}
388
389Reference< XInputStream > OTextInputStream::getInputStream()
390{
391 return mxStream;
392}
393
394OUString OTextInputStream::getImplementationName()
395{
396 return "com.sun.star.comp.io.TextInputStream";
397}
398
399sal_Bool OTextInputStream::supportsService(const OUString& ServiceName)
400{
401 return cppu::supportsService(this, ServiceName);
402}
403
404Sequence< OUString > OTextInputStream::getSupportedServiceNames()
405{
406 return { "com.sun.star.io.TextInputStream" };
407}
408
409extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
411 css::uno::XComponentContext* , css::uno::Sequence<css::uno::Any> const&)
412{
413 return cppu::acquire(new OTextInputStream());
414}
415
416
417/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * io_OTextInputStream_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
#define INITIAL_UNICODE_BUFFER_CAPACITY
#define READ_BYTE_COUNT
constexpr OUStringLiteral aData
sal_Int32 findValue(const css::uno::Sequence< T1 > &_rList, const T2 &_rValue)
css::uno::Sequence< OUString > getSupportedServiceNames()
OUString getImplementationName()
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
DESKTOP_DEPLOYMENTMISC_DLLPUBLIC bool readLine(OUString *res, std::u16string_view startingWith, ::ucbhelper::Content &ucb_content, rtl_TextEncoding textenc)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
sal_uInt32 readString(const sal_uInt8 *buffer, sal_Unicode *v, sal_uInt32 maxSize)
unsigned char sal_Bool
sal_uInt16 sal_Unicode
signed char sal_Int8