20#include <com/sun/star/io/XInputStream.hpp>
39bool IsHTMLStream(
const uno::Reference<io::XInputStream>& xInStream )
42 if ( !pInStream || pInStream->GetError() )
47 pInStream->StartReadingUnicodeText( RTL_TEXTENCODING_DONTKNOW );
48 const sal_uInt64 nUniPos = pInStream->Tell();
49 const sal_uInt16 nSize = 4096;
52 if ( nUniPos == 3 || nUniPos == 0 )
58 enum DetectPhase { BeforeTag, TagOpened, InTagName };
59 DetectPhase dp = BeforeTag;
66 DeclarationPhase eDeclaration = BeforeDeclaration;
68 const char* pHeader = sHeader.getStr();
69 const int nLength = sHeader.getLength();
70 int i = 0, nStartOfTagIndex = 0;
75 if ((c ==
' ' || c ==
'\n' || c ==
'\t' || c ==
'\r' || c ==
'\f')
76 && eDeclaration == BeforeDeclaration)
78 if ( dp == TagOpened )
80 else if ( dp == InTagName )
85 if ( dp == BeforeTag )
92 if ( dp == InTagName )
94 else if (eDeclaration == DeclarationOpened)
97 eDeclaration = BeforeDeclaration;
104 if ( dp == TagOpened )
111 if ( dp == BeforeTag )
113 else if ( dp == TagOpened )
115 if (c ==
'?' && eDeclaration == BeforeDeclaration)
116 eDeclaration = DeclarationOpened;
117 else if (eDeclaration == BeforeDeclaration)
119 nStartOfTagIndex =
i;
127 OString aToken = sHeader.copy( nStartOfTagIndex,
i - nStartOfTagIndex );
128 return GetHTMLToken( OStringToOUString( aToken.toAsciiLowerCase(), RTL_TEXTENCODING_ASCII_US ) ) != HtmlTokenId::NONE;
140 OUString aType = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_TYPENAME, OUString() );
141 OUString aDocService = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_DOCUMENTSERVICE, OUString() );
143 if ((aType ==
"generic_HTML") || (aType ==
"calc_HTML"))
145 uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM], uno::UNO_QUERY);
146 if (!xInStream.is() || !IsHTMLStream(xInStream))
150 aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= OUString(
CALC_HTML_FILTER);
154 aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= OUString(
WEB_HTML_FILTER);
157 else if (aType ==
"generic_Text")
159 uno::Reference<io::XStream>
xStream(aMediaDesc[MediaDescriptor::PROP_STREAM], uno::UNO_QUERY);
160 uno::Reference<io::XInputStream> xInStream(aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM], uno::UNO_QUERY);
161 if (
xStream.is() || xInStream.is())
164 std::unique_ptr<SvStream> pInStream;
169 std::unique_ptr<SvMemoryStream> pDecompressedStream(
new SvMemoryStream());
172 uno::Reference<io::XStream> xStreamDecompressed(
new utl::OStreamWrapper(std::move(pDecompressedStream)));
173 aMediaDesc[MediaDescriptor::PROP_STREAM] <<= xStreamDecompressed;
174 aMediaDesc[MediaDescriptor::PROP_INPUTSTREAM] <<= xStreamDecompressed->getInputStream();
175 OUString
aURL = aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL, OUString() );
176 sal_Int32 nIdx =
aURL.lastIndexOf(
".gz");
178 aMediaDesc[MediaDescriptor::PROP_URL] <<=
aURL.copy(0, nIdx);
182 INetURLObject aParser(aMediaDesc.getUnpackedValueOrDefault(MediaDescriptor::PROP_URL, OUString() ) );
184 aExt = aExt.toAsciiLowerCase();
191 aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= OUString(
CALC_TEXT_FILTER);
194 else if (aExt ==
"csv" || aExt ==
"tsv" || aExt ==
"tab" || aExt ==
"xls" ||
aName.endsWith(
".csv.gz"))
195 aMediaDesc[MediaDescriptor::PROP_FILTERNAME] <<= OUString(
CALC_TEXT_FILTER);
204 aMediaDesc >> lDescriptor;
216 return "com.sun.star.comp.filters.PlainTextFilterDetect";
221 return {
"com.sun.star.document.ExtendedTypeDetection",
"com.sun.star.comp.filters.PlainTextFilterDetect" };
240extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
242 css::uno::Sequence<css::uno::Any>
const &)
OUString getName(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
OUString getExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
virtual ~PlainTextFilterDetect() override
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
virtual OUString SAL_CALL getImplementationName() override
virtual OUString SAL_CALL detect(css::uno::Sequence< css::beans::PropertyValue > &lDescriptor) override
virtual void SAL_CALL initialize(const css::uno::Sequence< css::uno::Any > &aArguments) override
bool AttemptDecompression(SvStream &rIStm, SvStream &rOStm)
static std::unique_ptr< SvStream > CreateStream(const OUString &rFileName, StreamMode eOpenMode, css::uno::Reference< css::awt::XWindow > xParentWin=nullptr)
SVT_DLLPUBLIC HtmlTokenId GetHTMLToken(std::u16string_view rName)
Shape IDs per cluster in DGG atom.
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
OString OUStringToOString(std::u16string_view str, ConnectionSettings const *settings)
TOOLS_DLLPUBLIC OString read_uInt8s_ToOString(SvStream &rStrm, std::size_t nUnits)
TOOLS_DLLPUBLIC OUString read_uInt16s_ToOUString(SvStream &rStrm, std::size_t nUnits)
constexpr OUStringLiteral CALC_TEXT_FILTER
uno::Sequence< OUString > PlainTextFilterDetect_getSupportedServiceNames()
constexpr OUStringLiteral WEB_HTML_FILTER
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * com_sun_star_comp_filters_PlainTextFilterDetect_get_implementation(css::uno::XComponentContext *, css::uno::Sequence< css::uno::Any > const &)
constexpr OUStringLiteral WRITER_DOCSERVICE
OUString PlainTextFilterDetect_getImplementationName()
constexpr OUStringLiteral WRITER_TEXT_FILTER
constexpr OUStringLiteral CALC_DOCSERVICE
constexpr OUStringLiteral WRITER_HTML_FILTER
constexpr OUStringLiteral CALC_HTML_FILTER