LibreOffice Module filter (master) 1
typedetection.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include "typedetection.hxx"
21#include "constant.hxx"
22
23#include <com/sun/star/document/XExtendedFilterDetection.hpp>
24#include <com/sun/star/frame/Desktop.hpp>
25#include <com/sun/star/util/URLTransformer.hpp>
26#include <com/sun/star/util/XURLTransformer.hpp>
27
28#include <com/sun/star/io/XInputStream.hpp>
29#include <com/sun/star/io/XSeekable.hpp>
30#include <com/sun/star/task/XInteractionHandler.hpp>
31#include <o3tl/string_view.hxx>
32#include <tools/wldcrd.hxx>
33#include <sal/log.hxx>
36#include <tools/urlobj.hxx>
39#include <utility>
40
41#define DEBUG_TYPE_DETECTION 0
42
43#if DEBUG_TYPE_DETECTION
44#include <iostream>
45using std::cout;
46using std::endl;
47#endif
48
49using namespace com::sun::star;
50
51namespace filter::config{
52
53TypeDetection::TypeDetection(const css::uno::Reference< css::uno::XComponentContext >& rxContext)
54 : m_xContext(rxContext)
55 , m_xTerminateListener(new TerminateDetection(this))
56 , m_bCancel(false)
57{
58 css::frame::Desktop::create(m_xContext)->addTerminateListener(m_xTerminateListener);
59 BaseContainer::init("com.sun.star.comp.filter.config.TypeDetection" ,
60 { "com.sun.star.document.TypeDetection" },
62}
63
64
66{
67 css::frame::Desktop::create(m_xContext)->removeTerminateListener(m_xTerminateListener);
68}
69
70
71OUString SAL_CALL TypeDetection::queryTypeByURL(const OUString& sURL)
72{
73 OUString sType;
74
75 // SAFE ->
76 std::unique_lock aLock(m_aMutex);
77
78 css::util::URL aURL;
79 aURL.Complete = sURL;
80 css::uno::Reference< css::util::XURLTransformer > xParser( css::util::URLTransformer::create(m_xContext) );
81 xParser->parseStrict(aURL);
82
83 // set std types as minimum requirement first!
84 // Only in case no type was found for given URL,
85 // use optional types too ...
86 auto & cache = GetTheFilterCache();
87 FlatDetection lFlatTypes;
88 cache.detectFlatForURL(aURL, lFlatTypes);
89
90 if (
91 (lFlatTypes.empty() ) &&
92 (!cache.isFillState(FilterCache::E_CONTAINS_TYPES))
93 )
94 {
96 cache.detectFlatForURL(aURL, lFlatTypes);
97 }
98
99 // first item is guaranteed as "preferred" one!
100 if (!lFlatTypes.empty())
101 {
102 const FlatDetectionInfo& aMatch = *(lFlatTypes.begin());
103 sType = aMatch.sType;
104 }
105
106 return sType;
107 // <- SAFE
108}
109
110namespace {
111
129int getFlatTypeRank(std::u16string_view rType)
130{
131 // List formats from more complex to less complex.
132 // TODO: Add more.
133 static const char* ranks[] = {
134
135 // Compressed XML (ODF XML zip formats)
136 "writer8_template",
137 "writer8",
138 "calc8_template",
139 "calc8",
140 "impress8_template",
141 "impress8",
142 "draw8_template",
143 "draw8",
144 "chart8",
145 "math8",
146 "writerglobal8_template",
147 "writerglobal8",
148 "writerweb8_writer_template",
149 "StarBase",
150
151 // Compressed XML (OOXML)
152 "writer_OOXML_Text_Template",
153 "writer_OOXML",
154 "writer_MS_Word_2007_Template",
155 "writer_MS_Word_2007",
156 "Office Open XML Spreadsheet Template",
157 "Office Open XML Spreadsheet",
158 "MS Excel 2007 XML Template",
159 "MS Excel 2007 XML",
160 "MS PowerPoint 2007 XML Template",
161 "MS PowerPoint 2007 XML AutoPlay",
162 "MS PowerPoint 2007 XML",
163
164 // Compressed XML (Uniform/Unified Office Format)
165 "Unified_Office_Format_text",
166 "Unified_Office_Format_spreadsheet",
167 "Unified_Office_Format_presentation",
168
169 // Compressed XML (StarOffice XML zip formats)
170 "calc_StarOffice_XML_Calc",
171 "calc_StarOffice_XML_Calc_Template",
172 "chart_StarOffice_XML_Chart",
173 "draw_StarOffice_XML_Draw",
174 "draw_StarOffice_XML_Draw_Template",
175 "impress_StarOffice_XML_Impress",
176 "impress_StarOffice_XML_Impress_Template",
177 "math_StarOffice_XML_Math",
178 "writer_StarOffice_XML_Writer",
179 "writer_StarOffice_XML_Writer_Template",
180 "writer_globaldocument_StarOffice_XML_Writer_GlobalDocument",
181 "writer_web_StarOffice_XML_Writer_Web_Template",
182
183 // Compressed text
184 "pdf_Portable_Document_Format",
185
186 // Binary
187 "writer_T602_Document",
188 "writer_WordPerfect_Document",
189 "writer_MS_Works_Document",
190 "writer_MS_Word_97_Vorlage",
191 "writer_MS_Word_97",
192 "writer_MS_Word_95_Vorlage",
193 "writer_MS_Word_95",
194 "writer_MS_WinWord_60",
195 "writer_MS_WinWord_5",
196 "MS Excel 2007 Binary",
197 "calc_MS_Excel_97_VorlageTemplate",
198 "calc_MS_Excel_97",
199 "calc_MS_Excel_95_VorlageTemplate",
200 "calc_MS_Excel_95",
201 "calc_MS_Excel_5095_VorlageTemplate",
202 "calc_MS_Excel_5095",
203 "calc_MS_Excel_40_VorlageTemplate",
204 "calc_MS_Excel_40",
205 "calc_Pocket_Excel_File",
206 "impress_MS_PowerPoint_97_Vorlage",
207 "impress_MS_PowerPoint_97_AutoPlay",
208 "impress_MS_PowerPoint_97",
209 "calc_Lotus",
210 "calc_QPro",
211 "calc_SYLK",
212 "calc_DIF",
213 "calc_dBase",
214
215 // Binary (raster and vector image files)
216 "emf_MS_Windows_Metafile",
217 "wmf_MS_Windows_Metafile",
218 "met_OS2_Metafile",
219 "svm_StarView_Metafile",
220 "sgv_StarDraw_20",
221 "tif_Tag_Image_File",
222 "tga_Truevision_TARGA",
223 "sgf_StarOffice_Writer_SGF",
224 "ras_Sun_Rasterfile",
225 "psd_Adobe_Photoshop",
226 "png_Portable_Network_Graphic",
227 "jpg_JPEG",
228 "mov_MOV",
229 "gif_Graphics_Interchange",
230 "bmp_MS_Windows",
231 "pcx_Zsoft_Paintbrush",
232 "pct_Mac_Pict",
233 "pcd_Photo_CD_Base",
234 "pcd_Photo_CD_Base4",
235 "pcd_Photo_CD_Base16",
236 "webp_WebP",
237 "impress_CGM_Computer_Graphics_Metafile", // There is binary and ascii variants ?
238 "draw_WordPerfect_Graphics",
239 "draw_Visio_Document",
240 "draw_Publisher_Document",
241 "draw_Corel_Presentation_Exchange",
242 "draw_CorelDraw_Document",
243 "writer_LotusWordPro_Document",
244 "writer_MIZI_Hwp_97", // Hanword (Hancom Office)
245
246 // Non-compressed XML
247 "writer_ODT_FlatXML",
248 "calc_ODS_FlatXML",
249 "impress_ODP_FlatXML",
250 "draw_ODG_FlatXML",
251 "calc_ADO_rowset_XML",
252 "calc_MS_Excel_2003_XML",
253 "writer_MS_Word_2003_XML",
254 "writer_DocBook_File",
255 "XHTML_File",
256 "svg_Scalable_Vector_Graphics",
257 "math_MathML_XML_Math",
258
259 // Non-compressed text
260 "dxf_AutoCAD_Interchange",
261 "eps_Encapsulated_PostScript",
262 "pbm_Portable_Bitmap", // There is 'raw' and 'ascii' variants.
263 "ppm_Portable_Pixelmap", // There is 'raw' and 'ascii' variants.
264 "pgm_Portable_Graymap", // There is 'raw' and 'ascii' variants.
265 "xpm_XPM",
266 "xbm_X_Consortium",
267 "writer_Rich_Text_Format",
268 "writer_web_HTML_help",
269 "generic_HTML",
270
271 "generic_Text", // Plain text (catch all)
272
273 // Anything ranked lower than generic_Text will never be used during
274 // type detection (since generic_Text catches all).
275
276 // Export only
277 "writer_layout_dump_xml",
278 "writer_indexing_export",
279 "graphic_HTML",
280
281 // Internal use only
282 "StarBaseReportChart",
283 "StarBaseReport",
284 "math_MathType_3x", // MathType equation embedded in Word doc.
285 };
286
287 size_t n = std::size(ranks);
288
289 for (size_t i = 0; i < n; ++i)
290 {
291 if (o3tl::equalsAscii(rType, ranks[i]))
292 return n - i - 1;
293 }
294
295 // Not ranked. Treat them equally. Unranked formats have higher priority
296 // than the ranked internal ones since they may be defined externally.
297 return n;
298}
299
305struct SortByPriority
306{
307 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
308 {
309 if (r1.bMatchByPattern != r2.bMatchByPattern)
310 return r1.bMatchByPattern;
311
312 if (r1.bMatchByExtension != r2.bMatchByExtension)
313 return r1.bMatchByExtension;
314
315 int rank1 = getFlatTypeRank(r1.sType);
316 int rank2 = getFlatTypeRank(r2.sType);
317
318 if (rank1 != rank2)
319 return rank1 > rank2;
320
321 if (r1.bPreselectedByDocumentService != r2.bPreselectedByDocumentService)
322 return r1.bPreselectedByDocumentService;
323
324 // All things being equal, sort them alphabetically.
325 return r1.sType > r2.sType;
326 }
327};
328
329struct SortByType
330{
331 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
332 {
333 return r1.sType > r2.sType;
334 }
335};
336
337struct EqualByType
338{
339 bool operator() (const FlatDetectionInfo& r1, const FlatDetectionInfo& r2) const
340 {
341 return r1.sType == r2.sType;
342 }
343};
344
345class FindByType
346{
347 OUString maType;
348public:
349 explicit FindByType(OUString aType) : maType(std::move(aType)) {}
350 bool operator() (const FlatDetectionInfo& rInfo) const
351 {
352 return rInfo.sType == maType;
353 }
354};
355
#if DEBUG_TYPE_DETECTION
/**
 * Debug helper (compiled only when DEBUG_TYPE_DETECTION is non-zero):
 * writes a captioned dump of a detection list to standard output.
 *
 * @param caption  text printed in the header line.
 * @param types    list to dump; one line per entry with its type name and
 *                 its three match flags.
 */
void printFlatDetectionList(const char* caption, const FlatDetection& types)
{
    cout << "-- " << caption << " (size=" << types.size() << ")" << endl;

    for (const FlatDetectionInfo& rEntry : types)
    {
        cout << " type='" << rEntry.sType;
        cout << "'; match by extension (" << rEntry.bMatchByExtension;
        cout << "); match by pattern (" << rEntry.bMatchByPattern;
        cout << "); pre-selected by doc service (" << rEntry.bPreselectedByDocumentService;
        cout << ")" << endl;
    }

    cout << "--" << endl;
}
#endif
369
370}
371
372OUString SAL_CALL TypeDetection::queryTypeByDescriptor(css::uno::Sequence< css::beans::PropertyValue >& lDescriptor,
373 sal_Bool bAllowDeep )
374{
375 // make the descriptor more usable :-)
376 utl::MediaDescriptor stlDescriptor(lDescriptor);
377 OUString sType, sURL;
378
379 try
380 {
381 // SAFE -> ----------------------------------
382 std::unique_lock aLock(m_aMutex);
383
384 // parse given URL to split it into e.g. main and jump marks ...
385 sURL = stlDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL, OUString());
386
387#if OSL_DEBUG_LEVEL > 0
388 if (stlDescriptor.find( "FileName" ) != stlDescriptor.end())
389 OSL_FAIL("Detect using of deprecated and already unsupported MediaDescriptor property \"FileName\"!");
390#endif
391
392 css::util::URL aURL;
393 aURL.Complete = sURL;
394 css::uno::Reference< css::util::XURLTransformer > xParser(css::util::URLTransformer::create(m_xContext));
395 xParser->parseStrict(aURL);
396
397 OUString aSelectedFilter = stlDescriptor.getUnpackedValueOrDefault(
399 if (!aSelectedFilter.isEmpty())
400 {
401 // Caller specified the filter type. Honor it. Just get the default
402 // type for that filter, and bail out.
403 if (impl_validateAndSetFilterOnDescriptor(stlDescriptor, aSelectedFilter))
404 return stlDescriptor[utl::MediaDescriptor::PROP_TYPENAME].get<OUString>();
405 }
406
407 FlatDetection lFlatTypes;
408 impl_getAllFormatTypes(aLock, aURL, stlDescriptor, lFlatTypes);
409
410 aLock.unlock();
411 // <- SAFE ----------------------------------
412
413 // Properly prioritize all candidate types.
414 std::stable_sort(lFlatTypes.begin(), lFlatTypes.end(), SortByPriority());
415 auto last = std::unique(lFlatTypes.begin(), lFlatTypes.end(), EqualByType());
416 lFlatTypes.erase(last, lFlatTypes.end());
417
418 OUString sLastChance;
419
420 // verify every flat detected (or preselected!) type
421 // by calling its registered deep detection service.
422 // But break this loop if a type match to the given descriptor
423 // by a URL pattern(!) or if deep detection isn't allowed from
424 // outside (bAllowDeep=sal_False) or break the whole detection by
425 // throwing an exception if creation of the might needed input
426 // stream failed by e.g. an IO exception ...
427 if (!lFlatTypes.empty())
428 sType = impl_detectTypeFlatAndDeep(stlDescriptor, lFlatTypes, bAllowDeep, sLastChance);
429
430 // flat detection failed
431 // pure deep detection failed
432 // => ask might existing InteractionHandler
433 // means: ask user for its decision
434 if (sType.isEmpty() && !m_bCancel)
436
437
438 // no real detected type - but a might valid one.
439 // update descriptor and set last chance for return.
440 if (sType.isEmpty() && !sLastChance.isEmpty() && !m_bCancel)
441 {
442 OSL_FAIL("set first flat detected type without a registered deep detection service as \"last chance\" ... nevertheless some other deep detections said \"NO\". I TRY IT!");
443 sType = sLastChance;
444 }
445 }
446 catch(const css::uno::RuntimeException&)
447 {
448 throw;
449 }
450 catch(const css::uno::Exception&)
451 {
452 TOOLS_WARN_EXCEPTION("filter.config", "caught exception while querying type of " << sURL);
453 sType.clear();
454 }
455
456 // adapt media descriptor, so it contains the right values
457 // for type/filter name/document service/ etcpp.
458 impl_checkResultsAndAddBestFilter(stlDescriptor, sType); // Attention: sType is used as IN/OUT param here and will might be changed inside this method !!!
460
461 stlDescriptor >> lDescriptor;
462 return sType;
463}
464
465
467 OUString& sType )
468{
469 // a)
470 // Don't overwrite a might preselected filter!
471 OUString sFilter = rDescriptor.getUnpackedValueOrDefault(
473 OUString());
474 if (!sFilter.isEmpty())
475 return;
476
477 auto & cache = GetTheFilterCache();
478
479 // b)
480 // check a preselected document service too.
481 // Then we have to search a suitable filter within this module.
482 OUString sDocumentService = rDescriptor.getUnpackedValueOrDefault(
484 OUString());
485 if (!sDocumentService.isEmpty())
486 {
487 try
488 {
489 OUString sRealType = sType;
490
491 // SAFE ->
492 std::unique_lock aLock(m_aMutex);
493
494 // Attention: For executing next lines of code, We must be sure that
495 // all filters already loaded :-(
496 // That can disturb our "load on demand feature". But we have no other chance!
498
499 css::beans::NamedValue lIProps[] {
501 { PROPNAME_TYPE, uno::Any(sRealType) } };
502 std::vector<OUString> lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
503
504 aLock.unlock();
505 // <- SAFE
506
507 for (auto const& filter : lFilters)
508 {
509 // SAFE ->
510 aLock.lock();
511 try
512 {
513 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, filter);
514 sal_Int32 nFlags = 0;
515 aFilter[PROPNAME_FLAGS] >>= nFlags;
516
517 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT)
518 sFilter = filter;
519 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::PREFERED)
520 break;
521 }
522 catch(const css::uno::Exception&) {}
523 aLock.unlock();
524 // <- SAFE
525 }
526
527 if (!sFilter.isEmpty())
528 {
529 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME ] <<= sRealType;
530 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
531 sType = sRealType;
532 return;
533 }
534 }
535 catch(const css::uno::Exception&)
536 {}
537 }
538
539 // c)
540 // We can use the preferred filter for the specified type.
541 // Such preferred filter points:
542 // - to the default filter of the preferred application
543 // - or to any other filter if no preferred filter was set.
544 // Note: It's an optimization only!
545 // It's not guaranteed, that such preferred filter exists.
546 sFilter.clear();
547 try
548 {
549 CacheItem aType = cache.getItem(FilterCache::E_TYPE, sType);
550 aType[PROPNAME_PREFERREDFILTER] >>= sFilter;
551 cache.getItem(FilterCache::E_FILTER, sFilter);
552
553 // no exception => found valid type and filter => set it on the given descriptor
555 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
556 return;
557 }
558 catch(const css::uno::Exception&)
559 {}
560
561 // d)
562 // Search for any import(!) filter, which is registered for this type.
563 sFilter.clear();
564 try
565 {
566 // Attention: For executing next lines of code, We must be sure that
567 // all filters already loaded :-(
568 // That can disturb our "load on demand feature". But we have no other chance!
570
571 css::beans::NamedValue lIProps[] {
573 std::vector<OUString> lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
574
575 for (auto const& filter : lFilters)
576 {
577 sFilter = filter;
578
579 try
580 {
581 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter);
582 sal_Int32 nFlags = 0;
583 aFilter[PROPNAME_FLAGS] >>= nFlags;
584
585 if (static_cast<SfxFilterFlags>(nFlags) & SfxFilterFlags::IMPORT)
586 break;
587 }
588 catch(const css::uno::Exception&)
589 { continue; }
590
591 sFilter.clear();
592 }
593
594 if (!sFilter.isEmpty())
595 {
597 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
598 return;
599 }
600 }
601 catch(const css::uno::Exception&)
602 {}
603}
604
605
607 std::unique_lock<std::mutex>& /*rGuard*/,
608 const OUString& sPreSelType, const util::URL& aParsedURL, FlatDetection& rFlatTypes, bool bDocService)
609{
610 // Can be used to suppress execution of some parts of this method
611 // if it's already clear that detected type is valid or not.
612 // It's necessary to use shared code at the end, which update
613 // all return parameters consistency!
614 bool bBreakDetection = false;
615
616 // Further we must know if it matches by pattern
617 // Every flat detected type by pattern won't be detected deep!
618 bool bMatchByPattern = false;
619
620 // And we must know if a preselection must be preferred, because
621 // it matches by its extension too.
622 bool bMatchByExtension = false;
623
624 // validate type
625 OUString sType(sPreSelType);
626 CacheItem aType;
627 try
628 {
630 }
631 catch(const css::container::NoSuchElementException&)
632 {
633 sType.clear();
634 bBreakDetection = true;
635 }
636
637 if (!bBreakDetection)
638 {
639 // We can't check a preselected type for a given stream!
640 // So we must believe, that it can work ...
641 if ( aParsedURL.Complete == "private:stream" )
642 bBreakDetection = true;
643 }
644
645 if (!bBreakDetection)
646 {
647 // extract extension from URL .. to check it case-insensitive !
648 INetURLObject aParser (aParsedURL.Main);
649 OUString sExtension = aParser.getExtension(INetURLObject::LAST_SEGMENT ,
650 true ,
652 sExtension = sExtension.toAsciiLowerCase();
653
654 // otherwise we must know, if it matches to the given URL really.
655 // especially if it matches by its extension or pattern registration.
656 const css::uno::Sequence<OUString> lExtensions = aType[PROPNAME_EXTENSIONS].get<css::uno::Sequence<OUString> >();
657 const css::uno::Sequence<OUString> lURLPattern = aType[PROPNAME_URLPATTERN].get<css::uno::Sequence<OUString> >();
658
659 for (auto const& extension : lExtensions)
660 {
661 OUString sCheckExtension(extension.toAsciiLowerCase());
662 if (sCheckExtension == sExtension)
663 {
664 bBreakDetection = true;
665 bMatchByExtension = true;
666 break;
667 }
668 }
669
670 if (!bBreakDetection)
671 {
672 for (auto const& elem : lURLPattern)
673 {
674 WildCard aCheck(elem);
675 if (aCheck.Matches(aParsedURL.Main))
676 {
677 bMatchByPattern = true;
678 break;
679 }
680 }
681 }
682 }
683
684 // if it's a valid type - set it on all return values!
685 if (!sType.isEmpty())
686 {
687 FlatDetection::iterator it = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(sType));
688 if (it != rFlatTypes.end())
689 {
690 if (bMatchByExtension)
691 it->bMatchByExtension = true;
692 if (bMatchByPattern)
693 it->bMatchByPattern = true;
694 if (bDocService)
695 it->bPreselectedByDocumentService = true;
696 }
697
698 return true;
699 }
700
701 // not valid!
702 return false;
703}
704
706 std::unique_lock<std::mutex>& rGuard,
707 const OUString& sPreSelDocumentService, const util::URL& aParsedURL, FlatDetection& rFlatTypes)
708{
709 // get all filters, which match to this doc service
710 std::vector<OUString> lFilters;
711 try
712 {
713 // Attention: For executing next lines of code, We must be sure that
714 // all filters already loaded :-(
715 // That can disturb our "load on demand feature". But we have no other chance!
716 auto & cache = GetTheFilterCache();
718
719 css::beans::NamedValue lIProps[] {
720 { PROPNAME_DOCUMENTSERVICE, css::uno::Any(sPreSelDocumentService) } };
721 lFilters = cache.getMatchingItemsByProps(FilterCache::E_FILTER, lIProps);
722 }
723 catch (const css::container::NoSuchElementException&)
724 {
725 lFilters.clear();
726 }
727
728 // step over all filters, and check if its registered type
729 // match the given URL.
730 // But use temp. list of "preselected types" instead of incoming rFlatTypes list!
731 // The reason behind: we must filter the obtained results. And copying stl entries
732 // is an easier job than removing them .-)
733 for (auto const& filter : lFilters)
734 {
735 OUString aType = impl_getTypeFromFilter(rGuard, filter);
736 if (aType.isEmpty())
737 continue;
738
739 impl_getPreselectionForType(rGuard, aType, aParsedURL, rFlatTypes, true);
740 }
741}
742
743OUString TypeDetection::impl_getTypeFromFilter(std::unique_lock<std::mutex>& /*rGuard*/, const OUString& rFilterName)
744{
745 CacheItem aFilter;
746 try
747 {
748 aFilter = GetTheFilterCache().getItem(FilterCache::E_FILTER, rFilterName);
749 }
750 catch (const container::NoSuchElementException&)
751 {
752 return OUString();
753 }
754
755 OUString aType;
756 aFilter[PROPNAME_TYPE] >>= aType;
757 return aType;
758}
759
761 std::unique_lock<std::mutex>& rGuard,
762 const util::URL& aParsedURL, utl::MediaDescriptor const & rDescriptor, FlatDetection& rFlatTypes)
763{
764 rFlatTypes.clear();
765
766 // Get all filters that we have.
767 std::vector<OUString> aFilterNames;
768 try
769 {
770 auto & cache = GetTheFilterCache();
772 aFilterNames = cache.getItemNames(FilterCache::E_FILTER);
773 }
774 catch (const container::NoSuchElementException&)
775 {
776 return;
777 }
778
779 // Retrieve the default type for each of these filters, and store them.
780 for (auto const& filterName : aFilterNames)
781 {
782 OUString aType = impl_getTypeFromFilter(rGuard, filterName);
783
784 if (aType.isEmpty())
785 continue;
786
787 FlatDetectionInfo aInfo; // all flags set to false by default.
788 aInfo.sType = aType;
789 rFlatTypes.push_back(aInfo);
790 }
791
792 {
793 // Get all types that match the URL alone.
794 FlatDetection aFlatByURL;
795 GetTheFilterCache().detectFlatForURL(aParsedURL, aFlatByURL);
796 for (auto const& elem : aFlatByURL)
797 {
798 FlatDetection::iterator itPos = std::find_if(rFlatTypes.begin(), rFlatTypes.end(), FindByType(elem.sType));
799 if (itPos == rFlatTypes.end())
800 // Not in the list yet.
801 rFlatTypes.push_back(elem);
802 else
803 {
804 // Already in the list. Update the flags.
805 FlatDetectionInfo& rInfo = *itPos;
806 const FlatDetectionInfo& rThisInfo = elem;
807 if (rThisInfo.bMatchByExtension)
808 rInfo.bMatchByExtension = true;
809 if (rThisInfo.bMatchByPattern)
810 rInfo.bMatchByPattern = true;
811 if (rThisInfo.bPreselectedByDocumentService)
813 }
814 }
815 }
816
817 // Remove duplicates.
818 std::stable_sort(rFlatTypes.begin(), rFlatTypes.end(), SortByType());
819 auto last = std::unique(rFlatTypes.begin(), rFlatTypes.end(), EqualByType());
820 rFlatTypes.erase(last, rFlatTypes.end());
821
822 // Mark pre-selected type (if any) to have it prioritized.
823 OUString sSelectedType = rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_TYPENAME, OUString());
824 if (!sSelectedType.isEmpty())
825 impl_getPreselectionForType(rGuard, sSelectedType, aParsedURL, rFlatTypes, false);
826
827 // Mark all types preferred by the current document service, to have it prioritized.
828 OUString sSelectedDoc = rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_DOCUMENTSERVICE, OUString());
829 if (!sSelectedDoc.isEmpty())
830 impl_getPreselectionForDocumentService(rGuard, sSelectedDoc, aParsedURL, rFlatTypes);
831}
832
833
835 const FlatDetection& lFlatTypes ,
836 bool bAllowDeep ,
837 OUString& rLastChance )
838{
839 // reset it everytimes, so the outside code can distinguish between
840 // a set and a not set value.
841 rLastChance.clear();
842
843 // step over all possible types for this URL.
844 // solutions:
845 // a) no types => no detection
846 // b) deep detection not allowed => return first valid type of list (because it's the preferred or the first valid one)
847 // or(!) match by URLPattern => in such case a deep detection will be suppressed!
848 // c) type has no detect service => safe the first occurred type without a detect service
849 // as "last chance"(!). It will be used outside of this method
850 // if no further type could be detected.
851 // It must be the first one, because it can be a preferred type.
852 // Our types list was sorted by such criteria!
853 // d) detect service return a valid result => return its decision
854 // e) detect service return an invalid result
855 // or any needed information could not be
856 // obtained from the cache => ignore it, and continue with search
857
858 for (auto const& flatTypeInfo : lFlatTypes)
859 {
860 if (m_bCancel)
861 break;
862 OUString sFlatType = flatTypeInfo.sType;
863
864 if (!impl_validateAndSetTypeOnDescriptor(rDescriptor, sFlatType))
865 continue;
866
867 // b)
868 if (
869 (!bAllowDeep ) ||
870 (flatTypeInfo.bMatchByPattern)
871 )
872 {
873 return sFlatType;
874 }
875
876 try
877 {
878 // SAFE -> ----------------------------------
879 std::unique_lock aLock(m_aMutex);
881 aLock.unlock();
882
883 OUString sDetectService;
884 aType[PROPNAME_DETECTSERVICE] >>= sDetectService;
885
886 // c)
887 if (sDetectService.isEmpty())
888 {
889 // flat detected types without any registered deep detection service and not
890 // preselected by the user can be used as LAST CHANCE in case no other type could
891 // be detected. Of course only the first type without deep detector can be used.
892 // Further ones has to be ignored.
893 if (rLastChance.isEmpty())
894 rLastChance = sFlatType;
895
896 continue;
897 }
898
899 OUString sDeepType = impl_askDetectService(sDetectService, rDescriptor);
900
901 // d)
902 if (!sDeepType.isEmpty())
903 return sDeepType;
904 }
905 catch(const css::container::NoSuchElementException&)
906 {}
907 // e)
908 }
909
910 return OUString();
911 // <- SAFE ----------------------------------
912}
913
915{
916 // try to seek to 0 ...
917 // But because XSeekable is an optional interface ... try it only .-)
918 css::uno::Reference< css::io::XInputStream > xStream = rDescriptor.getUnpackedValueOrDefault(
920 css::uno::Reference< css::io::XInputStream >());
921 css::uno::Reference< css::io::XSeekable > xSeek(xStream, css::uno::UNO_QUERY);
922 if (!xSeek.is())
923 return;
924
925 try
926 {
927 xSeek->seek(0);
928 }
929 catch(const css::uno::RuntimeException&)
930 {
931 throw;
932 }
933 catch(const css::uno::Exception&)
934 {
935 }
936}
937
938OUString TypeDetection::impl_askDetectService(const OUString& sDetectService,
939 utl::MediaDescriptor& rDescriptor )
940{
941 // Open the stream and add it to the media descriptor if this method is called for the first time.
942 // All following requests to this method will detect, that there already exists a stream .-)
943 // Attention: This method throws an exception if the stream could not be opened.
944 // It's important to break any further detection in such case.
945 // Catch it on the highest detection level only !!!
946 impl_openStream(rDescriptor);
947
948 // seek to 0 is an optional feature to be more robust against
949 // "simple implemented detect services" .-)
950 impl_seekStreamToZero(rDescriptor);
951
952 css::uno::Reference< css::document::XExtendedFilterDetection > xDetector;
953 css::uno::Reference< css::uno::XComponentContext > xContext;
954
955 // SAFE ->
956 {
957 std::unique_lock aLock(m_aMutex);
958 xContext = m_xContext;
959 }
960 // <- SAFE
961
962 try
963 {
964 // Attention! If e.g. an office module was not installed sometimes we
965 // find a registered detect service, which is referred inside the
966 // configuration ... but not really installed. On the other side we use
967 // third party components here, which can make trouble anyway. So we
968 // should handle errors during creation of such services more
969 // gracefully .-)
970 xDetector.set(
971 xContext->getServiceManager()->createInstanceWithContext(sDetectService, xContext),
972 css::uno::UNO_QUERY_THROW);
973 }
974 catch (...)
975 {
976 }
977
978 if ( ! xDetector.is())
979 return OUString();
980
981 OUString sDeepType;
982 try
983 {
984 // start deep detection
985 // Don't forget to convert stl descriptor to its uno representation.
986
987 /* Attention!
988 You have to use an explicit instance of this uno sequence...
989 Because it's used as an in out parameter. And in case of a temp. used object
990 we will run into memory corruptions!
991 */
992 css::uno::Sequence< css::beans::PropertyValue > lDescriptor;
993 rDescriptor >> lDescriptor;
994 sDeepType = xDetector->detect(lDescriptor);
995 rDescriptor << lDescriptor;
996 }
997 catch (...)
998 {
999 // We should ignore errors here.
1000 // Thrown exceptions mostly will end in crash recovery...
1001 // But might be we find another deep detection service which can detect the same
1002 // document without a problem .-)
1003 sDeepType.clear();
1004 }
1005
1006 // seek to 0 is an optional feature to be more robust against
1007 // "simple implemented detect services" .-)
1008 impl_seekStreamToZero(rDescriptor);
1009
1010 // analyze the results
1011 // a) detect service returns "" => return "" too and remove TYPE/FILTER prop from descriptor
1012 // b) returned type is unknown => return "" too and remove TYPE/FILTER prop from descriptor
1013 // c) returned type is valid => check TYPE/FILTER props inside descriptor and return the type
1014
1015 // this special helper checks for a valid type
1016 // and set right values on the descriptor!
1017 bool bValidType = impl_validateAndSetTypeOnDescriptor(rDescriptor, sDeepType);
1018 if (bValidType)
1019 return sDeepType;
1020
1021 return OUString();
1022}
1023
1024
1026{
1027 css::uno::Reference< css::task::XInteractionHandler > xInteraction =
1028 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INTERACTIONHANDLER,
1029 css::uno::Reference< css::task::XInteractionHandler >());
1030
1031 if (!xInteraction.is())
1032 return OUString();
1033
1034 OUString sURL =
1035 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_URL,
1036 OUString());
1037
1038 css::uno::Reference< css::io::XInputStream > xStream =
1039 rDescriptor.getUnpackedValueOrDefault(utl::MediaDescriptor::PROP_INPUTSTREAM,
1040 css::uno::Reference< css::io::XInputStream >());
1041
1042 // Don't disturb the user for "non existing files - means empty URLs" or
1043 // if we were forced to detect a stream.
1044 // Reason behind: we must be sure to ask user for "unknown contents" only...
1045 // and not for "missing files". Especially if detection is done by a stream only
1046 // we can't check if the stream points to an "existing content"!
1047 if (
1048 (sURL.isEmpty() ) || // "non existing file" ?
1049 (!xStream.is() ) || // non existing file !
1050 (sURL.equalsIgnoreAsciiCase("private:stream")) // not a good idea .-)
1051 )
1052 return OUString();
1053
1054 try
1055 {
1056 // create a new request to ask user for its decision about the usable filter
1057 ::framework::RequestFilterSelect aRequest(sURL);
1058 xInteraction->handle(aRequest.GetRequest());
1059
1060 // "Cancel" pressed? => return with error
1061 if (aRequest.isAbort())
1062 return OUString();
1063
1064 // "OK" pressed => verify the selected filter, get its corresponding
1065 // type and return it. (BTW: We must update the media descriptor here ...)
1066 // The user selected explicitly a filter ... but normally we are interested on
1067 // a type here only. But we must be sure, that the selected filter is used
1068 // too and no ambiguous filter registration disturb us .-)
1069
1070 OUString sFilter = aRequest.getFilter();
1071 if (!impl_validateAndSetFilterOnDescriptor(rDescriptor, sFilter))
1072 return OUString();
1073 OUString sType;
1075 return sType;
1076 }
1077 catch(const css::uno::Exception&)
1078 {}
1079
1080 return OUString();
1081}
1082
1083
1085{
1086 bool bSuccess = false;
1087 OUString sURL = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_URL, OUString() );
1088 bool bRequestedReadOnly = rDescriptor.getUnpackedValueOrDefault( utl::MediaDescriptor::PROP_READONLY, false );
1089 if ( comphelper::isFileUrl( sURL ) )
1090 {
1091 // OOo uses own file locking mechanics in case of local file
1092 bSuccess = rDescriptor.addInputStreamOwnLock();
1093 }
1094 else
1095 bSuccess = rDescriptor.addInputStream();
1096
1097 if ( !bSuccess )
1098 throw css::uno::Exception(
1099 "Could not open stream for <" + sURL + ">",
1100 getXWeak());
1101
1102 if ( !bRequestedReadOnly )
1103 {
1104 // The MediaDescriptor implementation adds ReadOnly argument if the file can not be opened for writing
1105 // this argument should be either removed or an additional argument should be added so that application
1106 // can separate the case when the user explicitly requests readonly document.
1107 // The current solution is to remove it here.
1108 rDescriptor.erase( utl::MediaDescriptor::PROP_READONLY );
1109 }
1110}
1111
1112
1114{
1115 utl::MediaDescriptor::iterator pItType = rDescriptor.find(utl::MediaDescriptor::PROP_TYPENAME );
1116 utl::MediaDescriptor::iterator pItFilter = rDescriptor.find(utl::MediaDescriptor::PROP_FILTERNAME);
1117 if (pItType != rDescriptor.end())
1118 rDescriptor.erase(pItType);
1119 if (pItFilter != rDescriptor.end())
1120 rDescriptor.erase(pItFilter);
1121}
1122
1123
1125 const OUString& sType )
1126{
1128 {
1130 return true;
1131 }
1132
1133 // remove all related information from the descriptor
1135 return false;
1136}
1137
1138
1140 const OUString& sFilter )
1141{
1142 try
1143 {
1144 auto & cache = GetTheFilterCache();
1145 CacheItem aFilter = cache.getItem(FilterCache::E_FILTER, sFilter);
1146 OUString sType;
1147 aFilter[PROPNAME_TYPE] >>= sType;
1148
1149 // found valid type and filter => set it on the given descriptor
1150 rDescriptor[utl::MediaDescriptor::PROP_TYPENAME ] <<= sType ;
1151 rDescriptor[utl::MediaDescriptor::PROP_FILTERNAME] <<= sFilter;
1152 return true;
1153 }
1154 catch(const css::container::NoSuchElementException&){}
1155
1156 // remove all related information from the descriptor
1158 return false;
1159}
1160
1161} // namespace filter
1162
1163extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
1165 css::uno::XComponentContext* context, css::uno::Sequence<css::uno::Any> const&)
1166{
1167 return cppu::acquire(new filter::config::TypeDetection(context));
1168}
1169
1170/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
OptionalString sType
Reference< XComponentContext > m_xContext
Definition: OdfFlatXml.cxx:62
Reference< XInputStream > xStream
constexpr OUStringLiteral sDocumentService
OUString getExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
bool Matches(std::u16string_view rStr) const
void init(const OUString &sImplementationName, const css::uno::Sequence< OUString > &lServiceNames, FilterCache::EItemType eType)
initialize this generic instance with some specialized values from our derived object.
represent an item of a FilterCache instance.
Definition: cacheitem.hxx:40
void detectFlatForURL(const css::util::URL &aURL, FlatDetection &rFlatTypes) const
supports a flat type detection for given URL.
CacheItem getItem(EItemType eType, const OUString &sItem)
return an item which matches the specified type and name.
implements the service <type scope="com.sun.star.document">TypeDetection</type>.
static void impl_getPreselectionForDocumentService(std::unique_lock< std::mutex > &rGuard, const OUString &sPreSelDocumentService, const css::util::URL &aParsedURL, FlatDetection &rFlatTypes)
TypeDetection(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
standard ctor to connect this interface wrapper to the global filter cache instance ....
OUString impl_askDetectService(const OUString &sDetectService, utl::MediaDescriptor &rDescriptor)
make deep type detection for a specified detect service (threadsafe!).
void impl_checkResultsAndAddBestFilter(utl::MediaDescriptor &rDescriptor, OUString &sType)
search the best suitable filter for the given type and add it into the media descriptor.
static void impl_getAllFormatTypes(std::unique_lock< std::mutex > &rGuard, const css::util::URL &aParsedURL, utl::MediaDescriptor const &rDescriptor, FlatDetection &rFlatTypes)
Get all format types that we handle.
static bool impl_validateAndSetTypeOnDescriptor(utl::MediaDescriptor &rDescriptor, const OUString &sType)
validate the specified type and its relationships and set all needed information related to this type...
rtl::Reference< TerminateDetection > m_xTerminateListener
static OUString impl_askUserForTypeAndFilterIfAllowed(utl::MediaDescriptor &rDescriptor)
try to find an interaction handler and ask it to select a possible filter for this unknown format.
static void impl_seekStreamToZero(utl::MediaDescriptor const &rDescriptor)
seek a possibly existing stream to position 0.
static OUString impl_getTypeFromFilter(std::unique_lock< std::mutex > &rGuard, const OUString &rFilterName)
static void impl_removeTypeFilterFromDescriptor(utl::MediaDescriptor &rDescriptor)
remove anything related to a TYPE/FILTER entry from the specified MediaDescriptor.
virtual OUString SAL_CALL queryTypeByURL(const OUString &sURL) override
void impl_openStream(utl::MediaDescriptor &rDescriptor)
check if an input stream is already part of the given MediaDescriptor and creates a new one if necess...
virtual ~TypeDetection() override
standard dtor.
css::uno::Reference< css::uno::XComponentContext > m_xContext
virtual OUString SAL_CALL queryTypeByDescriptor(css::uno::Sequence< css::beans::PropertyValue > &lDescriptor, sal_Bool bAllowDeep) override
OUString impl_detectTypeFlatAndDeep(utl::MediaDescriptor &rDescriptor, const FlatDetection &lFlatTypes, bool bAllowDeep, OUString &rLastChance)
make a combined flat/deep type detection
static bool impl_getPreselectionForType(std::unique_lock< std::mutex > &rGuard, const OUString &sPreSelType, const css::util::URL &aParsedURL, FlatDetection &rFlatTypes, bool bDocService)
static bool impl_validateAndSetFilterOnDescriptor(utl::MediaDescriptor &rDescriptor, const OUString &sFilter)
validate the specified filter and its relationships and set all needed information related to this fi...
static constexpr OUStringLiteral PROP_INPUTSTREAM
static constexpr OUStringLiteral PROP_URL
static constexpr OUStringLiteral PROP_DOCUMENTSERVICE
static constexpr OUStringLiteral PROP_TYPENAME
static constexpr OUStringLiteral PROP_READONLY
static constexpr OUStringLiteral PROP_FILTERNAME
static constexpr OUStringLiteral PROP_INTERACTIONHANDLER
constexpr OUStringLiteral PROPNAME_FLAGS
Definition: constant.hxx:51
constexpr OUStringLiteral PROPNAME_PREFERREDFILTER
Definition: constant.hxx:36
constexpr OUStringLiteral PROPNAME_DETECTSERVICE
Definition: constant.hxx:37
constexpr OUStringLiteral PROPNAME_TYPE
used to identify a filter item property against the configuration API and can be used at all name con...
Definition: constant.hxx:47
constexpr OUStringLiteral PROPNAME_URLPATTERN
Definition: constant.hxx:40
constexpr OUStringLiteral PROPNAME_DOCUMENTSERVICE
Definition: constant.hxx:48
constexpr OUStringLiteral PROPNAME_EXTENSIONS
Definition: constant.hxx:41
#define TOOLS_WARN_EXCEPTION(area, stream)
URL aURL
SfxFilterFlags
std::mutex m_aMutex
sal_Int64 n
Shape IDs per cluster in DGG atom.
COMPHELPER_DLLPUBLIC bool isFileUrl(std::u16string_view url)
::std::vector< FlatDetectionInfo > FlatDetection
Definition: cacheitem.hxx:173
FilterCache & GetTheFilterCache()
int i
Definition: gentoken.py:48
constexpr OUStringLiteral last
bool equalsAscii(std::u16string_view s1, std::string_view s2)
TOOLS_DLLPUBLIC SvStream & endl(SvStream &rStr)
is used to collect all matching types of a URL during type detection.
Definition: cacheitem.hxx:157
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * filter_TypeDetection_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
OUString maType
unsigned char sal_Bool