LibreOffice Module xmlhelp (master) 1
resultsetforquery.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <iterator>
21
22#include <com/sun/star/ucb/Command.hpp>
23#include <com/sun/star/ucb/XCommandEnvironment.hpp>
24#include <com/sun/star/i18n/Transliteration.hpp>
25#include <com/sun/star/ucb/XCommandProcessor.hpp>
26#include <com/sun/star/lang/Locale.hpp>
27
30
31#if defined(__GNUC__)
32# pragma GCC visibility push (default)
33#endif
34#include <CLucene.h>
35#if defined(__GNUC__)
36# pragma GCC visibility pop
37#endif
38
39#include <rtl/ustring.hxx>
40#include <sal/log.hxx>
41
42#include <algorithm>
43#include <set>
44#include <utility>
45#include "resultsetforquery.hxx"
46#include "databases.hxx"
47
48using namespace chelp;
49using namespace com::sun::star;
50using namespace com::sun::star::ucb;
51using namespace com::sun::star::i18n;
52using namespace com::sun::star::uno;
53using namespace com::sun::star::lang;
54
55namespace {
56
57struct HitItem
58{
59 OUString m_aURL;
60 float m_fScore;
61
62 HitItem(OUString aURL, float fScore)
63 : m_aURL(std::move(aURL))
64 , m_fScore(fScore)
65 {}
66 bool operator < ( const HitItem& rHitItem ) const
67 {
68 return rHitItem.m_fScore < m_fScore;
69 }
70};
71
72}
73
74ResultSetForQuery::ResultSetForQuery( const uno::Reference< uno::XComponentContext >& rxContext,
75 const uno::Reference< XContentProvider >& xProvider,
76 const uno::Sequence< beans::Property >& seq,
77 const URLParameter& aURLParameter,
78 Databases* pDatabases )
79 : ResultSetBase( rxContext,xProvider,seq )
80{
81 Reference< XExtendedTransliteration > xTrans = Transliteration::create( rxContext );
82 Locale aLocale( aURLParameter.get_language(),
83 OUString(),
84 OUString() );
85 xTrans->loadModule(TransliterationModules_UPPERCASE_LOWERCASE,
86 aLocale );
87
88 std::vector< std::vector< OUString > > queryList;
89 {
90 sal_Int32 idx;
91 OUString query = aURLParameter.get_query();
92 while( !query.isEmpty() )
93 {
94 idx = query.indexOf( ' ' );
95 if( idx == -1 )
96 idx = query.getLength();
97
98 std::vector< OUString > currentQuery;
99 OUString tmp(query.copy( 0,idx ));
101 OUString toliterate = xTrans->transliterate(
102 tmp,0,tmp.getLength(),aSeq);
103
104 currentQuery.push_back( toliterate );
105 queryList.push_back( currentQuery );
106
107 int nCpy = 1 + idx;
108 if( nCpy >= query.getLength() )
109 query.clear();
110 else
111 query = query.copy( 1 + idx );
112 }
113 }
114
115 std::vector< OUString > aCompleteResultVector;
116 OUString scope = aURLParameter.get_scope();
117 bool bCaptionsOnly = scope == "Heading";
118 sal_Int32 hitCount = aURLParameter.get_hitCount();
119
120 IndexFolderIterator aIndexFolderIt( *pDatabases, aURLParameter.get_module(), aURLParameter.get_language() );
121 OUString idxDir;
122 bool bExtension = false;
123 std::vector< std::vector<HitItem> > aIndexFolderResultVectorVector;
124
125 bool bTemporary;
126 for (;;)
127 {
128 idxDir = aIndexFolderIt.nextIndexFolder( bExtension, bTemporary );
129 if( idxDir.isEmpty() )
130 break;
131 std::vector<HitItem> aIndexFolderResultVector;
132
133 try
134 {
135 std::vector< std::vector<HitItem> > aQueryListResultVectorVector;
136 std::set< OUString > aSet,aCurrent,aResultSet;
137
138 int nQueryListSize = queryList.size();
139 if( nQueryListSize > 1 )
140 hitCount = 2000;
141
142 for( int i = 0; i < nQueryListSize; ++i )
143 {
144 std::vector<HitItem>* pQueryResultVector;
145 if( nQueryListSize > 1 )
146 {
147 aQueryListResultVectorVector.emplace_back();
148 pQueryResultVector = &aQueryListResultVectorVector.back();
149 }
150 else
151 {
152 pQueryResultVector = &aIndexFolderResultVector;
153 }
154 pQueryResultVector->reserve( hitCount );
155
156 const std::vector< OUString >& aListItem = queryList[i];
157 OUString aNewQueryStr = aListItem[0];
158
159 std::vector<float> aScoreVector;
160 std::vector<OUString> aPathVector;
161
162 try
163 {
164 HelpSearch searcher(idxDir);
165 searcher.query(aNewQueryStr, bCaptionsOnly, aPathVector, aScoreVector);
166 }
167 catch (CLuceneError &e)
168 {
169 SAL_WARN("xmlhelp", "CLuceneError: " << e.what());
170 }
171
172 if( nQueryListSize > 1 )
173 aSet.clear();
174
175 for (size_t j = 0; j < aPathVector.size(); ++j) {
176 pQueryResultVector->push_back(HitItem(aPathVector[j], aScoreVector[j]));
177 if (nQueryListSize > 1)
178 aSet.insert(aPathVector[j]);
179 }
180
181 // intersect
182 if( nQueryListSize > 1 )
183 {
184 if( i == 0 )
185 {
186 aResultSet = aSet;
187 }
188 else
189 {
190 aCurrent = aResultSet;
191 aResultSet.clear();
192 set_intersection( aSet.begin(),aSet.end(),
193 aCurrent.begin(),aCurrent.end(),
194 inserter(aResultSet,aResultSet.begin()));
195 }
196 }
197 }
198
199 // Combine results in aIndexFolderResultVector
200 if( nQueryListSize > 1 )
201 {
202 for( int n = 0 ; n < nQueryListSize ; ++n )
203 {
204 std::vector<HitItem>& rQueryResultVector = aQueryListResultVectorVector[n];
205
206 int nItemCount = rQueryResultVector.size();
207 for( int i = 0 ; i < nItemCount ; ++i )
208 {
209 const HitItem& rItem = rQueryResultVector[ i ];
210 if( (aResultSet.find( rItem.m_aURL )) != aResultSet.end() )
211 {
212 HitItem aItemCopy( rItem );
213 aItemCopy.m_fScore /= nQueryListSize; // To get average score
214 if( n == 0 )
215 {
216 // Use first pass to create entry
217 aIndexFolderResultVector.push_back( aItemCopy );
218 }
219 else
220 {
221 // Find entry in vector
222 int nCount = aIndexFolderResultVector.size();
223 for( int j = 0 ; j < nCount ; ++j )
224 {
225 HitItem& rFindItem = aIndexFolderResultVector[ j ];
226 if( rFindItem.m_aURL == aItemCopy.m_aURL )
227 {
228 rFindItem.m_fScore += aItemCopy.m_fScore;
229 break;
230 }
231 }
232 }
233 }
234 }
235 }
236
237 sort( aIndexFolderResultVector.begin(), aIndexFolderResultVector.end() );
238 }
239
240 aIndexFolderResultVectorVector.push_back( std::move(aIndexFolderResultVector) );
241 }
242 catch (const Exception &)
243 {
244 TOOLS_WARN_EXCEPTION("xmlhelp", "");
245 }
246
247 if( bTemporary )
248 aIndexFolderIt.deleteTempIndexFolder( idxDir );
249
250 } // Iterator
251
252
253 int nVectorCount = aIndexFolderResultVectorVector.size();
254 std::unique_ptr<std::vector<HitItem>::size_type[]> pCurrentVectorIndex(new std::vector<HitItem>::size_type[nVectorCount]);
255 for( int j = 0 ; j < nVectorCount ; ++j )
256 pCurrentVectorIndex[j] = 0;
257
258 sal_Int32 nTotalHitCount = aURLParameter.get_hitCount();
259 sal_Int32 nHitCount = 0;
260 while( nHitCount < nTotalHitCount )
261 {
262 int iVectorWithBestScore = -1;
263 float fBestScore = 0.0;
264 for( int k = 0 ; k < nVectorCount ; ++k )
265 {
266 std::vector<HitItem>& rIndexFolderVector = aIndexFolderResultVectorVector[k];
267 if( pCurrentVectorIndex[k] < rIndexFolderVector.size() )
268 {
269 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[k] ];
270
271 if( fBestScore < rItem.m_fScore )
272 {
273 fBestScore = rItem.m_fScore;
274 iVectorWithBestScore = k;
275 }
276 }
277 }
278
279 if( iVectorWithBestScore == -1 ) // No item left at all
280 break;
281
282 std::vector<HitItem>& rIndexFolderVector = aIndexFolderResultVectorVector[iVectorWithBestScore];
283 const HitItem& rItem = rIndexFolderVector[ pCurrentVectorIndex[iVectorWithBestScore] ];
284
285 pCurrentVectorIndex[iVectorWithBestScore]++;
286
287 aCompleteResultVector.push_back( rItem.m_aURL );
288 ++nHitCount;
289 }
290
291 pCurrentVectorIndex.reset();
292 aIndexFolderResultVectorVector.clear();
293
294 sal_Int32 replIdx = strlen( "#HLP#" );
295 OUString replWith = "vnd.sun.star.help://";
296
297 int nResultCount = aCompleteResultVector.size();
298 for( int r = 0 ; r < nResultCount ; ++r )
299 {
300 OUString aURL = aCompleteResultVector[r];
301 OUString aResultStr = replWith + aURL.subView(replIdx);
302 m_aPath.push_back( aResultStr );
303 }
304
305 m_aItems.resize( m_aPath.size() );
306 m_aIdents.resize( m_aPath.size() );
307
309 aCommand.Name = "getPropertyValues";
310 aCommand.Argument <<= m_sProperty;
311
312 for( m_nRow = 0; sal::static_int_cast<sal_uInt32>( m_nRow ) < m_aPath.size(); ++m_nRow )
313 {
314 m_aPath[m_nRow] =
315 m_aPath[m_nRow] +
316 "?Language=" +
317 aURLParameter.get_language() +
318 "&System=" +
319 aURLParameter.get_system();
320
321 uno::Reference< XContent > content = queryContent();
322 if( content.is() )
323 {
324 uno::Reference< XCommandProcessor > cmd( content,uno::UNO_QUERY );
325 cmd->execute( aCommand,0,uno::Reference< XCommandEnvironment >( nullptr ) ) >>= m_aItems[m_nRow]; //TODO: check return value of operator >>=
326 }
327 }
328 m_nRow = 0xffffffff;
329}
330
331/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
css::util::URL m_aURL
void query(OUString const &queryStr, bool captionOnly, std::vector< OUString > &rDocuments, std::vector< float > &rScores)
void deleteTempIndexFolder(std::u16string_view aIndexFolder)
Definition: databases.cxx:1860
OUString nextIndexFolder(bool &o_rbExtension, bool &o_rbTemporary)
Definition: databases.cxx:1715
std::vector< OUString > m_aPath
css::uno::Sequence< css::beans::Property > m_sProperty
virtual css::uno::Reference< css::ucb::XContent > SAL_CALL queryContent() override
sal_Int32 get_hitCount() const
const OUString & get_module() const
const OUString & get_system() const
OUString const & get_language() const
const OUString & get_query() const
const OUString & get_scope() const
int nCount
#define TOOLS_WARN_EXCEPTION(area, stream)
URL aURL
const sal_uInt16 idx[]
sal_Int64 n
Sequence< sal_Int8 > aSeq
#define SAL_WARN(area, stream)
@ Exception
int i
store_handle_type *SAL_CALL query(OStoreObject *pHandle, store_handle_type *)
OUString aCommand