22#include <com/sun/star/i18n/BreakIterator.hpp>
23#include <com/sun/star/util/SearchAlgorithms2.hpp>
24#include <com/sun/star/util/SearchFlags.hpp>
25#include <com/sun/star/i18n/WordType.hpp>
26#include <com/sun/star/i18n/ScriptType.hpp>
27#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
28#include <com/sun/star/i18n/CharacterClassification.hpp>
29#include <com/sun/star/i18n/KCharacterType.hpp>
30#include <com/sun/star/i18n/Transliteration.hpp>
34#include <rtl/ustrbuf.hxx>
37#include <unicode/regex.h>
46 TransliterationFlags::ignoreBaFa_ja_JP |
47 TransliterationFlags::ignoreIterationMark_ja_JP |
48 TransliterationFlags::ignoreTiJi_ja_JP |
49 TransliterationFlags::ignoreHyuByu_ja_JP |
50 TransliterationFlags::ignoreSeZe_ja_JP |
51 TransliterationFlags::ignoreIandEfollowedByYa_ja_JP |
52 TransliterationFlags::ignoreKiKuFollowedBySa_ja_JP |
53 TransliterationFlags::ignoreProlongedSoundMark_ja_JP;
63 TransliterationFlags::IGNORE_KANA |
64 TransliterationFlags::FULLWIDTH_HALFWIDTH);
74 return n & ~COMPLEX_TRANS_MASK;
79 return bool(maskSimpleTrans(
n));
87 if (
v == TransliterationFlags::UPPERCASE_LOWERCASE ||
v == TransliterationFlags::LOWERCASE_UPPERCASE)
88 v = TransliterationFlags::NONE;
89 return (
m |
v) & ~COMPLEX_TRANS_MASK;
94 return bool(maskSimpleRegexTrans(
n));
102 aOpt.AlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
103 aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
104 aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
131 bool bReplaceApostrophe =
false;
132 if (
aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
142 else if (
aSrchPara.searchString.indexOf(
'\'') > - 1 )
145 bReplaceApostrophe =
aSrchPara.searchString.indexOf(
u'\u2019') > -1;
149 if( isSimpleTrans( transliterateFlags) )
154 static_cast<TransliterationModules
>(maskSimpleTrans(transliterateFlags)),
161 if ( isComplexTrans( transliterateFlags) )
167 static_cast<TransliterationModules
>(maskComplexTrans(transliterateFlags)),
177 if (
aSrchPara.AlgorithmType2 == SearchAlgorithms2::REGEXP)
179 if (isSimpleRegexTrans(transliterateFlags))
181 if (maskSimpleRegexTrans(transliterateFlags) !=
182 maskSimpleTrans(transliterateFlags))
184 css::uno::Reference< XExtendedTransliteration > xTranslitPattern(
186 if (xTranslitPattern.is())
188 xTranslitPattern->loadModule(
189 static_cast<TransliterationModules
>(maskSimpleRegexTrans(transliterateFlags)),
191 sSrchStr = xTranslitPattern->transliterateString2String(
208 if (
xTranslit.is() && isSimpleTrans(transliterateFlags) )
212 if (
xTranslit2.is() && isComplexTrans(transliterateFlags) )
217 if ( bReplaceApostrophe )
224 case SearchAlgorithms2::REGEXP:
230 case SearchAlgorithms2::APPROXIMATE:
236 0 != (SearchFlags::LEV_RELAXED &
aSrchPara.searchFlag ) ) );
241 case SearchAlgorithms2::WILDCARD:
249 SAL_WARN(
"i18npool",
"TextSearch::setOptions2 - default what?");
251 case SearchAlgorithms2::ABSOLUTE:
260 sal_Int16 nAlgorithmType2;
261 switch (rOptions.algorithmType)
263 case SearchAlgorithms_REGEXP:
264 nAlgorithmType2 = SearchAlgorithms2::REGEXP;
266 case SearchAlgorithms_APPROXIMATE:
267 nAlgorithmType2 = SearchAlgorithms2::APPROXIMATE;
270 SAL_WARN(
"i18npool",
"TextSearch::setOptions - default what?");
272 case SearchAlgorithms_ABSOLUTE:
273 nAlgorithmType2 = SearchAlgorithms2::ABSOLUTE;
278 SearchOptions2 aOptions2(
279 rOptions.algorithmType,
281 rOptions.searchString,
282 rOptions.replaceString,
284 rOptions.changedChars,
285 rOptions.deletedChars,
286 rOptions.insertedChars,
287 rOptions.transliterateFlags,
296 auto pOff = std::find_if(rOff.begin(), rOff.end(),
297 [
nPos](
const sal_Int32 nOff) { return nOff >= nPos; });
298 return static_cast<sal_Int32
>(std::distance(rOff.begin(), pOff));
307 OUString in_str(searchStr);
328 const sal_Int32 nMaxLeadingLen =
aSrchPara.searchString.startsWith(
"(?") ? 100 : 3;
329 nInStartPos -= std::min(nMaxLeadingLen,
startPos);
331 sal_Int32 nInEndPos =
endPos;
335 const sal_Int32 nMaxTrailingLen =
aSrchPara.searchString.endsWith(
")") ? 100 : 3;
336 nInEndPos += std::min(nMaxTrailingLen, searchStr.getLength() -
endPos);
339 css::uno::Sequence<sal_Int32> offset(nInEndPos - nInStartPos);
340 in_str =
xTranslit->transliterate(searchStr, nInStartPos, nInEndPos - nInStartPos, offset);
342 if ( bReplaceApostrophe )
343 in_str = in_str.replace(
u'\u2019',
'\'');
346 sal_Int32 newStartPos =
349 sal_Int32 newEndPos = (
endPos < searchStr.getLength())
351 : in_str.getLength();
353 sres = (this->*
fnForward)( in_str, newStartPos, newEndPos );
356 const sal_Int32 nOffsets = offset.getLength();
359 auto sres_startOffsetRange = asNonConstRange(sres.startOffset);
360 auto sres_endOffsetRange = asNonConstRange(sres.endOffset);
363 const sal_Int32 nGroups = sres.startOffset.getLength();
364 for ( sal_Int32 k = 0; k < nGroups; k++ )
366 const sal_Int32 nStart = sres.startOffset[k];
370 sres_startOffsetRange[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
375 const sal_Int32 nStop = sres.endOffset[k];
379 sres_endOffsetRange[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
381 sres_endOffsetRange[k] = offset[0];
388 if ( bReplaceApostrophe )
389 in_str = in_str.replace(
u'\u2019',
'\'');
399 css::uno::Sequence <sal_Int32> offset( in_str.getLength());
401 in_str =
xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
406 if(
endPos < searchStr.getLength() )
409 endPos = in_str.getLength();
413 auto sres2_startOffsetRange = asNonConstRange(sres2.startOffset);
414 auto sres2_endOffsetRange = asNonConstRange(sres2.endOffset);
416 for (
int k = 0; k < sres2.startOffset.getLength(); k++ )
418 if (sres2.startOffset[k])
419 sres2_startOffsetRange[k] = offset[sres2.startOffset[k]-1] + 1;
420 if (sres2.endOffset[k])
421 sres2_endOffsetRange[k] = offset[sres2.endOffset[k]-1] + 1;
425 if ( sres.subRegExpressions == 0)
427 if ( sres2.subRegExpressions == 1)
429 if ( sres.startOffset[0] > sres2.startOffset[0])
431 else if ( sres.startOffset[0] == sres2.startOffset[0] &&
432 sres.endOffset[0] < sres2.endOffset[0])
446 OUString in_str(searchStr);
460 if ( bReplaceApostrophe )
461 in_str = in_str.replace(
u'\u2019',
'\'');
464 sal_Int32
const newStartPos = (
startPos < searchStr.getLength())
466 : in_str.getLength();
468 sal_Int32
const newEndPos =
475 sres = (this->*
fnBackward)( in_str, newStartPos, newEndPos );
478 const sal_Int32 nOffsets = offset.getLength();
481 auto sres_startOffsetRange = asNonConstRange(sres.startOffset);
482 auto sres_endOffsetRange = asNonConstRange(sres.endOffset);
485 const sal_Int32 nGroups = sres.startOffset.getLength();
486 for ( sal_Int32 k = 0; k < nGroups; k++ )
488 const sal_Int32 nStart = sres.startOffset[k];
494 sres_startOffsetRange[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
496 sres_startOffsetRange[k] = offset[0];
502 const sal_Int32 nStop = sres.endOffset[k];
504 sres_endOffsetRange[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
510 if ( bReplaceApostrophe )
511 in_str = in_str.replace(
u'\u2019',
'\'');
521 css::uno::Sequence <sal_Int32> offset( in_str.getLength());
523 in_str =
xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
525 if(
startPos < searchStr.getLength() )
535 auto sres2_startOffsetRange = asNonConstRange(sres2.startOffset);
536 auto sres2_endOffsetRange = asNonConstRange(sres2.endOffset);
538 for(
int k = 0; k < sres2.startOffset.getLength(); k++ )
540 if (sres2.startOffset[k])
541 sres2_startOffsetRange[k] = offset[sres2.startOffset[k]-1]+1;
542 if (sres2.endOffset[k])
543 sres2_endOffsetRange[k] = offset[sres2.endOffset[k]-1]+1;
547 if ( sres.subRegExpressions == 0 )
549 if ( sres2.subRegExpressions == 1 )
551 if ( sres.startOffset[0] < sres2.startOffset[0] )
553 if ( sres.startOffset[0] == sres2.startOffset[0] &&
554 sres.endOffset[0] > sres2.endOffset[0] )
566 if(
'\x7f' != rStr[
nPos])
572 if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
573 KCharacterType::LETTER ) & nCType ) )
592 sal_Int32
n, nLen =
sSrchStr.getLength();
595 for(
n = 0;
n < nLen - 1; ++
n )
598 sal_Int32 nDiff = nLen -
n - 1;
599 TextSearchJumpTable::value_type aEntry( cCh, nDiff );
601 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
604 (*(aPair.first)).second = nDiff;
620 for(
n = 0;
n < nLen - 1; ++
n )
623 sal_Int32 nDiff = nLen -
n - 1;
625 TextSearchJumpTable::value_type aEntry( cCh, nDiff );
626 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
629 (*(aPair.first)).second = nDiff;
642 sal_Int32
n, nLen =
sSrchStr.getLength();
645 for(
n = nLen-1;
n > 0; --
n )
648 TextSearchJumpTable::value_type aEntry( cCh,
n );
649 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
652 (*(aPair.first)).second =
n;
668 for(
n = nLen-1;
n > 0; --
n )
671 TextSearchJumpTable::value_type aEntry( cCh,
n );
672 ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
675 (*(aPair.first)).second =
n;
692 TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
693 if ( iLook == pJump->end() )
694 return sSearchKey.getLength();
695 return (*iLook).second;
702 aRet.subRegExpressions = 0;
706 sal_Int32 nSuchIdx = searchStr.getLength();
708 if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
712 if( nEnd < sSearchKey.getLength() )
715 nEnd -= sSearchKey.getLength();
724 nCmpIdx +=
GetDiff( searchStr[nCmpIdx + sSearchKey.getLength()-1]))
726 nSuchIdx = sSearchKey.getLength() - 1;
727 while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == searchStr[nCmpIdx + nSuchIdx])
731 if( SearchFlags::NORM_WORD_ONLY &
aSrchPara.searchFlag )
733 sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
734 bool bAtStart = !nCmpIdx;
735 bool bAtEnd = nFndEnd ==
endPos;
736 bool bDelimBefore = bAtStart ||
IsDelimiter( searchStr, nCmpIdx-1 );
737 bool bDelimBehind = bAtEnd ||
IsDelimiter( searchStr, nFndEnd );
742 if( !( ( bAtStart && bAtEnd ) ||
743 ( bAtStart && bDelimBehind ) ||
744 ( bAtEnd && bDelimBefore ) ||
745 ( bDelimBefore && bDelimBehind )))
749 aRet.subRegExpressions = 1;
750 aRet.startOffset = { nCmpIdx };
751 aRet.endOffset = { nCmpIdx + sSearchKey.getLength() };
765 aRet.subRegExpressions = 0;
769 sal_Int32 nSuchIdx = searchStr.getLength();
771 if( nSuchIdx == 0 || sSearchKey.isEmpty() || sSearchKey.getLength() > nSuchIdx)
779 if( nEnd == nSuchIdx )
780 nEnd = sSearchKey.getLength();
782 nEnd += sSearchKey.getLength();
786 while (nCmpIdx >= nEnd)
789 while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
790 searchStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
792 if( nSuchIdx >= sSearchKey.getLength() )
794 if( SearchFlags::NORM_WORD_ONLY &
aSrchPara.searchFlag )
796 sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
797 bool bAtStart = !nFndStt;
799 bool bDelimBehind = bAtEnd ||
IsDelimiter( searchStr, nCmpIdx );
800 bool bDelimBefore = bAtStart ||
806 if( ( bAtStart && bAtEnd ) ||
807 ( bAtStart && bDelimBehind ) ||
808 ( bAtEnd && bDelimBefore ) ||
809 ( bDelimBefore && bDelimBehind ))
811 aRet.subRegExpressions = 1;
812 aRet.startOffset = { nCmpIdx };
813 aRet.endOffset = { nCmpIdx - sSearchKey.getLength() };
819 aRet.subRegExpressions = 1;
820 aRet.startOffset = { nCmpIdx };
821 aRet.endOffset = { nCmpIdx - sSearchKey.getLength() };
825 nSuchIdx =
GetDiff( searchStr[nCmpIdx - sSearchKey.getLength()] );
826 if( nCmpIdx < nSuchIdx )
837 const OUString& rPatternStr =
838 (isSimpleTrans(transliterateFlags) ?
sSrchStr
839 : (isComplexTrans(transliterateFlags) ?
sSrchStr2 : rOptions.searchString));
841 sal_uInt32 nIcuSearchFlags = UREGEX_UWORD;
850 if( (rOptions.searchFlag & css::util::SearchFlags::ALL_IGNORE_CASE) != 0
851 || (transliterateFlags & TransliterationFlags::IGNORE_CASE))
852 nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
853 UErrorCode nIcuErr = U_ZERO_ERROR;
855 icu::UnicodeString aIcuSearchPatStr(
reinterpret_cast<const UChar*
>(rPatternStr.getStr()), rPatternStr.getLength());
856#ifndef DISABLE_WORDBOUND_EMULATION
859 static const icu::UnicodeString aChevronPatternB(
"\\\\<", -1, icu::UnicodeString::kInvariant);
860 static const icu::UnicodeString aChevronReplaceB(
"\\\\b(?=\\\\w)", -1, icu::UnicodeString::kInvariant);
861 static icu::RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
862 aChevronMatcherB.reset( aIcuSearchPatStr);
863 aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
864 aChevronMatcherB.reset();
866 static const icu::UnicodeString aChevronPatternE(
"\\\\>", -1, icu::UnicodeString::kInvariant);
867 static const icu::UnicodeString aChevronReplaceE(
"(?<=\\\\w)\\\\b", -1, icu::UnicodeString::kInvariant);
868 static icu::RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
869 aChevronMatcherE.reset( aIcuSearchPatStr);
870 aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
871 aChevronMatcherE.reset();
873 pRegexMatcher.reset(
new icu::RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr) );
876 SAL_INFO(
"i18npool",
"TextSearch::RESrchPrepare UErrorCode " << nIcuErr);
901static bool lcl_findRegex(std::unique_ptr<icu::RegexMatcher>
const& pRegexMatcher,
902 sal_Int32 nStartPos, sal_Int32 nEndPos, UErrorCode& rIcuErr)
904 pRegexMatcher->region(nStartPos, nEndPos, rIcuErr);
905 pRegexMatcher->useAnchoringBounds(
false);
906 pRegexMatcher->useTransparentBounds(
true);
909 if (!pRegexMatcher->find(rIcuErr))
916 SAL_INFO(
"i18npool",
"lcl_findRegex UErrorCode " << rIcuErr);
926 aRet.subRegExpressions = 0;
930 if(
endPos > searchStr.getLength())
931 endPos = searchStr.getLength();
934 UErrorCode nIcuErr = U_ZERO_ERROR;
935 const icu::UnicodeString aSearchTargetStr(
false,
reinterpret_cast<const UChar*
>(searchStr.getStr()),
936 searchStr.getLength());
947 if( nStartOfs < nEndOfs)
961 aRet.subRegExpressions = nGroupCount + 1;
962 aRet.startOffset.realloc( aRet.subRegExpressions);
963 auto pstartOffset = aRet.startOffset.getArray();
964 aRet.endOffset.realloc( aRet.subRegExpressions);
965 auto pendOffset = aRet.endOffset.getArray();
968 for(
int i = 1;
i <= nGroupCount; ++
i) {
981 aRet.subRegExpressions = 0;
985 if(
startPos > searchStr.getLength())
991 UErrorCode nIcuErr = U_ZERO_ERROR;
992 const icu::UnicodeString aSearchTargetStr(
false,
reinterpret_cast<const UChar*
>(searchStr.getStr()),
993 searchStr.getLength());
1001 int nGoodPos = 0, nGoodEnd = 0;
1006 if (nLastPos < nFoundEnd)
1009 nGoodPos = nLastPos;
1010 nGoodEnd = nFoundEnd;
1015 if( nFoundEnd == nLastPos)
1020 if (nGoodPos == nGoodEnd)
1022 if (bFirst && nLastPos ==
startPos)
1023 nGoodPos = nLastPos;
1033 aRet.subRegExpressions = nGroupCount + 1;
1034 aRet.startOffset.realloc( aRet.subRegExpressions);
1035 auto pstartOffset = aRet.startOffset.getArray();
1036 aRet.endOffset.realloc( aRet.subRegExpressions);
1037 auto pendOffset = aRet.endOffset.getArray();
1041 for(
int i = 1;
i <= nGroupCount; ++
i) {
1055 aRet.subRegExpressions = 0;
1060 sal_Int32 nStt, nEnd;
1064 WordType::ANYWORD_IGNOREWHITESPACES,
true );
1068 if( aWBnd.startPos >=
endPos )
1071 nEnd = std::min(aWBnd.endPos,
endPos);
1074 pWLD->WLD( searchStr.getStr() + nStt, nEnd - nStt ) <=
nLimit )
1076 aRet.subRegExpressions = 1;
1077 aRet.startOffset = { nStt };
1078 aRet.endOffset = { nEnd };
1084 WordType::ANYWORD_IGNOREWHITESPACES);
1085 }
while( aWBnd.startPos != aWBnd.endPos ||
1086 (aWBnd.endPos != searchStr.getLength() && aWBnd.endPos != nEnd) );
1097 aRet.subRegExpressions = 0;
1102 sal_Int32 nStt, nEnd;
1106 WordType::ANYWORD_IGNOREWHITESPACES,
true );
1110 if( aWBnd.endPos <=
endPos )
1112 nStt = aWBnd.startPos <
endPos ?
endPos : aWBnd.startPos;
1113 nEnd = std::min(aWBnd.endPos,
startPos);
1116 pWLD->WLD( searchStr.getStr() + nStt, nEnd - nStt ) <=
nLimit )
1118 aRet.subRegExpressions = 1;
1119 aRet.startOffset = { nEnd };
1120 aRet.endOffset = { nStt };
1127 WordType::ANYWORD_IGNOREWHITESPACES);
1128 }
while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != searchStr.getLength() );
1134void setWildcardMatch( css::util::SearchResult& rRes, sal_Int32 nStartOffset, sal_Int32 nEndOffset )
1136 rRes.subRegExpressions = 1;
1137 rRes.startOffset = { nStartOffset };
1138 rRes.endOffset = { nEndOffset };
1145 aRes.subRegExpressions = 0;
1146 sal_Int32 nStartOffset = nStartPos;
1147 sal_Int32 nEndOffset = nEndPos;
1149 const sal_Int32 nStringLen = searchStr.getLength();
1153 if (nStartPos < 0 || nEndPos > nStringLen || nEndPos < nStartPos ||
1154 (nStartPos == nStringLen && (nStringLen != 0 || nStartPos != nEndPos)))
1158 const sal_Int32 nPatternLen = rPattern.getLength();
1162 if (nStartPos == nEndPos)
1165 while (
i < nPatternLen && rPattern[
i] ==
'*')
1167 if (
i == nPatternLen)
1168 setWildcardMatch( aRes, nStartOffset, nEndOffset);
1176 bool bRewind =
false;
1177 sal_uInt32 cPattern = 0;
1178 sal_Int32 nPattern = 0;
1179 sal_Int32 nAfterFakePattern = nPattern;
1187 sal_uInt32 cu = rPattern.iterateCodePoints( &nAfterFakePattern);
1189 rPattern.iterateCodePoints( &nAfterFakePattern);
1192 sal_Int32 nString = nStartPos, nPat = -1, nStr = -1, nLastAsterisk = -1;
1193 sal_uInt32 cPatternAfterAsterisk = 0;
1194 bool bEscaped =
false, bEscapedAfterAsterisk =
false;
1207 else if (nPattern < nPatternLen)
1210 cPattern = rPattern.iterateCodePoints( &nPattern);
1214 cPattern = rPattern.iterateCodePoints( &nPattern);
1223 setWildcardMatch( aRes, nStartOffset, nString);
1226 else if (nString < nEndPos && nLastAsterisk >= 0)
1229 nPattern = nLastAsterisk;
1236 if (cPattern ==
'*' && !bEscaped)
1239 while (nPattern < nPatternLen && rPattern[nPattern] ==
'*')
1242 if (nPattern >= nPatternLen)
1245 setWildcardMatch( aRes, nStartOffset, nEndOffset);
1249 nLastAsterisk = nPattern;
1253 cPattern = rPattern.iterateCodePoints( &nPattern);
1257 cPattern = rPattern.iterateCodePoints( &nPattern);
1260 cPatternAfterAsterisk = cPattern;
1261 bEscapedAfterAsterisk = bEscaped;
1266 if (nString >= nEndPos)
1271 sal_uInt32 cString = searchStr.iterateCodePoints( &nString);
1273 if ((cPattern !=
'?' || bEscaped) && cPattern != cString)
1281 cPattern = cPatternAfterAsterisk;
1282 bEscaped = bEscapedAfterAsterisk;
1283 searchStr.iterateCodePoints( &nStr);
1285 if (nPat == nAfterFakePattern)
1288 nStartOffset = nString;
1298 while (nString < nEndPos);
1305 while (nPattern < nPatternLen && rPattern[nPattern] ==
'*')
1308 if (nPattern == nPatternLen)
1309 setWildcardMatch( aRes, nStartOffset, nEndOffset);
1316 aRes.subRegExpressions = 0;
1318 sal_Int32 nStartOffset = nStartPos;
1319 sal_Int32 nEndOffset = nEndPos;
1321 const sal_Int32 nStringLen = searchStr.getLength();
1325 if (nStartPos > nStringLen || nEndPos < 0 || nStartPos < nEndPos ||
1326 (nEndPos == nStringLen && (nStringLen != 0 || nStartPos != nEndPos)))
1330 sal_Int32 nPatternLen = rPattern.getLength();
1334 if (nStartPos == nEndPos)
1337 while (
i < nPatternLen && rPattern[
i] ==
'*')
1339 if (
i == nPatternLen)
1340 setWildcardMatch( aRes, nStartOffset, nEndOffset);
1353 OUStringBuffer aPatternBuf( rPattern);
1355 while (
nIndex < nPatternLen)
1357 const sal_Int32 nOld =
nIndex;
1358 const sal_uInt32 cu = rPattern.iterateCodePoints( &
nIndex);
1361 if (
nIndex < nPatternLen)
1367 const sal_Int32 nOld2 =
nIndex;
1368 rPattern.iterateCodePoints( &
nIndex);
1369 for (sal_Int32
i=0;
i <
nIndex - nOld2; ++
i)
1370 aPatternBuf[nOld+
i] = rPattern[nOld2+
i];
1377 assert(
nIndex - nOld == 2);
1379 buf[0] = rPattern[nOld];
1380 buf[1] = rPattern[nOld+1];
1381 const sal_Int32 nOld2 =
nIndex;
1382 rPattern.iterateCodePoints( &
nIndex);
1383 for (sal_Int32
i=0;
i <
nIndex - nOld2; ++
i)
1384 aPatternBuf[nOld+
i] = rPattern[nOld2+
i];
1385 aPatternBuf[
nIndex-2] = buf[0];
1386 aPatternBuf[
nIndex-1] = buf[1];
1393 aPatternBuf.remove( nOld,
nIndex - nOld);
1403 nPatternLen = rReversePattern.getLength();
1405 bool bRewind =
false;
1406 sal_uInt32 cPattern = 0;
1407 sal_Int32 nPattern = nPatternLen;
1408 sal_Int32 nAfterFakePattern = nPattern;
1416 sal_uInt32 cu = rReversePattern.iterateCodePoints( &nAfterFakePattern, -1);
1418 rReversePattern.iterateCodePoints( &nAfterFakePattern, -1);
1421 sal_Int32 nString = nStartPos, nPat = -1, nStr = -1, nLastAsterisk = -1;
1422 sal_uInt32 cPatternAfterAsterisk = 0;
1423 bool bEscaped =
false, bEscapedAfterAsterisk =
false;
1436 else if (nPattern > 0)
1439 cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1443 cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1452 setWildcardMatch( aRes, nStartOffset, nString);
1455 else if (nString > nEndPos && nLastAsterisk >= 0)
1458 nPattern = nLastAsterisk;
1465 if (cPattern ==
'*' && !bEscaped)
1468 while (nPattern > 0 && rReversePattern[nPattern-1] ==
'*')
1474 setWildcardMatch( aRes, nStartOffset, nEndOffset);
1478 nLastAsterisk = nPattern;
1482 cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1486 cPattern = rReversePattern.iterateCodePoints( &nPattern, -1);
1489 cPatternAfterAsterisk = cPattern;
1490 bEscapedAfterAsterisk = bEscaped;
1495 if (nString <= nEndPos)
1500 sal_uInt32 cString = searchStr.iterateCodePoints( &nString, -1);
1502 if ((cPattern !=
'?' || bEscaped) && cPattern != cString)
1510 cPattern = cPatternAfterAsterisk;
1511 bEscaped = bEscapedAfterAsterisk;
1512 searchStr.iterateCodePoints( &nStr, -1);
1514 if (nPat == nAfterFakePattern)
1517 nStartOffset = nString;
1527 while (nString > nEndPos);
1534 while (nPattern > 0 && rReversePattern[nPattern-1] ==
'*')
1538 setWildcardMatch( aRes, nStartOffset, nEndOffset);
1546 return "com.sun.star.util.TextSearch_i18n";
1554Sequence< OUString > SAL_CALL
1557 return {
"com.sun.star.util.TextSearch",
"com.sun.star.util.TextSearch2" };
1560extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface*
1562 css::uno::XComponentContext* context , css::uno::Sequence<css::uno::Any>
const&)
1564 return cppu::acquire(
new TextSearch(context));
Reference< XComponentContext > m_xContext
css::uno::Reference< css::uno::XComponentContext > m_xContext
virtual css::util::SearchResult SAL_CALL searchForward(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos) override
std::unique_ptr< WLevDistance > pWLD
void RESrchPrepare(const css::util::SearchOptions2 &)
css::util::SearchResult SAL_CALL RESrchFrwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
std::unique_ptr< TextSearchJumpTable > pJumpTable
OUString maWildcardReversePattern
bool IsDelimiter(const OUString &rStr, sal_Int32 nPos) const
css::util::SearchResult SAL_CALL WildcardSrchFrwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
OUString maWildcardReversePattern2
css::util::SearchResult SAL_CALL RESrchBkwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
virtual ~TextSearch() override
css::util::SearchOptions2 aSrchPara
virtual css::util::SearchResult SAL_CALL searchBackward(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos) override
std::unique_ptr< TextSearchJumpTable > pJumpTable2
virtual void SAL_CALL setOptions2(const css::util::SearchOptions2 &options) override
css::uno::Reference< css::i18n::XExtendedTransliteration > xTranslit2
bool mbWildcardAllowSubstring
css::util::SearchResult SAL_CALL NSrchBkwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
sal_uInt32 mcWildcardEscapeChar
virtual OUString SAL_CALL getImplementationName() override
css::util::SearchResult SAL_CALL ApproxSrchBkwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
sal_Int32 GetDiff(const sal_Unicode) const
sal_Int32 sal_Int32 endPos
css::util::SearchResult SAL_CALL WildcardSrchBkwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
virtual sal_Bool SAL_CALL supportsService(const OUString &ServiceName) override
css::uno::Reference< css::i18n::XExtendedTransliteration > xTranslit
css::util::SearchResult SAL_CALL NSrchFrwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
virtual void SAL_CALL setOptions(const css::util::SearchOptions &options) override
css::uno::Reference< css::i18n::XCharacterClassification > xCharClass
css::util::SearchResult SAL_CALL ApproxSrchFrwrd(const OUString &searchStr, sal_Int32 startPos, sal_Int32 endPos)
std::unique_ptr< icu::RegexMatcher > pRegexMatcher
virtual css::uno::Sequence< OUString > SAL_CALL getSupportedServiceNames() override
TextSearch(const css::uno::Reference< css::uno::XComponentContext > &rxContext)
css::uno::Reference< css::i18n::XBreakIterator > xBreak
Weighted Levenshtein Distance (WLD)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
bool CPPUHELPER_DLLPUBLIC supportsService(css::lang::XServiceInfo *implementation, rtl::OUString const &name)
const TransliterationFlags COMPLEX_TRANS_MASK
SAL_DLLPUBLIC_EXPORT css::uno::XInterface * i18npool_TextSearch_get_implementation(css::uno::XComponentContext *context, css::uno::Sequence< css::uno::Any > const &)
static bool lcl_findRegex(std::unique_ptr< icu::RegexMatcher > const &pRegexMatcher, sal_Int32 nStartPos, sal_Int32 nEndPos, UErrorCode &rIcuErr)
static sal_Int32 FindPosInSeq_Impl(const Sequence< sal_Int32 > &rOff, sal_Int32 nPos)
::std::map< sal_Unicode, sal_Int32 > TextSearchJumpTable