25#include <unicode/idna.h>
28#include <com/sun/star/ucb/Command.hpp>
29#include <com/sun/star/ucb/IllegalIdentifierException.hpp>
30#include <com/sun/star/ucb/UniversalContentBroker.hpp>
31#include <com/sun/star/ucb/UnsupportedCommandException.hpp>
32#include <com/sun/star/ucb/XCommandEnvironment.hpp>
33#include <com/sun/star/ucb/XCommandProcessor.hpp>
34#include <com/sun/star/ucb/XContent.hpp>
35#include <com/sun/star/ucb/XUniversalContentBroker.hpp>
36#include <com/sun/star/uno/Any.hxx>
37#include <com/sun/star/uno/Exception.hpp>
38#include <com/sun/star/uno/Reference.hxx>
39#include <com/sun/star/uno/RuntimeException.hpp>
40#include <com/sun/star/uno/XComponentContext.hpp>
41#include <com/sun/star/uri/UriReferenceFactory.hpp>
42#include <com/sun/star/uri/XUriReference.hpp>
43#include <com/sun/star/uri/XUriReferenceFactory.hpp>
45#include <osl/diagnose.h>
46#include <rtl/character.hxx>
47#include <rtl/ustrbuf.hxx>
48#include <rtl/ustring.hxx>
57 OUString
const & rTheRelURIRef,
59 bool bCheckFileExists,
63 rtl_TextEncoding eCharset,
67 if( rTheRelURIRef.startsWith(
"#") )
72 aAbsURIRef. SetSmartURL(rTheRelURIRef, eEncodeMechanism, eCharset, eStyle);
85 && (aAbsURIRef.
GetProtocol() == INetProtocol::File))
93 && aNonFileURIRef.
GetProtocol() != INetProtocol::File)
95 bool bMaybeFile =
false;
96 if (rMaybeFileHdl.
IsSet())
98 OUString aFilePath(rTheRelURIRef);
99 bMaybeFile = rMaybeFileHdl.
Call(&aFilePath);
102 aAbsURIRef = aNonFileURIRef;
106 return aAbsURIRef.
GetMainURL(eDecodeMechanism, eCharset);
113 gMaybeFileHdl = rTheMaybeFileHdl;
118 return gMaybeFileHdl;
123bool isAbsoluteHierarchicalUriReference(
124 css::uno::Reference< css::uri::XUriReference >
const & uriReference)
126 return uriReference.is() && uriReference->isAbsolute()
127 && !uriReference->hasRelativePath();
134enum Result {
Success, GeneralFailure, SpecificFailure };
136Result normalizePrefix( css::uno::Reference< css::ucb::XUniversalContentBroker >
const & broker,
137 OUString
const & uri, OUString * normalized)
139 OSL_ASSERT(broker.is() && normalized !=
nullptr);
140 css::uno::Reference< css::ucb::XContent > content;
142 content = broker->queryContent(broker->createContentIdentifier(uri));
143 }
catch (css::ucb::IllegalIdentifierException &) {}
145 return GeneralFailure;
149 (css::uno::Reference< css::ucb::XCommandProcessor >(
150 content, css::uno::UNO_QUERY_THROW)->execute(
151 css::ucb::Command(
"getCasePreservingURL",
152 -1, css::uno::Any()),
154 css::uno::Reference< css::ucb::XCommandEnvironment >())
157 }
catch (css::uno::RuntimeException &) {
159 }
catch (css::ucb::UnsupportedCommandException &) {
160 return GeneralFailure;
161 }
catch (css::uno::Exception &) {
162 return SpecificFailure;
168 css::uno::Reference< css::ucb::XUniversalContentBroker >
const & broker,
169 css::uno::Reference< css::uri::XUriReferenceFactory >
const & uriFactory,
170 OUString
const & uriReference)
177 sal_Int32
n = uriReference.indexOf(
'#');
178 normalized =
n == -1 ? uriReference : uriReference.copy(0, n);
179 switch (normalizePrefix(broker, normalized, &normalized)) {
181 return n == -1 ? normalized : normalized + uriReference.subView(n);
184 case SpecificFailure:
188 css::uno::Reference< css::uri::XUriReference > ref(
189 uriFactory->parse(uriReference));
190 if (!isAbsoluteHierarchicalUriReference(ref)) {
193 sal_Int32
count = ref->getPathSegmentCount();
197 OUStringBuffer head(ref->getScheme());
199 if (ref->hasAuthority()) {
200 head.append(
"//" + ref->getAuthority());
202 for (sal_Int32 i = count - 1;
i > 0; --
i) {
203 OUStringBuffer buf(head);
204 for (sal_Int32 j = 0; j <
i; ++j) {
206 buf.append(ref->getPathSegment(j));
208 normalized = buf.makeStringAndClear();
209 if (normalizePrefix(broker, normalized, &normalized) != SpecificFailure)
211 buf.append(normalized);
212 css::uno::Reference< css::uri::XUriReference > preRef(
213 uriFactory->parse(normalized));
214 if (!isAbsoluteHierarchicalUriReference(preRef)) {
218 sal_Int32 preCount = preRef->getPathSegmentCount();
221 if (preCount == i - 1) {
223 }
else if (preCount - 1 == i && !buf.isEmpty()
224 && buf[buf.getLength() - 1] ==
'/')
226 buf.setLength(buf.getLength() - 1);
232 for (sal_Int32 j = i; j <
count; ++j) {
234 buf.append(ref->getPathSegment(j));
236 if (ref->hasQuery()) {
238 buf.append(ref->getQuery());
240 if (ref->hasFragment()) {
242 buf.append(ref->getFragment());
244 return buf.makeStringAndClear();
252css::uno::Reference< css::uri::XUriReference >
254 css::uno::Reference< css::uno::XComponentContext >
const & context,
255 OUString
const & baseUriReference, OUString
const & uriReference)
257 OSL_ASSERT(context.is());
258 css::uno::Reference< css::ucb::XUniversalContentBroker > broker(
259 css::ucb::UniversalContentBroker::create(context));
260 css::uno::Reference< css::uri::XUriReferenceFactory > uriFactory(
261 css::uri::UriReferenceFactory::create(context));
262 return uriFactory->makeRelative(
263 uriFactory->parse(
normalize(broker, uriFactory, baseUriReference)),
264 uriFactory->parse(
normalize(broker, uriFactory, uriReference)),
true,
269 OUString
const & baseUriReference, OUString
const & uriReference)
271 css::uno::Reference< css::uri::XUriReference > rel(
275 return rel.is() ? rel->getUriReference() : uriReference;
284sal_Int32 nextChar(std::u16string_view rStr, sal_Int32 nPos)
286 return rtl::isHighSurrogate(rStr[nPos])
287 && rStr.size() -
nPos >= 2
288 && rtl::isLowSurrogate(rStr[nPos + 1]) ?
292bool isBoundary1(
CharClass const & rCharClass, OUString
const & rStr,
293 sal_Int32 nPos, sal_Int32 nEnd)
314bool isBoundary2(
CharClass const & rCharClass, OUString
const & rStr,
315 sal_Int32 nPos, sal_Int32 nEnd)
351bool checkWChar(
CharClass const & rCharClass, OUString
const & rStr,
352 sal_Int32 * pPos, sal_Int32 * pEnd,
353 sal_Int32 * pMatchingBracketDepth =
nullptr,
354 bool bBackslash =
false,
bool bPipe =
false)
360 = { 0, 0, 0, 0, 0, 0, 0, 0,
361 0, 0, 0, 0, 0, 0, 0, 0,
362 0, 0, 0, 0, 0, 0, 0, 0,
363 0, 0, 0, 0, 0, 0, 0, 0,
364 0, 1, 0, 0, 4, 4, 4, 1,
365 5, 6, 1, 1, 1, 4, 1, 4,
366 4, 4, 4, 4, 4, 4, 4, 4,
367 4, 4, 1, 1, 0, 1, 0, 1,
368 4, 4, 4, 4, 4, 4, 4, 4,
369 4, 4, 4, 4, 4, 4, 4, 4,
370 4, 4, 4, 4, 4, 4, 4, 4,
371 4, 4, 4, 1, 2, 1, 0, 1,
372 0, 4, 4, 4, 4, 4, 4, 4,
373 4, 4, 4, 4, 4, 4, 4, 4,
374 4, 4, 4, 4, 4, 4, 4, 4,
375 4, 4, 4, 0, 3, 0, 1, 0 };
410 if(
nullptr != pMatchingBracketDepth)
411 ++(*pMatchingBracketDepth);
416 if(
nullptr != pMatchingBracketDepth && *pMatchingBracketDepth > 0)
418 --(*pMatchingBracketDepth);
429 *pEnd = *pPos = nextChar(rStr, *pPos);
436sal_uInt32 scanDomain(OUString
const & rStr, sal_Int32 * pPos,
442 *pPos = sal::static_int_cast< sal_Int32 >(p - pBuffer);
453 rtl_TextEncoding eCharset)
455 if (rBegin > rEnd || rEnd > rText.getLength())
527 bool bBoundary1 =
true;
528 bool bBoundary2 =
true;
534 if (rtl::isAsciiAlpha(c))
538 if (eScheme == INetProtocol::File)
540 while (rText[
i++] !=
':') ;
541 sal_Int32 nPrefixEnd =
i;
542 sal_Int32 nUriEnd =
i;
544 && checkWChar(rCharClass, rText, &
i, &nUriEnd,
nullptr,
true,
546 if (
i != nPrefixEnd &&
i != rEnd && rText[
i] ==
'#')
550 && checkWChar(rCharClass, rText, &
i, &nUriEnd)) ;
552 if (nUriEnd != nPrefixEnd
553 && isBoundary1(rCharClass, rText, nUriEnd, rEnd))
556 INetProtocol::File, eMechanism, eCharset,
567 else if (eScheme != INetProtocol::NotValid)
569 while (rText[
i++] !=
':') ;
570 sal_Int32 nPrefixEnd =
i;
571 sal_Int32 nUriEnd =
i;
572 sal_Int32 nMatchingBracketDepth = 0;
574 && checkWChar(rCharClass, rText, &
i, &nUriEnd,
575 &nMatchingBracketDepth)) ;
576 if (
i != nPrefixEnd &&
i != rEnd && rText[
i] ==
'#')
580 && checkWChar(rCharClass, rText, &
i, &nUriEnd)) ;
582 if (nUriEnd != nPrefixEnd
583 && (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
584 || rText[nUriEnd] ==
'\\'))
587 INetProtocol::Http, eMechanism,
601 sal_uInt32 nLabels = scanDomain(rText, &
i, rEnd);
603 && rText[
nPos + 3] ==
'.'
604 && (((rText[
nPos] ==
'w'
605 || rText[
nPos] ==
'W')
606 && (rText[
nPos + 1] ==
'w'
607 || rText[
nPos + 1] ==
'W')
608 && (rText[
nPos + 2] ==
'w'
609 || rText[
nPos + 2] ==
'W'))
610 || ((rText[
nPos] ==
'f'
611 || rText[
nPos] ==
'F')
612 && (rText[
nPos + 1] ==
't'
613 || rText[
nPos + 1] ==
'T')
614 && (rText[
nPos + 2] ==
'p'
615 || rText[
nPos + 2] ==
'P'))))
619 sal_Int32 nUriEnd =
i;
620 if (
i != rEnd && rText[
i] ==
'/')
624 && checkWChar(rCharClass, rText, &
i, &nUriEnd)) ;
626 if (
i != rEnd && rText[
i] ==
'#')
630 && checkWChar(rCharClass, rText, &
i, &nUriEnd)) ;
632 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd)
633 || rText[nUriEnd] ==
'\\')
636 INetProtocol::Http, eMechanism,
649 && rText[
nPos + 1] ==
':'
650 && (rText[
nPos + 2] ==
'/'
651 || rText[
nPos + 2] ==
'\\'))
654 sal_Int32 nUriEnd =
i;
656 && checkWChar(rCharClass, rText, &
i, &nUriEnd)) ;
657 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
662 RTL_TEXTENCODING_UTF8,
674 else if (rEnd -
nPos >= 2
675 && rText[
nPos] ==
'\\'
676 && rText[
nPos + 1] ==
'\\')
678 sal_Int32
i =
nPos + 2;
679 sal_uInt32 nLabels = scanDomain(rText, &
i, rEnd);
680 if (nLabels >= 1 &&
i != rEnd && rText[
i] ==
'\\')
682 sal_Int32 nUriEnd = ++
i;
684 && checkWChar(rCharClass, rText, &
i, &nUriEnd,
686 if (isBoundary1(rCharClass, rText, nUriEnd, rEnd))
691 RTL_TEXTENCODING_UTF8,
707 for (sal_Int32
i =
nPos + 1;
i != rEnd; ++
i)
721 sal_uInt32 nLabels = scanDomain(rText, &
i, rEnd);
723 && isBoundary1(rCharClass, rText,
i, rEnd))
726 INetProtocol::Mailto,
741 bBoundary1 = isBoundary1(rCharClass, rText,
nPos, rEnd);
742 bBoundary2 = isBoundary2(rCharClass, rText,
nPos, rEnd);
753 if (rBegin > rEnd || rEnd > rText.getLength())
757 sal_Int32
count = rEnd-rBegin;
758 OUString candidate(rText.subView(rBegin,
count));
760 if (candidate.startsWithIgnoreAsciiCase(
"doi:10."))
776 if ( rCharClass.
isDigit(candidate,
i) )
780 else if (c==
'/' && digit>=4 &&
i<
count-1)
791 else if (!( rCharClass.
isAlphaNumeric(candidate,
i) || c ==
'.' || c ==
'-' || c==
'_' ||
792 c==
';' || c==
'(' || c==
')' || c==
'\\' || (c==
'/' &&
i<
count-1) || c==
':'))
798 if (flag && digit==-1)
800 return OUString::Concat(
"https://doi.org/")+candidate.subView(4);
810 rtl_TextEncoding eCharset)
819 css::uno::Reference<css::uri::XUriReference> uri(
820 css::uri::UriReferenceFactory::create(
823 if (!(uri.is() && uri->hasAuthority())) {
826 auto auth(uri->getAuthority());
829 sal_Int32 hostStart = auth.indexOf(
'@') + 1;
830 sal_Int32 hostEnd = auth.getLength();
831 while (hostEnd > hostStart && rtl::isAsciiDigit(auth[hostEnd - 1])) {
834 if (hostEnd > hostStart && auth[hostEnd - 1] ==
':') {
837 hostEnd = auth.getLength();
839 auto asciiOnly =
true;
840 for (
auto i = hostStart;
i != hostEnd; ++
i) {
841 if (!rtl::isAscii(auth[
i])) {
850 UErrorCode e = U_ZERO_ERROR;
851 std::unique_ptr<icu::IDNA> idna(
852 icu::IDNA::createUTS46Instance(
853 (UIDNA_USE_STD3_RULES | UIDNA_CHECK_BIDI | UIDNA_CHECK_CONTEXTJ | UIDNA_CHECK_CONTEXTO),
856 SAL_WARN(
"vcl.gdi",
"icu::IDNA::createUTS46Instance " << e);
859 icu::UnicodeString ascii;
863 reinterpret_cast<UChar
const *
>(auth.getStr() + hostStart),
864 hostEnd - hostStart),
866 if (U_FAILURE(e) || info.hasErrors()) {
869 OUStringBuffer buf(uri->getScheme());
870 buf.append(OUString::Concat(
"://") + auth.subView(0, hostStart));
872 reinterpret_cast<sal_Unicode const *
>(ascii.getBuffer()),
874 buf.append(auth.subView(hostEnd) + uri->getPath());
875 if (uri->hasQuery()) {
876 buf.append(
"?" + uri->getQuery());
878 if (uri->hasFragment()) {
879 buf.append(
"#" + uri->getFragment());
881 return buf.makeStringAndClear();
bool isAlphaNumeric(const OUString &rStr, sal_Int32 nPos) const
bool isLetterNumeric(const OUString &rStr, sal_Int32 nPos) const
bool isDigit(const OUString &rStr, sal_Int32 nPos) const
static bool isAtomChar(sal_uInt32 nChar)
OUString GetMainURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
INetURLObject smartRel2Abs(OUString const &rTheRelURIRef, bool &rWasAbsolute, bool bIgnoreFragment=false, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8, bool bRelativeNonURIs=false, FSysStyle eStyle=FSysStyle::Detect) const
bool SetSmartURL(std::u16string_view rTheAbsURIRef, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8, FSysStyle eStyle=FSysStyle::Detect)
static INetProtocol CompareProtocolScheme(std::u16string_view aTheAbsURIRef)
static sal_uInt32 scanDomain(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, bool bEager=true)
INetProtocol GetProtocol() const
OUString GetURLNoPass(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
#define SAL_WARN(area, stream)
SVL_DLLPUBLIC OUString FindFirstURLInText(OUString const &rText, sal_Int32 &rBegin, sal_Int32 &rEnd, CharClass const &rCharClass, INetURLObject::EncodeMechanism eMechanism=INetURLObject::EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
SVL_DLLPUBLIC OUString FindFirstDOIInText(OUString const &rText, sal_Int32 &rBegin, sal_Int32 &rEnd, CharClass const &rCharClass)
SVL_DLLPUBLIC Link< OUString *, bool > const & GetMaybeFileHdl()
SVL_DLLPUBLIC OUString SmartRel2Abs(INetURLObject const &rTheBaseURIRef, OUString const &rTheRelURIRef, Link< OUString *, bool > const &rMaybeFileHdl=Link< OUString *, bool >(), bool bCheckFileExists=true, bool bIgnoreFragment=false, INetURLObject::EncodeMechanism eEncodeMechanism=INetURLObject::EncodeMechanism::WasEncoded, INetURLObject::DecodeMechanism eDecodeMechanism=INetURLObject::DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8, FSysStyle eStyle=FSysStyle::Detect)
@ATT Calling this function with defaulted arguments rMaybeFileHdl = Link() and bCheckFileExists = tru...
SVL_DLLPUBLIC OUString removePassword(OUString const &rURI, INetURLObject::EncodeMechanism eEncodeMechanism, INetURLObject::DecodeMechanism eDecodeMechanism=INetURLObject::DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Remove any password component from both absolute and relative URLs.
SVL_DLLPUBLIC css::uno::Reference< css::uri::XUriReference > normalizedMakeRelative(css::uno::Reference< css::uno::XComponentContext > const &context, OUString const &baseUriReference, OUString const &uriReference)
Converts a URI reference to a relative one, ignoring certain differences (for example,...
SVL_DLLPUBLIC OUString simpleNormalizedMakeRelative(OUString const &baseUriReference, OUString const &uriReference)
A variant of normalizedMakeRelative with a simplified interface.
SVL_DLLPUBLIC void SetMaybeFileHdl(Link< OUString *, bool > const &rTheMaybeFileHdl)
SVL_DLLPUBLIC OUString resolveIdnaHost(OUString const &url)
Resolve a URL's host component domain name in IDNA syntax to plain DNS syntax.
bool normalize(sal_uInt16 &rDay, sal_uInt16 &rMonth, sal_Int16 &rYear)
Reference< XComponentContext > getProcessComponentContext()
bool parse(OUString const &uri, SourceProviderScannerData *data)
HashMap_OWString_Interface aMap