LibreOffice Module tools (master) 1
urlobj.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 *
9 * This file incorporates work covered by the following license notice:
10 *
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
19
20#include <sal/config.h>
21
22#include <tools/urlobj.hxx>
23#include <tools/debug.hxx>
24#include <tools/inetmime.hxx>
25#include <tools/stream.hxx>
26#include <com/sun/star/uno/Reference.hxx>
27#include <com/sun/star/util/XStringWidth.hpp>
28#include <o3tl/enumarray.hxx>
29#include <osl/diagnose.h>
30#include <osl/file.hxx>
31#include <rtl/character.hxx>
32#include <rtl/string.h>
33#include <rtl/textenc.h>
34#include <rtl/ustring.hxx>
35#include <sal/log.hxx>
36#include <sal/types.h>
37
38#include <algorithm>
39#include <cassert>
40#include <limits>
41#include <memory>
42#include <string_view>
43
44#include <string.h>
45
46#include <com/sun/star/uno/Sequence.hxx>
47#include <comphelper/base64.hxx>
48#include <comphelper/string.hxx>
49
50using namespace css;
51
52// INetURLObject
53
54/* The URI grammar (using RFC 2234 conventions).
55
56 Constructs of the form
57 {reference <rule1> using rule2}
58 stand for a rule matching the given rule1 specified in the given reference,
59 encoded to URI syntax using rule2 (as specified in this URI grammar).
60
61
62 ; RFC 1738, RFC 2396, RFC 2732, private
63 login = [user [":" password] "@"] hostport
64 user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
65 password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
66 hostport = host [":" port]
67 host = incomplete-hostname / hostname / IPv4address / IPv6reference
68 incomplete-hostname = *(domainlabel ".") domainlabel
69 hostname = *(domainlabel ".") toplabel ["."]
70 domainlabel = alphanum [*(alphanum / "-") alphanum]
71 toplabel = ALPHA [*(alphanum / "-") alphanum]
72 IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
73 IPv6reference = "[" hexpart [":" IPv4address] "]"
74 hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
75 hexseq = hex4 *(":" hex4)
76 hex4 = 1*4HEXDIG
77 port = *DIGIT
78 escaped = "%" HEXDIG HEXDIG
79 reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
80 mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
81 alphanum = ALPHA / DIGIT
82 unreserved = alphanum / mark
83 uric = escaped / reserved / unreserved
84 pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
85
86
87 ; RFC 1738, RFC 2396
88 ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
89 segment = *pchar
90
91
92 ; RFC 1738, RFC 2396
93 http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
94 segment = *(pchar / ";")
95
96
97 ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
98 file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
99 segment = *pchar
100 netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
101
102
103 ; RFC 2368, RFC 2396
104 mailto-url = "MAILTO:" [to] [headers]
105 to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
106 headers = "?" header *("&" header)
107 header = hname "=" hvalue
108 hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
109 hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
110
111
112 ; private (see RFC 1738, RFC 2396)
113 vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
114 segment = *(pchar / ";")
115
116
117 ; private
118 private-url = "PRIVATE:" path ["?" *uric]
119 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
120
121
122 ; private
123 vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
124 name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
125 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
126
127
128 ; private
129 https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
130 segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
131
132
133 ; private
134 slot-url = "SLOT:" path ["?" *uric]
135 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
136
137
138 ; private
139 macro-url = "MACRO:" path ["?" *uric]
140 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
141
142
143 ; private
144 javascript-url = "JAVASCRIPT:" *uric
145
146
147 ; RFC 2397
148 data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
149 mediatype = [type "/" subtype] *(";" attribute "=" value)
150 type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
151 subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
152 attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
153 value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
154
155
156 ; RFC 2392, RFC 2396
157 cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
158
159
160 ; private
161 vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
162 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
163
164
165 ; private
166 uno-url = ".UNO:" path ["?" *uric]
167 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
168
169
170 ; private
171 component-url = ".COMPONENT:" path ["?" *uric]
172 path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
173
174
175 ; private
176 vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
177 reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
178
179
180 ; RFC 2255
181 ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
182 dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
183 attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
184 filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
185 extension = ["!"] ["X-"] extoken ["=" exvalue]
186 extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
187 exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
188
189
190 ; private
191 db-url = "DB:" *uric
192
193
194 ; private
195 vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
196 opaque_part = uric_no_slash *uric
197 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
198
199
200 ; RFC 1738
201 telnet-url = "TELNET://" login ["/"]
202
203
204 ; private
205 vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
206 opaque_part = uric_no_slash *uric
207 uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
208
209
210 ; private
211 vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
212 segment = *pchar
213
214
215 ; private
216 unknown-url = scheme ":" 1*uric
217 scheme = ALPHA *(alphanum / "+" / "-" / ".")
218
219
220 ; private (http://ubiqx.org/cifs/Appendix-D.html):
221 smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
222 segment = *(pchar / ";")
223 */
224
226{
227 sal_Int32 nDelta = -m_nLength;
228 m_nBegin = -1;
229 m_nLength = 0;
230 return nDelta;
231}
232
233sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
234 std::u16string_view rSubString)
235{
236 sal_Int32 nDelta = rSubString.size() - m_nLength;
237
238 rString.remove(m_nBegin, m_nLength);
239 rString.insert(m_nBegin, rSubString);
240
241 m_nLength = rSubString.size();
242 return nDelta;
243}
244
245sal_Int32 INetURLObject::SubString::set(OUString & rString,
246 std::u16string_view rSubString)
247{
248 sal_Int32 nDelta = rSubString.size() - m_nLength;
249
250 rString = OUString::Concat(rString.subView(0, m_nBegin)) +
251 rSubString + rString.subView(m_nBegin + m_nLength);
252
253 m_nLength = rSubString.size();
254 return nDelta;
255}
256
257sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
258 std::u16string_view rSubString,
259 sal_Int32 nTheBegin)
260{
261 m_nBegin = nTheBegin;
262 return set(rString, rSubString);
263}
264
265inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
266{
267 if (isPresent())
268 m_nBegin = m_nBegin + nDelta;
269}
270
272 OUStringBuffer const & rThisString,
273 OUStringBuffer const & rOtherString) const
274{
275 sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
276 sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
277 sal_Unicode const * end = p1 + len;
278 sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
279 while (p1 != end) {
280 if (*p1 < *p2) {
281 return -1;
282 } else if (*p1 > *p2) {
283 return 1;
284 }
285 ++p1;
286 ++p2;
287 }
288 return m_nLength < rOther.m_nLength ? -1
289 : m_nLength > rOther.m_nLength ? 1
290 : 0;
291}
292
294{
295 rtl::OUStringConstExpr m_sScheme;
296 char const * m_pPrefix;
305};
306
308{
309 enum class Kind { Official, Internal, External }; // order is important!
310
311 char const * m_pPrefix;
315};
316
317// static
318inline INetURLObject::SchemeInfo const &
320{
321 static constexpr OUStringLiteral EMPTY = u"";
322 static constexpr OUStringLiteral FTP = u"ftp";
323 static constexpr OUStringLiteral HTTP = u"http";
324 static constexpr OUStringLiteral FILE1 = u"file"; // because FILE is already defined
325 static constexpr OUStringLiteral MAILTO = u"mailto";
326 static constexpr OUStringLiteral VND_WEBDAV = u"vnd.sun.star.webdav";
327 static constexpr OUStringLiteral PRIVATE = u"private";
328 static constexpr OUStringLiteral VND_HELP = u"vnd.sun.star.help";
329 static constexpr OUStringLiteral HTTPS = u"https";
330 static constexpr OUStringLiteral SLOT = u"slot";
331 static constexpr OUStringLiteral MACRO = u"macro";
332 static constexpr OUStringLiteral JAVASCRIPT = u"javascript";
333 static constexpr OUStringLiteral DATA = u"data";
334 static constexpr OUStringLiteral CID = u"cid";
335 static constexpr OUStringLiteral VND_HIER = u"vnd.sun.star.hier";
336 static constexpr OUStringLiteral UNO = u".uno";
337 static constexpr OUStringLiteral COMPONENT = u".component";
338 static constexpr OUStringLiteral VND_PKG = u"vnd.sun.star.pkg";
339 static constexpr OUStringLiteral LDAP = u"ldap";
340 static constexpr OUStringLiteral DB = u"db";
341 static constexpr OUStringLiteral VND_CMD = u"vnd.sun.star.cmd";
342 static constexpr OUStringLiteral TELNET = u"telnet";
343 static constexpr OUStringLiteral VND_EXPAND = u"vnd.sun.star.expand";
344 static constexpr OUStringLiteral VND_TDOC = u"vnd.sun.star.tdoc";
345 static constexpr OUStringLiteral SMB = u"smb";
346 static constexpr OUStringLiteral HID = u"hid";
347 static constexpr OUStringLiteral SFTP = u"sftp";
348 static constexpr OUStringLiteral VND_CMIS = u"vnd.libreoffice.cmis";
349
351 // [-loplugin:redundantfcast]:
353 EMPTY, "", false, false, false, false, false, false, false, false},
355 FTP, "ftp://", true, true, false, true, true, true, true,
356 false},
358 HTTP, "http://", true, false, false, false, true, true, true,
359 true},
361 FILE1, "file://", true, false, false, false, true, false, true,
362 false},
364 MAILTO, "mailto:", false, false, false, false, false, false,
365 false, true},
367 VND_WEBDAV, "vnd.sun.star.webdav://", true, false,
368 false, false, true, true, true, true},
370 PRIVATE, "private:", false, false, false, false, false, false,
371 false, true},
373 VND_HELP, "vnd.sun.star.help://", true, false, false,
374 false, false, false, true, true},
376 HTTPS, "https://", true, false, false, false, true, true,
377 true, true},
379 SLOT, "slot:", false, false, false, false, false, false, false,
380 true},
382 MACRO, "macro:", false, false, false, false, false, false,
383 false, true},
385 JAVASCRIPT, "javascript:", false, false, false, false, false,
386 false, false, false},
388 DATA, "data:", false, false, false, false, false, false, false,
389 false},
391 CID, "cid:", false, false, false, false, false, false, false,
392 false},
394 VND_HIER, "vnd.sun.star.hier:", true, false, false,
395 false, false, false, true, false},
397 UNO, ".uno:", false, false, false, false, false, false, false,
398 true},
400 COMPONENT, ".component:", false, false, false, false, false,
401 false, false, true},
403 VND_PKG, "vnd.sun.star.pkg://", true, false, false,
404 false, false, false, true, true},
406 LDAP, "ldap://", true, false, false, false, true, true,
407 false, true},
409 DB, "db:", false, false, false, false, false, false, false,
410 false},
412 VND_CMD, "vnd.sun.star.cmd:", false, false, false,
413 false, false, false, false, false},
415 TELNET, "telnet://", true, true, false, true, true, true,
416 true, false},
418 VND_EXPAND, "vnd.sun.star.expand:", false, false,
419 false, false, false, false, false, false},
421 VND_TDOC, "vnd.sun.star.tdoc:", false, false, false,
422 false, false, false, true, false},
424 EMPTY, "", false, false, false, false, true, true, true, false },
426 SMB, "smb://", true, true, false, true, true, true, true,
427 true},
429 HID, "hid:", false, false, false, false, false, false, false,
430 true},
432 SFTP, "sftp://", true, true, false, true, true, true, true,
433 true},
435 VND_CMIS, "vnd.libreoffice.cmis://", true, true,
436 false, false, true, false, true, true} };
437 return map[eTheScheme];
438};
439
441{
442 return getSchemeInfo(m_eScheme);
443}
444
445namespace {
446
447sal_Unicode getHexDigit(sal_uInt32 nWeight)
448{
449 assert(nWeight < 16);
450 static const sal_Unicode aDigits[16]
451 = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
452 'D', 'E', 'F' };
453 return aDigits[nWeight];
454}
455
456}
457
458// static
459inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
460 sal_uInt32 nOctet)
461{
462 rTheText.append( '%' );
463 rTheText.append( getHexDigit(nOctet >> 4) );
464 rTheText.append( getHexDigit(nOctet & 15) );
465}
466
467namespace {
468
469enum
470{
487};
488
// Lookup table with one entry per ASCII code (0..127), consulted by
// mustEncode().  Each entry is the sum of the P* flags for which the
// character's bit is set; mustEncode() reports "must encode" when the bit for
// the requested part is NOT set in the character's entry.  The first 32
// entries and the final entry are 0, so those codes have no bit set for any
// part.
489sal_uInt32 const aMustEncodeMap[128]
490 = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
491 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
492/* */ PP,
493/* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
494/* " */ PM+PN +PP,
495/* # */ PM,
496/* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
497/* % */ PM,
498/* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR,
499/* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
500/* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
501/* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
502/* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
503/* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
504/* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR,
505/* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
506/* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
507/* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO,
508/* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
509/* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
510/* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
511/* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
512/* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
513/* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
514/* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
515/* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
516/* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
517/* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
518/* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
519/* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR,
520/* < */ +PI +PM+PN +PP,
521/* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR,
522/* > */ +PI +PM+PN +PP,
523/* ? */ +PG +PM +PO +PQ,
524/* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
525/* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
526/* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
527/* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
528/* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
529/* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
530/* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
531/* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
532/* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
533/* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
534/* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
535/* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
536/* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
537/* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
538/* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
539/* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
540/* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
541/* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
542/* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
543/* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
544/* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
545/* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
546/* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
547/* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
548/* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
549/* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
550/* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
551/* [ */ PG +PM+PN+PO,
552/* \ */ +PM+PN +PP,
553/* ] */ PG +PM+PN+PO,
554/* ^ */ PM+PN +PP,
555/* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
556/* ` */ PM+PN +PP,
557/* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
558/* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
559/* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
560/* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
561/* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
562/* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
563/* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
564/* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
565/* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
566/* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
567/* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
568/* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
569/* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
570/* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
571/* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
572/* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
573/* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
574/* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
575/* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
576/* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
577/* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
578/* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
579/* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
580/* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
581/* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
582/* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
583/* { */ PM+PN +PP,
584/* | */ +PM+PN +PP,
585/* } */ PM+PN +PP,
586/* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
587 0 };
588
589bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
590{
591 return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
592}
593
594}
595
597{
598 m_aAbsURIRef.setLength(0);
601 m_aUser.clear();
602 m_aAuth.clear();
603 m_aHost.clear();
604 m_aPort.clear();
605 m_aPath.clear();
606 m_aQuery.clear();
608}
609
610namespace {
611
612std::unique_ptr<SvMemoryStream> memoryStream(
613 void const * data, sal_Int32 length)
614{
615 std::unique_ptr<char[]> b(
616 new char[length]);
617 memcpy(b.get(), data, length);
618 std::unique_ptr<SvMemoryStream> s(
619 new SvMemoryStream(b.get(), length, StreamMode::READ));
620 s->ObjectOwnsMemory(true);
621 // coverity[leaked_storage : FALSE] - belongs to SvMemoryStream s at this point
622 b.release();
623 return s;
624}
625
626}
627
628std::unique_ptr<SvMemoryStream> INetURLObject::getData() const
629{
631 {
632 return nullptr;
633 }
634
635 OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 );
636 sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath );
637 sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr
638 ? 0 : pSkippedMediatype-sURLPath.getStr();
639 if (sURLPath.match(",", nCharactersSkipped))
640 {
641 nCharactersSkipped += strlen(",");
642 OString sURLEncodedData(
643 sURLPath.getStr() + nCharactersSkipped,
644 sURLPath.getLength() - nCharactersSkipped,
645 RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
646 return memoryStream(
647 sURLEncodedData.getStr(), sURLEncodedData.getLength());
648 }
649 else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
650 {
651 nCharactersSkipped += strlen(";base64,");
652 std::u16string_view sBase64Data = sURLPath.subView( nCharactersSkipped );
653 css::uno::Sequence< sal_Int8 > aDecodedData;
654 if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data)
655 == sBase64Data.size())
656 {
657 return memoryStream(
658 aDecodedData.getArray(), aDecodedData.getLength());
659 }
660 }
661 return nullptr;
662}
663
664namespace {
665
666FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
667 sal_Unicode const * pEnd,
668 FSysStyle eStyle)
669{
670 DBG_ASSERT(eStyle
673 "guessFSysStyleByCounting(): Bad style");
674 DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
675 && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
676 "guessFSysStyleByCounting(): Too big");
677 sal_Int32 nSlashCount
678 = (eStyle & FSysStyle::Unix) ?
679 0 : std::numeric_limits< sal_Int32 >::min();
680 sal_Int32 nBackslashCount
681 = (eStyle & FSysStyle::Dos) ?
682 0 : std::numeric_limits< sal_Int32 >::min();
683 while (pBegin != pEnd)
684 switch (*pBegin++)
685 {
686 case '/':
687 ++nSlashCount;
688 break;
689
690 case '\\':
691 ++nBackslashCount;
692 break;
693 }
694 return nSlashCount >= nBackslashCount ?
696}
697
698OUString parseScheme(
699 sal_Unicode const ** begin, sal_Unicode const * end,
700 sal_uInt32 fragmentDelimiter)
701{
702 sal_Unicode const * p = *begin;
703 if (p != end && rtl::isAsciiAlpha(*p)) {
704 do {
705 ++p;
706 } while (p != end
707 && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
708 || *p == '.'));
709 // #i34835# To avoid problems with Windows file paths like "C:\foo",
710 // do not accept generic schemes that are only one character long:
711 if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
712 && p - *begin >= 2)
713 {
714 OUString scheme(
715 OUString(*begin, p - *begin).toAsciiLowerCase());
716 *begin = p + 1;
717 return scheme;
718 }
719 }
720 return OUString();
721}
722
723}
724
725bool INetURLObject::setAbsURIRef(std::u16string_view rTheAbsURIRef,
726 EncodeMechanism eMechanism,
727 rtl_TextEncoding eCharset,
728 bool bSmart,
729 FSysStyle eStyle)
730{
731 sal_Unicode const * pPos = rTheAbsURIRef.data();
732 sal_Unicode const * pEnd = pPos + rTheAbsURIRef.size();
733
734 setInvalid();
735
736 sal_uInt32 nFragmentDelimiter = '#';
737
738 m_aAbsURIRef.setLength(0);
739
740 // Parse <scheme>:
741 sal_Unicode const * p = pPos;
742 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
743 if (pPrefix)
744 {
745 pPos = p;
746 m_eScheme = pPrefix->m_eScheme;
747
748 char const * pTemp = pPrefix->m_eKind >= PrefixInfo::Kind::External ?
749 pPrefix->m_pTranslatedPrefix :
750 pPrefix->m_pPrefix;
751 m_aAbsURIRef.appendAscii(pTemp);
752 m_aScheme = SubString( 0, strstr(pTemp, ":") - pTemp );
753 }
754 else
755 {
756 if (bSmart)
757 {
758 // For scheme detection, the first (if any) of the following
759 // productions that matches the input string (and for which the
760 // appropriate style bit is set in eStyle, if applicable)
761 // determines the scheme. The productions use the auxiliary rules
762
763 // domain = label *("." label)
764 // label = alphanum [*(alphanum / "-") alphanum]
765 // alphanum = ALPHA / DIGIT
766 // IPv6reference = "[" IPv6address "]"
767 // IPv6address = hexpart [":" IPv4address]
768 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
769 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
770 // hexseq = hex4 *(":" hex4)
771 // hex4 = 1*4HEXDIG
772 // UCS4 = <any UCS4 character>
773
774 // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
775 // <one of the known schemes, ignoring case> ":" *UCS4
776 // 2nd Production (mailto):
777 // domain "@" domain
778 // 3rd Production (ftp):
779 // "FTP" 2*("." label) ["/" *UCS4]
780 // 4th Production (http):
781 // label 2*("." label) ["/" *UCS4]
782 // 5th Production (file):
783 // "//" (domain / IPv6reference) ["/" *UCS4]
784 // 6th Production (Unix file):
785 // "/" *UCS4
786 // 7th Production (UNC file; FSysStyle::Dos only):
787 // "\\" domain ["\" *UCS4]
788 // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
789 // ALPHA ":" ["/" *UCS4]
790 // 9th Production (DOS file; FSysStyle::Dos only):
791 // ALPHA ":" ["\" *UCS4]
792 // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
793 // after this else branch):
794 // <any scheme> ":" *UCS4
795
796 // For the 'non URL' file productions 6--9, the interpretation of
797 // the input as a (degenerate) URI is turned off, i.e., escape
798 // sequences and fragments are never detected as such, but are
799 // taken as literal characters.
800
801 sal_Unicode const * p1 = pPos;
802 if (eStyle & FSysStyle::Dos
803 && pEnd - p1 >= 2
804 && rtl::isAsciiAlpha(p1[0])
805 && p1[1] == ':'
806 && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
807 {
808 m_eScheme = INetProtocol::File; // 8th, 9th
809 eMechanism = EncodeMechanism::All;
810 nFragmentDelimiter = 0x80000000;
811 }
812 else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
813 {
814 p1 += 2;
815 if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
816 && (p1 == pEnd || *p1 == '/'))
818 }
819 else if (p1 != pEnd && *p1 == '/')
820 {
822 eMechanism = EncodeMechanism::All;
823 nFragmentDelimiter = 0x80000000;
824 }
825 else if (eStyle & FSysStyle::Dos
826 && pEnd - p1 >= 2
827 && p1[0] == '\\'
828 && p1[1] == '\\')
829 {
830 p1 += 2;
831 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
832 p1, pEnd - p1, '\\');
833 sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
834 if (
836 p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
837 true, nullptr) ||
838 (scanDomain(p1, pe) > 0 && p1 == pe)
839 )
840 {
842 eMechanism = EncodeMechanism::All;
843 nFragmentDelimiter = 0x80000000;
844 }
845 }
846 else
847 {
848 sal_Unicode const * pDomainEnd = p1;
849 sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
850 if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
851 {
852 ++pDomainEnd;
853 if (scanDomain(pDomainEnd, pEnd) > 0
854 && pDomainEnd == pEnd)
856 }
857 else if (nLabels >= 3
858 && (pDomainEnd == pEnd || *pDomainEnd == '/'))
860 = pDomainEnd - p1 >= 4
861 && (p1[0] == 'f' || p1[0] == 'F')
862 && (p1[1] == 't' || p1[1] == 'T')
863 && (p1[2] == 'p' || p1[2] == 'P')
864 && p1[3] == '.' ?
866 }
867 }
868
869 OUString aSynScheme;
871 sal_Unicode const * p1 = pPos;
872 aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
873 if (!aSynScheme.isEmpty())
874 {
875 if (bSmart && m_eSmartScheme != m_eScheme && p1 != pEnd && rtl::isAsciiDigit(*p1))
876 {
877 // rTheAbsURIRef doesn't define a known scheme (handled by the "if (pPrefix)"
878 // branch above); but a known scheme is defined in m_eSmartScheme. If this
879 // scheme may have a port in authority component, then avoid misinterpreting
880 // URLs like www.foo.bar:123/baz as using unknown "www.foo.bar" scheme with
881 // 123/baz rootless path. For now, do not try to handle possible colons in
882 // user information, require such ambiguous URLs to have explicit scheme part.
883 // Also ignore possibility of empty port.
885 if (rInfo.m_bAuthority && rInfo.m_bPort)
886 {
887 // Make sure that all characters from colon to [/?#] or to EOL are digits.
888 // Or maybe make it simple, and just assume that "xyz:1..." is more likely
889 // to be host "xyz" and port "1...", than scheme "xyz" and path "1..."?
890 sal_Unicode const* p2 = p1 + 1;
891 while (p2 != pEnd && rtl::isAsciiDigit(*p2))
892 ++p2;
893 if (p2 == pEnd || *p2 == '/' || *p2 == '?' || *p2 == '#')
895 }
896 }
897
899 {
901 pPos = p1;
902 }
903 }
904 }
905
906 if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
907 && *pPos != nFragmentDelimiter)
908 {
910 }
911
913 {
914 setInvalid();
915 return false;
916 }
917
919 aSynScheme = static_cast<const OUString&>(getSchemeInfo().m_sScheme);
920 }
921 m_aScheme.set(m_aAbsURIRef, aSynScheme, m_aAbsURIRef.getLength());
922 m_aAbsURIRef.append(':');
923 }
924
925 sal_uInt32 nSegmentDelimiter = '/';
926 sal_uInt32 nAltSegmentDelimiter = 0x80000000;
927 bool bSkippedInitialSlash = false;
928
929 // Parse //<user>;AUTH=<auth>@<host>:<port> or
930 // //<user>:<password>@<host>:<port> or
931 // //<reg_name>
933 {
934 sal_Unicode const * pUserInfoBegin = nullptr;
935 sal_Unicode const * pUserInfoEnd = nullptr;
936 sal_Unicode const * pHostPortBegin = nullptr;
937 sal_Unicode const * pHostPortEnd = nullptr;
938
939 switch (m_eScheme)
940 {
942 {
943 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
944 {
945 setInvalid();
946 return false;
947 }
948 m_aAbsURIRef.append("//");
949 OUStringBuffer aSynAuthority;
950 while (pPos < pEnd
951 && *pPos != '/' && *pPos != '?'
952 && *pPos != nFragmentDelimiter)
953 {
954 EscapeType eEscapeType;
955 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
956 eMechanism,
957 eCharset, eEscapeType);
958 appendUCS4(aSynAuthority, nUTF32, eEscapeType,
959 PART_AUTHORITY, eCharset, false);
960 }
962 aSynAuthority,
963 m_aAbsURIRef.getLength());
964 // misusing m_aHost to store the authority
965 break;
966 }
967
969 {
970 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
971 {
972 pPos += 2;
973 m_aAbsURIRef.append("//");
974 OUStringBuffer aSynAuthority;
975 while (pPos < pEnd
976 && *pPos != '/' && *pPos != '?'
977 && *pPos != nFragmentDelimiter)
978 {
979 EscapeType eEscapeType;
980 sal_uInt32 nUTF32 = getUTF32(pPos,
981 pEnd,
982 eMechanism,
983 eCharset,
984 eEscapeType);
985 appendUCS4(aSynAuthority,
986 nUTF32,
987 eEscapeType,
989 eCharset,
990 false);
991 }
992 if (aSynAuthority.isEmpty())
993 {
994 setInvalid();
995 return false;
996 }
998 aSynAuthority,
999 m_aAbsURIRef.getLength());
1000 // misusing m_aHost to store the authority
1001 }
1002 break;
1003 }
1004
1006 case INetProtocol::Cmis:
1007 {
1008 if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
1009 {
1010 setInvalid();
1011 return false;
1012 }
1013 m_aAbsURIRef.append("//");
1014 OUStringBuffer aSynUser(128);
1015
1016 bool bHasUser = false;
1017 while (pPos < pEnd && *pPos != '@'
1018 && *pPos != '/' && *pPos != '?'
1019 && *pPos != nFragmentDelimiter)
1020 {
1021 EscapeType eEscapeType;
1022 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1023 eMechanism,
1024 eCharset, eEscapeType);
1025 appendUCS4(aSynUser, nUTF32, eEscapeType,
1026 PART_USER_PASSWORD, eCharset, false);
1027
1028 bHasUser = *pPos == '@';
1029 }
1030
1031 OUStringBuffer aSynAuthority(64);
1032 if ( !bHasUser )
1033 {
1034 aSynAuthority = aSynUser;
1035 }
1036 else
1037 {
1039 aSynUser,
1040 m_aAbsURIRef.getLength());
1041 m_aAbsURIRef.append("@");
1042 ++pPos;
1043
1044 while (pPos < pEnd
1045 && *pPos != '/' && *pPos != '?'
1046 && *pPos != nFragmentDelimiter)
1047 {
1048 EscapeType eEscapeType;
1049 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1050 eMechanism,
1051 eCharset, eEscapeType);
1052 appendUCS4(aSynAuthority, nUTF32, eEscapeType,
1053 PART_AUTHORITY, eCharset, false);
1054 }
1055 }
1056 if (aSynAuthority.isEmpty())
1057 {
1058 setInvalid();
1059 return false;
1060 }
1062 aSynAuthority,
1063 m_aAbsURIRef.getLength());
1064 // misusing m_aHost to store the authority
1065 break;
1066 }
1067
1068 case INetProtocol::File:
1069 if (bSmart)
1070 {
1071 // The first of the following seven productions that
1072 // matches the rest of the input string (and for which the
1073 // appropriate style bit is set in eStyle, if applicable)
1074 // determines the used notation. The productions use the
1075 // auxiliary rules
1076
1077 // domain = label *("." label)
1078 // label = alphanum [*(alphanum / "-") alphanum]
1079 // alphanum = ALPHA / DIGIT
1080 // IPv6reference = "[" IPv6address "]"
1081 // IPv6address = hexpart [":" IPv4address]
1082 // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1083 // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1084 // hexseq = hex4 *(":" hex4)
1085 // hex4 = 1*4HEXDIG
1086 // path = <any UCS4 character except "#">
1087 // UCS4 = <any UCS4 character>
1088
1089 // 1st Production (URL):
1090 // "//" [domain / IPv6reference] ["/" *path]
1091 // ["#" *UCS4]
1092 // becomes
1093 // "file://" domain "/" *path ["#" *UCS4]
1094 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1095 {
1096 sal_Unicode const * p1 = pPos + 2;
1097 while (p1 != pEnd && *p1 != '/' &&
1098 *p1 != nFragmentDelimiter)
1099 {
1100 ++p1;
1101 }
1103 pPos + 2, p1, EncodeMechanism::All,
1104 RTL_TEXTENCODING_DONTKNOW, true, nullptr))
1105 {
1106 m_aAbsURIRef.append("//");
1107 pHostPortBegin = pPos + 2;
1108 pHostPortEnd = p1;
1109 pPos = p1;
1110 break;
1111 }
1112 }
1113
1114 // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
1115 // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1116 // becomes
1117 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1118 // replacing "\" by "/" within <*path>
1119 // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
1120 // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1121 // becomes
1122 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1123 // replacing "\" by "/" within <*path>
1124 // 4th Production (miscounted slashes):
1125 // "//" *path ["#" *UCS4]
1126 // becomes
1127 // "file:///" *path ["#" *UCS4]
1128 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1129 {
1130 m_aAbsURIRef.append("//");
1131 pPos += 2;
1132 bSkippedInitialSlash = true;
1133 if ((eStyle & FSysStyle::Dos)
1134 && pEnd - pPos >= 2
1135 && rtl::isAsciiAlpha(pPos[0])
1136 && pPos[1] == ':'
1137 && (pEnd - pPos == 2
1138 || pPos[2] == '/' || pPos[2] == '\\'))
1139 nAltSegmentDelimiter = '\\';
1140 break;
1141 }
1142
1143 // 5th Production (Unix):
1144 // "/" *path ["#" *UCS4]
1145 // becomes
1146 // "file:///" *path ["#" *UCS4]
1147 if (pPos < pEnd && *pPos == '/')
1148 {
1149 m_aAbsURIRef.append("//");
1150 break;
1151 }
1152
1153 // 6th Production (UNC; FSysStyle::Dos only):
1154 // "\\" domain ["\" *path] ["#" *UCS4]
1155 // becomes
1156 // "file://" domain "/" *path ["#" *UCS4]
1157 // replacing "\" by "/" within <*path>
1158 if (eStyle & FSysStyle::Dos
1159 && pEnd - pPos >= 2
1160 && pPos[0] == '\\'
1161 && pPos[1] == '\\')
1162 {
1163 sal_Unicode const * p1 = pPos + 2;
1164 sal_Unicode const * pe = p1;
1165 while (pe < pEnd && *pe != '\\' &&
1166 *pe != nFragmentDelimiter)
1167 {
1168 ++pe;
1169 }
1170 if (
1172 p1, pe, EncodeMechanism::All,
1173 RTL_TEXTENCODING_DONTKNOW, true, nullptr) ||
1174 (scanDomain(p1, pe) > 0 && p1 == pe)
1175 )
1176 {
1177 m_aAbsURIRef.append("//");
1178 pHostPortBegin = pPos + 2;
1179 pHostPortEnd = pe;
1180 pPos = pe;
1181 nSegmentDelimiter = '\\';
1182 break;
1183 }
1184 }
1185
1186 // 7th Production (Unix-like DOS; FSysStyle::Dos only):
1187 // ALPHA ":" ["/" *path] ["#" *UCS4]
1188 // becomes
1189 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1190 // replacing "\" by "/" within <*path>
1191 // 8th Production (DOS; FSysStyle::Dos only):
1192 // ALPHA ":" ["\" *path] ["#" *UCS4]
1193 // becomes
1194 // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1195 // replacing "\" by "/" within <*path>
1196 if (eStyle & FSysStyle::Dos
1197 && pEnd - pPos >= 2
1198 && rtl::isAsciiAlpha(pPos[0])
1199 && pPos[1] == ':'
1200 && (pEnd - pPos == 2
1201 || pPos[2] == '/'
1202 || pPos[2] == '\\'))
1203 {
1204 m_aAbsURIRef.append("//");
1205 nAltSegmentDelimiter = '\\';
1206 bSkippedInitialSlash = true;
1207 break;
1208 }
1209
1210 // 9th Production (any):
1211 // *path ["#" *UCS4]
1212 // becomes
1213 // "file:///" *path ["#" *UCS4]
1214 // replacing the delimiter by "/" within <*path>. The
1215 // delimiter is that character from the set { "/", "\"}
1216 // which appears most often in <*path> (if FSysStyle::Unix
1217 // is not among the style bits, "/" is removed from the
1218 // set; if FSysStyle::Dos is not among the style bits, "\" is
1219 // removed from the set). If two or
1220 // more characters appear the same number of times, the
1221 // character mentioned first in that set is chosen. If
1222 // the first character of <*path> is the delimiter, that
1223 // character is not copied
1224 if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
1225 {
1226 m_aAbsURIRef.append("//");
1227 switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1228 {
1229 case FSysStyle::Unix:
1230 nSegmentDelimiter = '/';
1231 break;
1232
1233 case FSysStyle::Dos:
1234 nSegmentDelimiter = '\\';
1235 break;
1236
1237 default:
1238 OSL_FAIL(
1239 "INetURLObject::setAbsURIRef():"
1240 " Bad guessFSysStyleByCounting");
1241 break;
1242 }
1243 bSkippedInitialSlash
1244 = pPos != pEnd && *pPos != nSegmentDelimiter;
1245 break;
1246 }
1247 }
1248 [[fallthrough]];
1249 default:
1250 {
1251 // For INetProtocol::File, allow an empty authority ("//") to be
1252 // missing if the following path starts with an explicit "/"
1253 // (Java is notorious in generating such file URLs, so be
1254 // liberal here):
1255 if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1256 pPos += 2;
1257 else if (!bSmart
1259 && pPos != pEnd && *pPos == '/'))
1260 {
1261 setInvalid();
1262 return false;
1263 }
1264 m_aAbsURIRef.append("//");
1265
1266 sal_Unicode const * pAuthority = pPos;
1267 sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1268 while (pPos < pEnd && *pPos != '/' && *pPos != c
1269 && *pPos != nFragmentDelimiter)
1270 ++pPos;
1271 if (getSchemeInfo().m_bUser)
1272 if (getSchemeInfo().m_bHost)
1273 {
1274 sal_Unicode const * p1 = pAuthority;
1275 while (p1 < pPos && *p1 != '@')
1276 ++p1;
1277 if (p1 == pPos)
1278 {
1279 pHostPortBegin = pAuthority;
1280 pHostPortEnd = pPos;
1281 }
1282 else
1283 {
1284 pUserInfoBegin = pAuthority;
1285 pUserInfoEnd = p1;
1286 pHostPortBegin = p1 + 1;
1287 pHostPortEnd = pPos;
1288 }
1289 }
1290 else
1291 {
1292 pUserInfoBegin = pAuthority;
1293 pUserInfoEnd = pPos;
1294 }
1295 else if (getSchemeInfo().m_bHost)
1296 {
1297 pHostPortBegin = pAuthority;
1298 pHostPortEnd = pPos;
1299 }
1300 else if (pPos != pAuthority)
1301 {
1302 setInvalid();
1303 return false;
1304 }
1305 break;
1306 }
1307 }
1308
1309 if (pUserInfoBegin)
1310 {
1311 Part ePart = PART_USER_PASSWORD;
1312 bool bSupportsPassword = getSchemeInfo().m_bPassword;
1313 bool bSupportsAuth
1314 = !bSupportsPassword && getSchemeInfo().m_bAuth;
1315 bool bHasAuth = false;
1316 OUStringBuffer aSynUser;
1317 sal_Unicode const * p1 = pUserInfoBegin;
1318 while (p1 < pUserInfoEnd)
1319 {
1320 EscapeType eEscapeType;
1321 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1322 eMechanism, eCharset, eEscapeType);
1323 if (eEscapeType == EscapeType::NONE)
1324 {
1325 if (nUTF32 == ':' && bSupportsPassword)
1326 {
1327 bHasAuth = true;
1328 break;
1329 }
1330 else if (nUTF32 == ';' && bSupportsAuth
1331 && pUserInfoEnd - p1
1332 > RTL_CONSTASCII_LENGTH("auth=")
1334 p1,
1335 p1 + RTL_CONSTASCII_LENGTH("auth="),
1336 "auth="))
1337 {
1338 p1 += RTL_CONSTASCII_LENGTH("auth=");
1339 bHasAuth = true;
1340 break;
1341 }
1342 }
1343 appendUCS4(aSynUser, nUTF32, eEscapeType, ePart,
1344 eCharset, false);
1345 }
1346 m_aUser.set(m_aAbsURIRef, aSynUser, m_aAbsURIRef.getLength());
1347 if (bHasAuth)
1348 {
1349 if (bSupportsPassword)
1350 {
1351 m_aAbsURIRef.append(':');
1352 OUStringBuffer aSynAuth;
1353 while (p1 < pUserInfoEnd)
1354 {
1355 EscapeType eEscapeType;
1356 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1357 eMechanism, eCharset,
1358 eEscapeType);
1359 appendUCS4(aSynAuth, nUTF32, eEscapeType,
1360 ePart, eCharset, false);
1361 }
1362 m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength());
1363 }
1364 else
1365 {
1366 m_aAbsURIRef.append(";AUTH=");
1367 OUStringBuffer aSynAuth;
1368 while (p1 < pUserInfoEnd)
1369 {
1370 EscapeType eEscapeType;
1371 sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1372 eMechanism, eCharset,
1373 eEscapeType);
1374 if (!INetMIME::isIMAPAtomChar(nUTF32))
1375 {
1376 setInvalid();
1377 return false;
1378 }
1379 appendUCS4(aSynAuth, nUTF32, eEscapeType,
1380 ePart, eCharset, false);
1381 }
1382 m_aAuth.set(m_aAbsURIRef, aSynAuth, m_aAbsURIRef.getLength());
1383 }
1384 }
1385 if (pHostPortBegin)
1386 m_aAbsURIRef.append('@');
1387 }
1388
1389 if (pHostPortBegin)
1390 {
1391 sal_Unicode const * pPort = pHostPortEnd;
1392 if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1393 {
1394 sal_Unicode const * p1 = pHostPortEnd - 1;
1395 while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
1396 --p1;
1397 if (*p1 == ':')
1398 pPort = p1;
1399 }
1400 bool bNetBiosName = false;
1401 switch (m_eScheme)
1402 {
1403 case INetProtocol::File:
1404 // If the host equals "LOCALHOST" (unencoded and ignoring
1405 // case), turn it into an empty host:
1406 if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1407 "localhost"))
1408 pHostPortBegin = pPort;
1409 bNetBiosName = true;
1410 break;
1411
1412 case INetProtocol::Ldap:
1413 case INetProtocol::Smb:
1414 if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1415 {
1416 setInvalid();
1417 return false;
1418 }
1419 break;
1420 default:
1421 if (pHostPortBegin == pPort)
1422 {
1423 setInvalid();
1424 return false;
1425 }
1426 break;
1427 }
1428 sal_Int32 nLenBeforeHost = m_aAbsURIRef.getLength();
1430 pHostPortBegin, pPort, eMechanism, eCharset,
1431 bNetBiosName, &m_aAbsURIRef))
1432 {
1433 setInvalid();
1434 return false;
1435 }
1436 m_aHost = SubString(nLenBeforeHost, m_aAbsURIRef.getLength() - nLenBeforeHost);
1437 if (pPort != pHostPortEnd)
1438 {
1439 m_aAbsURIRef.append(':');
1441 std::u16string_view{pPort + 1, static_cast<size_t>(pHostPortEnd - (pPort + 1))},
1442 m_aAbsURIRef.getLength());
1443 }
1444 }
1445 }
1446
1447 // Parse <path>
1448 sal_Int32 nBeforePathLength = m_aAbsURIRef.getLength();
1449 if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset,
1450 bSkippedInitialSlash, nSegmentDelimiter,
1451 nAltSegmentDelimiter,
1452 getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1453 nFragmentDelimiter, m_aAbsURIRef))
1454 {
1455 setInvalid();
1456 return false;
1457 }
1458 m_aPath = SubString(nBeforePathLength, m_aAbsURIRef.getLength() - nBeforePathLength);
1459
1460 // Parse ?<query>
1461 if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1462 {
1463 m_aAbsURIRef.append('?');
1464 OUStringBuffer aSynQuery;
1465 for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1466 {
1467 EscapeType eEscapeType;
1468 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1469 eMechanism, eCharset, eEscapeType);
1470 appendUCS4(aSynQuery, nUTF32, eEscapeType,
1471 PART_URIC, eCharset, true);
1472 }
1473 m_aQuery.set(m_aAbsURIRef, aSynQuery, m_aAbsURIRef.getLength());
1474 }
1475
1476 // Parse #<fragment>
1477 if (pPos < pEnd && *pPos == nFragmentDelimiter)
1478 {
1479 m_aAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1480 OUStringBuffer aSynFragment;
1481 for (++pPos; pPos < pEnd;)
1482 {
1483 EscapeType eEscapeType;
1484 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1485 eMechanism, eCharset, eEscapeType);
1486 appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC,
1487 eCharset, true);
1488 }
1489 m_aFragment.set(m_aAbsURIRef, aSynFragment, m_aAbsURIRef.getLength());
1490 }
1491
1492 if (pPos != pEnd)
1493 {
1494 setInvalid();
1495 return false;
1496 }
1497
1498 return true;
1499}
1500
1502 sal_Int32 oldSchemeLen = 0;
1503 const OUString& rOldSchemeName = getSchemeInfo().m_sScheme;
1505 oldSchemeLen = m_aScheme.getLength();
1506 else
1507 oldSchemeLen = rOldSchemeName.getLength();
1508 m_eScheme=eTargetScheme;
1509 const OUString& rNewSchemeName = getSchemeInfo().m_sScheme;
1510 sal_Int32 newSchemeLen = rNewSchemeName.getLength();
1511 m_aAbsURIRef.remove(0, oldSchemeLen);
1512 m_aAbsURIRef.insert(0, rNewSchemeName);
1513 sal_Int32 delta=newSchemeLen-oldSchemeLen;
1514 m_aUser+=delta;
1515 m_aAuth+=delta;
1516 m_aHost+=delta;
1517 m_aPort+=delta;
1518 m_aPath+=delta;
1519 m_aQuery+=delta;
1520 m_aFragment+=delta;
1521}
1522
1523bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
1524 INetURLObject & rTheAbsURIRef,
1525 bool & rWasAbsolute,
1526 EncodeMechanism eMechanism,
1527 rtl_TextEncoding eCharset,
1528 bool bIgnoreFragment, bool bSmart,
1529 bool bRelativeNonURIs, FSysStyle eStyle)
1530 const
1531{
1532 sal_Unicode const * p = rTheRelURIRef.getStr();
1533 sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
1534
1535 sal_Unicode const * pPrefixBegin = p;
1536 PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1537 bool hasScheme = pPrefix != nullptr;
1538 if (!hasScheme) {
1539 pPrefixBegin = p;
1540 hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
1541 }
1542
1543 sal_uInt32 nSegmentDelimiter = '/';
1544 sal_uInt32 nQueryDelimiter
1545 = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1546 sal_uInt32 nFragmentDelimiter = '#';
1547 Part ePart = PART_VISIBLE;
1548
1549 if (!hasScheme && bSmart)
1550 {
1551 // If the input matches any of the following productions (for which
1552 // the appropriate style bit is set in eStyle), it is assumed to be an
1553 // absolute file system path, rather than a relative URI reference.
1554 // (This is only a subset of the productions used for scheme detection
1555 // in INetURLObject::setAbsURIRef(), because most of those productions
1556 // interfere with the syntax of relative URI references.) The
1557 // productions use the auxiliary rules
1558
1559 // domain = label *("." label)
1560 // label = alphanum [*(alphanum / "-") alphanum]
1561 // alphanum = ALPHA / DIGIT
1562 // UCS4 = <any UCS4 character>
1563
1564 // 1st Production (UNC file; FSysStyle::Dos only):
1565 // "\\" domain ["\" *UCS4]
1566 // 2nd Production (Unix-like DOS file; FSysStyle::Dos only):
1567 // ALPHA ":" ["/" *UCS4]
1568 // 3rd Production (DOS file; FSysStyle::Dos only):
1569 // ALPHA ":" ["\" *UCS4]
1570 if (eStyle & FSysStyle::Dos)
1571 {
1572 bool bFSys = false;
1573 sal_Unicode const * q = p;
1574 if (pEnd - q >= 2
1575 && rtl::isAsciiAlpha(q[0])
1576 && q[1] == ':'
1577 && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1578 bFSys = true; // 2nd, 3rd
1579 else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1580 {
1581 q += 2;
1582 sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1583 q, pEnd - q, '\\');
1584 sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1586 q, qe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
1587 true, nullptr))
1588 {
1589 bFSys = true; // 1st
1590 }
1591 }
1592 if (bFSys)
1593 {
1594 INetURLObject aNewURI;
1595 aNewURI.setAbsURIRef(rTheRelURIRef, eMechanism,
1596 eCharset, true, eStyle);
1597 if (!aNewURI.HasError())
1598 {
1599 rTheAbsURIRef = aNewURI;
1600 rWasAbsolute = true;
1601 return true;
1602 }
1603 }
1604 }
1605
1606 // When the base URL is a file URL, accept relative file system paths
1607 // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1608 // and "#"), as well as relative URIs using "/" as delimiter:
1610 switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1611 {
1612 case FSysStyle::Unix:
1613 nSegmentDelimiter = '/';
1614 break;
1615
1616 case FSysStyle::Dos:
1617 nSegmentDelimiter = '\\';
1618 bRelativeNonURIs = true;
1619 break;
1620
1621 default:
1622 OSL_FAIL("INetURLObject::convertRelToAbs():"
1623 " Bad guessFSysStyleByCounting");
1624 break;
1625 }
1626
1627 if (bRelativeNonURIs)
1628 {
1629 eMechanism = EncodeMechanism::All;
1630 nQueryDelimiter = 0x80000000;
1631 nFragmentDelimiter = 0x80000000;
1633 }
1634 }
1635
1636 // If the relative URI has the same scheme as the base URI, and that
1637 // scheme is hierarchical, then ignore its presence in the relative
1638 // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1639 // step 3):
1640 if (pPrefix && pPrefix->m_eScheme == m_eScheme
1641 && getSchemeInfo().m_bHierarchical)
1642 {
1643 hasScheme = false;
1644 while (p != pEnd && *p++ != ':') ;
1645 }
1646 rWasAbsolute = hasScheme;
1647
1648 // Fast solution for non-relative URIs:
1649 if (hasScheme)
1650 {
1651 INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1652 if (aNewURI.HasError())
1653 {
1654 rWasAbsolute = false;
1655 return false;
1656 }
1657
1658 if (bIgnoreFragment)
1659 aNewURI.clearFragment();
1660 rTheAbsURIRef = aNewURI;
1661 return true;
1662 }
1663
1664 enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1665 STATE_DONE };
1666
1667 OUStringBuffer aSynAbsURIRef(128);
1668 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1669 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1671 {
1672 aSynAbsURIRef.append(getSchemeInfo().m_sScheme.asView());
1673 }
1674 else
1675 {
1676 sal_Unicode const * pSchemeBegin
1677 = m_aAbsURIRef.getStr();
1678 sal_Unicode const * pSchemeEnd = pSchemeBegin;
1679 while (pSchemeEnd[0] != ':')
1680 {
1681 ++pSchemeEnd;
1682 }
1683 aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1684 }
1685 aSynAbsURIRef.append(':');
1686
1687 State eState = STATE_AUTH;
1688 bool bSameDoc = true;
1689
1690 if (getSchemeInfo().m_bAuthority)
1691 {
1692 if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1693 {
1694 aSynAbsURIRef.append("//");
1695 p += 2;
1696 eState = STATE_ABS_PATH;
1697 bSameDoc = false;
1698 while (p != pEnd)
1699 {
1700 EscapeType eEscapeType;
1701 sal_uInt32 nUTF32
1702 = getUTF32(p, pEnd, eMechanism,
1703 eCharset, eEscapeType);
1704 if (eEscapeType == EscapeType::NONE)
1705 {
1706 if (nUTF32 == nSegmentDelimiter)
1707 break;
1708 else if (nUTF32 == nFragmentDelimiter)
1709 {
1710 eState = STATE_FRAGMENT;
1711 break;
1712 }
1713 }
1714 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1715 PART_VISIBLE, eCharset, true);
1716 }
1717 }
1718 else
1719 {
1720 SubString aAuthority(getAuthority());
1721 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1722 + aAuthority.getBegin(),
1723 aAuthority.getLength());
1724 }
1725 }
1726
1727 if (eState == STATE_AUTH)
1728 {
1729 if (p == pEnd)
1730 eState = STATE_DONE;
1731 else if (*p == nFragmentDelimiter)
1732 {
1733 ++p;
1734 eState = STATE_FRAGMENT;
1735 }
1736 else if (*p == nSegmentDelimiter)
1737 {
1738 ++p;
1739 eState = STATE_ABS_PATH;
1740 bSameDoc = false;
1741 }
1742 else
1743 {
1744 eState = STATE_REL_PATH;
1745 bSameDoc = false;
1746 }
1747 }
1748
1749 if (eState == STATE_ABS_PATH)
1750 {
1751 aSynAbsURIRef.append('/');
1752 eState = STATE_DONE;
1753 while (p != pEnd)
1754 {
1755 EscapeType eEscapeType;
1756 sal_uInt32 nUTF32
1757 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1758 if (eEscapeType == EscapeType::NONE)
1759 {
1760 if (nUTF32 == nFragmentDelimiter)
1761 {
1762 eState = STATE_FRAGMENT;
1763 break;
1764 }
1765 else if (nUTF32 == nSegmentDelimiter)
1766 nUTF32 = '/';
1767 }
1768 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1769 eCharset, true);
1770 }
1771 }
1772 else if (eState == STATE_REL_PATH)
1773 {
1774 if (!getSchemeInfo().m_bHierarchical)
1775 {
1776 // Detect cases where a relative input could not be made absolute
1777 // because the given base URL is broken (most probably because it is
1778 // empty):
1780 HasError(), "tools.urlobj",
1781 "cannot make <" << rTheRelURIRef
1782 << "> absolute against broken base <"
1784 rWasAbsolute = false;
1785 return false;
1786 }
1787
1788 sal_Unicode const * pBasePathBegin
1789 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1790 sal_Unicode const * pBasePathEnd
1791 = pBasePathBegin + m_aPath.getLength();
1792 while (pBasePathEnd != pBasePathBegin)
1793 if (*(--pBasePathEnd) == '/')
1794 {
1795 ++pBasePathEnd;
1796 break;
1797 }
1798
1799 sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1800 aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1801 DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1802 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1803 "INetURLObject::convertRelToAbs(): Bad base path");
1804
1805 while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1806 {
1807 if (*p == '.')
1808 {
1809 if (pEnd - p == 1
1810 || p[1] == nSegmentDelimiter
1811 || p[1] == nQueryDelimiter
1812 || p[1] == nFragmentDelimiter)
1813 {
1814 ++p;
1815 if (p != pEnd && *p == nSegmentDelimiter)
1816 ++p;
1817 continue;
1818 }
1819 else if (pEnd - p >= 2
1820 && p[1] == '.'
1821 && (pEnd - p == 2
1822 || p[2] == nSegmentDelimiter
1823 || p[2] == nQueryDelimiter
1824 || p[2] == nFragmentDelimiter)
1825 && aSynAbsURIRef.getLength() - nPathBegin > 1)
1826 {
1827 p += 2;
1828 if (p != pEnd && *p == nSegmentDelimiter)
1829 ++p;
1830
1831 sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1832 while (i > nPathBegin && aSynAbsURIRef[i] != '/')
1833 --i;
1834 aSynAbsURIRef.setLength(i + 1);
1835 DBG_ASSERT(
1836 aSynAbsURIRef.getLength() > nPathBegin
1837 && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1838 "INetURLObject::convertRelToAbs(): Bad base path");
1839 continue;
1840 }
1841 }
1842
1843 while (p != pEnd
1844 && *p != nSegmentDelimiter
1845 && *p != nQueryDelimiter
1846 && *p != nFragmentDelimiter)
1847 {
1848 EscapeType eEscapeType;
1849 sal_uInt32 nUTF32
1850 = getUTF32(p, pEnd, eMechanism,
1851 eCharset, eEscapeType);
1852 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1853 eCharset, true);
1854 }
1855 if (p != pEnd && *p == nSegmentDelimiter)
1856 {
1857 aSynAbsURIRef.append('/');
1858 ++p;
1859 }
1860 }
1861
1862 while (p != pEnd && *p != nFragmentDelimiter)
1863 {
1864 EscapeType eEscapeType;
1865 sal_uInt32 nUTF32
1866 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1867 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1868 eCharset, true);
1869 }
1870
1871 if (p == pEnd)
1872 eState = STATE_DONE;
1873 else
1874 {
1875 ++p;
1876 eState = STATE_FRAGMENT;
1877 }
1878 }
1879 else if (bSameDoc)
1880 {
1881 aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1882 m_aPath.getLength());
1883 if (m_aQuery.isPresent())
1884 aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1885 + m_aQuery.getBegin() - 1,
1886 m_aQuery.getLength() + 1);
1887 }
1888
1889 if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1890 {
1891 aSynAbsURIRef.append('#');
1892 while (p != pEnd)
1893 {
1894 EscapeType eEscapeType;
1895 sal_uInt32 nUTF32
1896 = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1897 appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1898 PART_VISIBLE, eCharset, true);
1899 }
1900 }
1901
1902 INetURLObject aNewURI(aSynAbsURIRef);
1903 if (aNewURI.HasError())
1904 {
1905 // Detect cases where a relative input could not be made absolute
1906 // because the given base URL is broken (most probably because it is
1907 // empty):
1909 HasError(), "tools.urlobj",
1910 "cannot make <" << rTheRelURIRef
1911 << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE)
1912 << ">");
1913 rWasAbsolute = false;
1914 return false;
1915 }
1916
1917 rTheAbsURIRef = aNewURI;
1918 return true;
1919}
1920
1921bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef,
1922 OUString & rTheRelURIRef,
1923 EncodeMechanism eEncodeMechanism,
1924 DecodeMechanism eDecodeMechanism,
1925 rtl_TextEncoding eCharset,
1926 FSysStyle eStyle) const
1927{
1928 // Check for hierarchical base URL:
1929 if (!getSchemeInfo().m_bHierarchical)
1930 {
1931 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1932 return false;
1933 }
1934
1935 // Convert the input (absolute or relative URI ref) to an absolute URI
1936 // ref:
1937 INetURLObject aSubject;
1938 bool bWasAbsolute;
1939 if (!convertRelToAbs(rTheAbsURIRef, aSubject, bWasAbsolute,
1940 eEncodeMechanism, eCharset, false, false, false,
1941 eStyle))
1942 {
1943 rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1944 return false;
1945 }
1946
1947 // Check for differing scheme or authority parts:
1948 if ((m_aScheme.compare(
1949 aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1950 != 0)
1951 || (m_aUser.compare(
1952 aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1953 != 0)
1954 || (m_aAuth.compare(
1955 aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1956 != 0)
1957 || (m_aHost.compare(
1958 aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1959 != 0)
1960 || (m_aPort.compare(
1961 aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1962 != 0))
1963 {
1964 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1965 return false;
1966 }
1967
1968 sal_Unicode const * pBasePathBegin
1969 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1970 sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1971 sal_Unicode const * pSubjectPathBegin
1972 = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1973 sal_Unicode const * pSubjectPathEnd
1974 = pSubjectPathBegin + aSubject.m_aPath.getLength();
1975
1976 // Make nMatch point past the last matching slash, or past the end of the
1977 // paths, in case they are equal:
1978 sal_Unicode const * pSlash = nullptr;
1979 sal_Unicode const * p1 = pBasePathBegin;
1980 sal_Unicode const * p2 = pSubjectPathBegin;
1981 for (;;)
1982 {
1983 if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1984 {
1985 if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1986 pSlash = p1;
1987 break;
1988 }
1989
1990 sal_Unicode c = *p1++;
1991 if (c != *p2++)
1992 break;
1993 if (c == '/')
1994 pSlash = p1;
1995 }
1996 if (!pSlash)
1997 {
1998 // One of the paths does not start with '/':
1999 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
2000 return false;
2001 }
2002 sal_Int32 nMatch = pSlash - pBasePathBegin;
2003
2004 // If the two URLs are DOS file URLs starting with different volumes
2005 // (e.g., file:///a:/... and file:///b:/...), the subject is not made
2006 // relative (it could be, but some people do not like that):
2008 && nMatch <= 1
2009 && hasDosVolume(eStyle)
2010 && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
2011 {
2012 rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
2013 return false;
2014 }
2015
2016 // For every slash in the base path after nMatch, a prefix of "../" is
2017 // added to the new relative URL (if the common prefix of the two paths is
2018 // only "/"---but see handling of file URLs above---, the complete subject
2019 // path could go into the new relative URL instead, but some people don't
2020 // like that):
2021 OUStringBuffer aSynRelURIRef;
2022 for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
2023 ++p)
2024 {
2025 if (*p == '/')
2026 aSynRelURIRef.append("../");
2027 }
2028
2029 // If the new relative URL would start with "//" (i.e., it would be
2030 // mistaken for a relative URL starting with an authority part), or if the
2031 // new relative URL would neither be empty nor start with <"/"> nor start
2032 // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
2033 // with a scheme part), then the new relative URL is prefixed with "./":
2034 if (aSynRelURIRef.isEmpty())
2035 {
2036 if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
2037 && pSubjectPathBegin[nMatch] == '/'
2038 && pSubjectPathBegin[nMatch + 1] == '/')
2039 {
2040 aSynRelURIRef.append("./");
2041 }
2042 else
2043 {
2044 for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
2045 p != pSubjectPathEnd && *p != '/'; ++p)
2046 {
2047 if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
2048 {
2049 aSynRelURIRef.append("./");
2050 break;
2051 }
2052 }
2053 }
2054 }
2055
2056 // The remainder of the subject path, starting at nMatch, is appended to
2057 // the new relative URL:
2058 aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2059 eDecodeMechanism, eCharset));
2060
2061 // If the subject has defined query or fragment parts, they are appended
2062 // to the new relative URL:
2063 if (aSubject.m_aQuery.isPresent())
2064 {
2065 aSynRelURIRef.append("?"
2066 + aSubject.decode(aSubject.m_aQuery, eDecodeMechanism, eCharset));
2067 }
2068 if (aSubject.m_aFragment.isPresent())
2069 {
2070 aSynRelURIRef.append("#"
2071 + aSubject.decode(aSubject.m_aFragment, eDecodeMechanism, eCharset));
2072 }
2073
2074 rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2075 return true;
2076}
2077
2078// static
2079bool INetURLObject::convertIntToExt(std::u16string_view rTheIntURIRef,
2080 OUString & rTheExtURIRef,
2081 DecodeMechanism eDecodeMechanism,
2082 rtl_TextEncoding eCharset)
2083{
2084 OUStringBuffer aSynExtURIRef(256);
2085 encodeText(aSynExtURIRef, rTheIntURIRef, PART_VISIBLE,
2086 EncodeMechanism::NotCanonical, eCharset, true);
2087 sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2088 sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2089 sal_Unicode const * p = pBegin;
2090 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2091 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::Internal;
2092 if (bConvert)
2093 {
2094 comphelper::string::replaceAt(aSynExtURIRef, 0, p - pBegin,
2095 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2096 }
2097 rTheExtURIRef = decode(aSynExtURIRef, eDecodeMechanism, eCharset);
2098 return bConvert;
2099}
2100
2101// static
2102bool INetURLObject::convertExtToInt(std::u16string_view rTheExtURIRef,
2103 OUString & rTheIntURIRef,
2104 DecodeMechanism eDecodeMechanism,
2105 rtl_TextEncoding eCharset)
2106{
2107 OUStringBuffer aSynIntURIRef(256);
2108 encodeText(aSynIntURIRef, rTheExtURIRef, PART_VISIBLE,
2109 EncodeMechanism::NotCanonical, eCharset, true);
2110 sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2111 sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2112 sal_Unicode const * p = pBegin;
2113 PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2114 bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::External;
2115 if (bConvert)
2116 {
2117 comphelper::string::replaceAt(aSynIntURIRef, 0, p - pBegin,
2118 OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2119 }
2120 rTheIntURIRef = decode(aSynIntURIRef, eDecodeMechanism, eCharset);
2121 return bConvert;
2122}
2123
2124// static
// Looks up the longest scheme prefix of the sorted table below that matches
// the text in [rBegin, pEnd); input characters are lower-cased (ASCII) before
// comparison. On a match rBegin is advanced past the matched prefix and the
// table entry is returned; otherwise rBegin keeps its incoming value and
// nullptr is returned.
// NOTE(review): the first declarator line and several continuation lines of
// the table initializer are absent from this listing.
2126 sal_Unicode const * pEnd)
2127{
2128 static PrefixInfo const aMap[]
2129 = { // dummy entry at front needed, because pLast may point here:
2131 { ".component:", "staroffice.component:", INetProtocol::Component,
2133 { ".uno:", "staroffice.uno:", INetProtocol::Uno,
2135 { "cid:", nullptr, INetProtocol::Cid, PrefixInfo::Kind::Official },
2136 { "data:", nullptr, INetProtocol::Data, PrefixInfo::Kind::Official },
2137 { "db:", "staroffice.db:", INetProtocol::Db, PrefixInfo::Kind::Internal },
2138 { "file:", nullptr, INetProtocol::File, PrefixInfo::Kind::Official },
2139 { "ftp:", nullptr, INetProtocol::Ftp, PrefixInfo::Kind::Official },
2140 { "hid:", "staroffice.hid:", INetProtocol::Hid,
2142 { "http:", nullptr, INetProtocol::Http, PrefixInfo::Kind::Official },
2143 { "https:", nullptr, INetProtocol::Https, PrefixInfo::Kind::Official },
2144 { "javascript:", nullptr, INetProtocol::Javascript, PrefixInfo::Kind::Official },
2145 { "ldap:", nullptr, INetProtocol::Ldap, PrefixInfo::Kind::Official },
2146 { "macro:", "staroffice.macro:", INetProtocol::Macro,
2148 { "mailto:", nullptr, INetProtocol::Mailto, PrefixInfo::Kind::Official },
2149 { "private:", "staroffice.private:", INetProtocol::PrivSoffice,
2151 { "private:factory/", "staroffice.factory:",
2153 { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice,
2155 { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice,
2157 { "private:searchfolder:", "staroffice.searchfolder:",
2159 { "private:trashcan:", "staroffice.trashcan:",
2161 { "sftp:", nullptr, INetProtocol::Sftp, PrefixInfo::Kind::Official },
2162 { "slot:", "staroffice.slot:", INetProtocol::Slot,
2164 { "smb:", nullptr, INetProtocol::Smb, PrefixInfo::Kind::Official },
2165 { "staroffice.component:", ".component:", INetProtocol::Component,
2167 { "staroffice.db:", "db:", INetProtocol::Db, PrefixInfo::Kind::External },
2168 { "staroffice.factory:", "private:factory/",
2170 { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice,
2172 { "staroffice.hid:", "hid:", INetProtocol::Hid,
2174 { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice,
2176 { "staroffice.macro:", "macro:", INetProtocol::Macro,
2178 { "staroffice.private:", "private:", INetProtocol::PrivSoffice,
2180 { "staroffice.searchfolder:", "private:searchfolder:",
2182 { "staroffice.slot:", "slot:", INetProtocol::Slot,
2184 { "staroffice.trashcan:", "private:trashcan:",
2186 { "staroffice.uno:", ".uno:", INetProtocol::Uno,
2188 { "staroffice:", "private:", INetProtocol::PrivSoffice,
2190 { "telnet:", nullptr, INetProtocol::Telnet, PrefixInfo::Kind::Official },
2191 { "vnd.libreoffice.cmis:", nullptr, INetProtocol::Cmis, PrefixInfo::Kind::Internal },
2192 { "vnd.sun.star.cmd:", nullptr, INetProtocol::VndSunStarCmd,
2194 { "vnd.sun.star.expand:", nullptr, INetProtocol::VndSunStarExpand,
2196 { "vnd.sun.star.help:", nullptr, INetProtocol::VndSunStarHelp,
2198 { "vnd.sun.star.hier:", nullptr, INetProtocol::VndSunStarHier,
2200 { "vnd.sun.star.pkg:", nullptr, INetProtocol::VndSunStarPkg,
2202 { "vnd.sun.star.tdoc:", nullptr, INetProtocol::VndSunStarTdoc,
2204 { "vnd.sun.star.webdav:", nullptr, INetProtocol::VndSunStarWebdav,
2206 };
2207/* This list needs to be sorted, or you'll introduce serious bugs */
2208
// [pFirst, pLast] is the window of table rows still compatible with the input
// read so far; the search starts after the dummy row at index 0.
2209 PrefixInfo const * pFirst = aMap + 1;
2210 PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
// Best complete match found so far and the input position just behind it.
2211 PrefixInfo const * pMatch = nullptr;
2212 sal_Unicode const * pMatched = rBegin;
2213 sal_Unicode const * p = rBegin;
// i is the character index compared in every candidate during this round.
2214 sal_Int32 i = 0;
2215 for (; pFirst < pLast; ++i)
2216 {
// The first candidate is used up completely: remember it as the best match
// and remove it from the window, since longer rows may still match.
2217 if (pFirst->m_pPrefix[i] == '\0')
2218 {
2219 pMatch = pFirst++;
2220 pMatched = p;
2221 }
// No more input to compare against.
2222 if (p >= pEnd)
2223 break;
2224 sal_uInt32 nChar = rtl::toAsciiLowerCase(*p++);
// Shrink the window from both sides to the rows whose i-th character equals
// the input character (relies on the table being sorted).
2225 while (pFirst <= pLast && static_cast<unsigned char>(pFirst->m_pPrefix[i]) < nChar)
2226 ++pFirst;
2227 while (pFirst <= pLast && static_cast<unsigned char>(pLast->m_pPrefix[i]) > nChar)
2228 --pLast;
2229 }
// Exactly one candidate left: check the remainder of it against the input.
2230 if (pFirst == pLast)
2231 {
2232 char const * q = pFirst->m_pPrefix + i;
2233 while (p < pEnd && *q != '\0'
2234 && rtl::toAsciiLowerCase(*p) == static_cast<unsigned char>(*q))
2235 {
2236 ++p;
2237 ++q;
2238 }
2239 if (*q == '\0')
2240 {
2241 rBegin = p;
2242 return pFirst;
2243 }
2244 }
// Fall back to the best complete match seen during narrowing (may be none).
2245 rBegin = pMatched;
2246 return pMatch;
2247}
2248
// Returns the index in m_aAbsURIRef at which the "//" that introduces the
// authority starts: the begin of the user part if present, else of the host
// part if present, else of the path, moved back by the length of "//".
// Debug builds assert that the scheme has an authority and that "//" really
// is found at the computed index.
// NOTE(review): the declarator line of this function is absent from this
// listing; presumably this is getAuthorityBegin() -- confirm.
2250{
2251 DBG_ASSERT(getSchemeInfo().m_bAuthority,
2252 "INetURLObject::getAuthority(): Bad scheme");
2253 sal_Int32 nBegin;
2254 if (m_aUser.isPresent())
2255 nBegin = m_aUser.getBegin();
2256 else if (m_aHost.isPresent())
2257 nBegin = m_aHost.getBegin();
2258 else
2259 nBegin = m_aPath.getBegin();
// Step back over the "//" that precedes the first authority component.
2260 nBegin -= RTL_CONSTASCII_LENGTH("//");
2261 DBG_ASSERT(m_aAbsURIRef[nBegin] == '/' && m_aAbsURIRef[nBegin + 1] == '/',
2262 "INetURLObject::getAuthority(): Bad authority");
2263 return nBegin;
2264}
2265
// Builds the SubString that spans the authority, starting at the index
// returned by getAuthorityBegin(). The end is the end of the port if a port is
// present; the last visible alternative covers just the "//".
// NOTE(review): the declarator line and the middle alternatives of the
// conditional expression are absent from this listing.
2267{
2268 sal_Int32 nBegin = getAuthorityBegin();
2269 sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2273 nBegin + RTL_CONSTASCII_LENGTH("//");
2274 return SubString(nBegin, nEnd - nBegin);
2275}
2276
2277bool INetURLObject::setUser(std::u16string_view rTheUser,
2278 rtl_TextEncoding eCharset)
2279{
2280 if (
2281 !getSchemeInfo().m_bUser
2282 )
2283 {
2284 return false;
2285 }
2286
2287 OUStringBuffer aNewUser;
2288 encodeText(aNewUser, rTheUser, PART_USER_PASSWORD,
2289 EncodeMechanism::WasEncoded, eCharset, false);
2290 sal_Int32 nDelta;
2291 if (m_aUser.isPresent())
2292 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2293 else if (m_aHost.isPresent())
2294 {
2295 m_aAbsURIRef.insert(m_aHost.getBegin(), u'@');
2296 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2297 }
2298 else if (getSchemeInfo().m_bHost)
2299 return false;
2300 else
2301 nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2302 m_aAuth += nDelta;
2303 m_aHost += nDelta;
2304 m_aPort += nDelta;
2305 m_aPath += nDelta;
2306 m_aQuery += nDelta;
2307 m_aFragment += nDelta;
2308 return true;
2309}
2310
2311namespace
2312{
2313 void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2314 {
2315 OUString sTemp(rBuf.makeStringAndClear());
2316 rBuf.append(sTemp.replaceAt(index, count, u""));
2317 }
2318}
2319
// Removes the password (m_aAuth) from the URL, together with the single
// character that precedes it, and shifts the components behind it. Returns
// false if the scheme has no password part; returns true otherwise, including
// when there was no password to remove.
// NOTE(review): the declarator line of this function is absent from this
// listing; presumably this is clearPassword() -- confirm.
2321{
2322 if (!getSchemeInfo().m_bPassword)
2323 return false;
2324 if (m_aAuth.isPresent())
2325 {
// Erase one character more than the password itself, starting one position
// before it.
2326 lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2327 m_aAuth.getLength() + 1);
// clear() reports the length change of the password; the extra -1 accounts
// for the additionally erased character.
2328 sal_Int32 nDelta = m_aAuth.clear() - 1;
2329 m_aHost += nDelta;
2330 m_aPort += nDelta;
2331 m_aPath += nDelta;
2332 m_aQuery += nDelta;
2333 m_aFragment += nDelta;
2334 }
2335 return true;
2336}
2337
2338bool INetURLObject::setPassword(std::u16string_view rThePassword,
2339 rtl_TextEncoding eCharset)
2340{
2341 if (!getSchemeInfo().m_bPassword)
2342 return false;
2343 OUStringBuffer aNewAuth;
2344 encodeText(aNewAuth, rThePassword, PART_USER_PASSWORD,
2345 EncodeMechanism::WasEncoded, eCharset, false);
2346 sal_Int32 nDelta;
2347 if (m_aAuth.isPresent())
2348 nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2349 else if (m_aUser.isPresent())
2350 {
2351 m_aAbsURIRef.insert(m_aUser.getEnd(), u':');
2352 nDelta
2353 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2354 }
2355 else if (m_aHost.isPresent())
2356 {
2357 m_aAbsURIRef.insert(m_aHost.getBegin(), ":@" );
2358 m_aUser.set(m_aAbsURIRef, std::u16string_view{}, m_aHost.getBegin());
2359 nDelta
2360 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2361 }
2362 else if (getSchemeInfo().m_bHost)
2363 return false;
2364 else
2365 {
2366 m_aAbsURIRef.insert(m_aPath.getBegin(), u':');
2367 m_aUser.set(m_aAbsURIRef, std::u16string_view{}, m_aPath.getBegin());
2368 nDelta
2369 = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2370 }
2371 m_aHost += nDelta;
2372 m_aPort += nDelta;
2373 m_aPath += nDelta;
2374 m_aQuery += nDelta;
2375 m_aFragment += nDelta;
2376 return true;
2377}
2378
2379// static
// Parses a host at the start of [rBegin, pEnd) with a character-driven state
// machine. Three forms are recognised: names made of dot-separated labels
// (ASCII letters, digits, '-' and '_'), dotted-decimal IPv4 addresses with
// four numbers, and IPv6 addresses enclosed in '[' ... ']' (optionally ending
// in an embedded dotted-decimal IPv4 part).
//
// rBegin    in: start of the text; out: first character not consumed (only
//           changed on success)
// pEnd      end of the text
// pCanonic  optional buffer to which a canonic form of the host is appended:
//           names are copied as written, numeric parts are re-emitted from
//           their parsed values; on failure the buffer is truncated back to
//           its incoming length
// return    true iff scanning stopped in an accepting state
2380bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2381 OUStringBuffer* pCanonic)
2382{
2383 // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2384 // IPv4 address directly follows the abbreviating "::". The ABNF in
2385 // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2386 // mentions "::13:1.68.3". This algorithm accepts both variants:
2387 enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2388 STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2389 STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2390 STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2391 STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2392 STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2393 STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2394 STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
// nNumber: value of the numeric group being read; nDigits: digits read in that
// group; nOctets: number of dotted-decimal groups seen so far.
2395 sal_uInt32 nNumber = 0;
2396 int nDigits = 0;
2397 int nOctets = 0;
2398 State eState = STATE_INITIAL;
2399 sal_Unicode const * p = rBegin;
// Remembered so that partial canonic output can be discarded again.
2400 sal_Int32 nOriginalCanonicLength = pCanonic ? pCanonic->getLength() : 0;
2401 for (; p != pEnd; ++p)
2402 switch (eState)
2403 {
// First character decides between IPv6 literal, name and number.
2404 case STATE_INITIAL:
2405 if (*p == '[')
2406 {
2407 if (pCanonic)
2408 pCanonic->append('[');
2409 eState = STATE_IP6;
2410 }
2411 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2412 eState = STATE_TOPLABEL;
2413 else if (rtl::isAsciiDigit(*p))
2414 {
2415 nNumber = INetMIME::getWeight(*p);
2416 nDigits = 1;
2417 nOctets = 1;
2418 eState = STATE_IP4;
2419 }
2420 else
2421 goto done;
2422 break;
2423
// Inside a label that started with a digit.
2424 case STATE_LABEL:
2425 if (*p == '.')
2426 eState = STATE_LABEL_DOT;
2427 else if (*p == '-')
2428 eState = STATE_LABEL_HYPHEN;
2429 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2430 goto done;
2431 break;
2432
// After one or more '-' inside such a label; not an accepting state.
2433 case STATE_LABEL_HYPHEN:
2434 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2435 eState = STATE_LABEL;
2436 else if (*p != '-')
2437 goto done;
2438 break;
2439
// After the '.' that ended such a label; not an accepting state.
2440 case STATE_LABEL_DOT:
2441 if (rtl::isAsciiAlpha(*p) || *p == '_')
2442 eState = STATE_TOPLABEL;
2443 else if (rtl::isAsciiDigit(*p))
2444 eState = STATE_LABEL;
2445 else
2446 goto done;
2447 break;
2448
// Inside a label that started with a letter or '_'.
2449 case STATE_TOPLABEL:
2450 if (*p == '.')
2451 eState = STATE_TOPLABEL_DOT;
2452 else if (*p == '-')
2453 eState = STATE_TOPLABEL_HYPHEN;
2454 else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2455 goto done;
2456 break;
2457
2458 case STATE_TOPLABEL_HYPHEN:
2459 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2460 eState = STATE_TOPLABEL;
2461 else if (*p != '-')
2462 goto done;
2463 break;
2464
// After the '.' that ended a letter-initial label; this state is accepting
// (see the switch after the loop).
2465 case STATE_TOPLABEL_DOT:
2466 if (rtl::isAsciiAlpha(*p) || *p == '_')
2467 eState = STATE_TOPLABEL;
2468 else if (rtl::isAsciiDigit(*p))
2469 eState = STATE_LABEL;
2470 else
2471 goto done;
2472 break;
2473
// Reading a decimal group of what may be an IPv4 address. As soon as the text
// no longer fits that shape (fifth group, letter, '-', fourth digit) it is
// treated as a name label instead.
2474 case STATE_IP4:
2475 if (*p == '.')
2476 if (nOctets < 4)
2477 {
2478 if (pCanonic)
2479 {
2480 pCanonic->append(static_cast<sal_Int64>(nNumber));
2481 pCanonic->append( '.' );
2482 }
2483 ++nOctets;
2484 eState = STATE_IP4_DOT;
2485 }
2486 else
2487 eState = STATE_LABEL_DOT;
2488 else if (*p == '-')
2489 eState = STATE_LABEL_HYPHEN;
2490 else if (rtl::isAsciiAlpha(*p) || *p == '_')
2491 eState = STATE_LABEL;
2492 else if (rtl::isAsciiDigit(*p))
2493 if (nDigits < 3)
2494 {
2495 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2496 ++nDigits;
2497 }
2498 else
2499 eState = STATE_LABEL;
2500 else
2501 goto done;
2502 break;
2503
// After a '.' inside a potential IPv4 address.
2504 case STATE_IP4_DOT:
2505 if (rtl::isAsciiAlpha(*p) || *p == '_')
2506 eState = STATE_TOPLABEL;
2507 else if (rtl::isAsciiDigit(*p))
2508 {
2509 nNumber = INetMIME::getWeight(*p);
2510 nDigits = 1;
2511 eState = STATE_IP4;
2512 }
2513 else
2514 goto done;
2515 break;
2516
// Directly after '[': either a leading ':' or the first hex group.
2517 case STATE_IP6:
2518 if (*p == ':')
2519 eState = STATE_IP6_COLON;
2520 else if (rtl::isAsciiHexDigit(*p))
2521 {
2522 nNumber = INetMIME::getHexWeight(*p);
2523 nDigits = 1;
2524 eState = STATE_IP6_HEXSEQ1;
2525 }
2526 else
2527 goto done;
2528 break;
2529
// A single leading ':' must be followed by a second one.
2530 case STATE_IP6_COLON:
2531 if (*p == ':')
2532 {
2533 if (pCanonic)
2534 pCanonic->append("::");
2535 eState = STATE_IP6_2COLON;
2536 }
2537 else
2538 goto done;
2539 break;
2540
// Directly after the abbreviating "::".
2541 case STATE_IP6_2COLON:
2542 if (*p == ']')
2543 eState = STATE_IP6_DONE;
2544 else if (*p == ':')
2545 {
2546 if (pCanonic)
2547 pCanonic->append(':');
2548 eState = STATE_IP6_3COLON;
2549 }
2550 else if (rtl::isAsciiDigit(*p))
2551 {
2552 nNumber = INetMIME::getWeight(*p);
2553 nDigits = 1;
2554 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2555 }
2556 else if (rtl::isAsciiHexDigit(*p))
2557 {
2558 nNumber = INetMIME::getHexWeight(*p);
2559 nDigits = 1;
2560 eState = STATE_IP6_HEXSEQ2;
2561 }
2562 else
2563 goto done;
2564 break;
2565
// After ":::" only an embedded IPv4 address may follow.
2566 case STATE_IP6_3COLON:
2567 if (rtl::isAsciiDigit(*p))
2568 {
2569 nNumber = INetMIME::getWeight(*p);
2570 nDigits = 1;
2571 nOctets = 1;
2572 eState = STATE_IP6_IP4;
2573 }
2574 else
2575 goto done;
2576 break;
2577
// Inside a hex group before any "::"; at most four hex digits per group.
2578 case STATE_IP6_HEXSEQ1:
2579 if (*p == ']')
2580 {
2581 if (pCanonic)
2582 pCanonic->append(
2583 OUString::number(nNumber, 16));
2584 eState = STATE_IP6_DONE;
2585 }
2586 else if (*p == ':')
2587 {
2588 if (pCanonic)
2589 {
2590 pCanonic->append(
2591 OUString::number(nNumber, 16));
2592 pCanonic->append(':');
2593 }
2594 eState = STATE_IP6_HEXSEQ1_COLON;
2595 }
2596 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2597 {
2598 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2599 ++nDigits;
2600 }
2601 else
2602 goto done;
2603 break;
2604
// After the ':' that ended a hex group before any "::". A decimal digit may
// start either another hex group or an embedded IPv4 address.
2605 case STATE_IP6_HEXSEQ1_COLON:
2606 if (*p == ':')
2607 {
2608 if (pCanonic)
2609 pCanonic->append(':');
2610 eState = STATE_IP6_2COLON;
2611 }
2612 else if (rtl::isAsciiDigit(*p))
2613 {
2614 nNumber = INetMIME::getWeight(*p);
2615 nDigits = 1;
2616 eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2617 }
2618 else if (rtl::isAsciiHexDigit(*p))
2619 {
2620 nNumber = INetMIME::getHexWeight(*p);
2621 nDigits = 1;
2622 eState = STATE_IP6_HEXSEQ1;
2623 }
2624 else
2625 goto done;
2626 break;
2627
// Group of decimal digits that is still ambiguous. The digits are accumulated
// as hex; if a '.' follows, the accumulated nibbles are re-read as decimal
// digits to obtain the first IPv4 number.
2628 case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2629 if (*p == ']')
2630 {
2631 if (pCanonic)
2632 pCanonic->append(
2633 OUString::number(nNumber, 16));
2634 eState = STATE_IP6_DONE;
2635 }
2636 else if (*p == ':')
2637 {
2638 if (pCanonic)
2639 {
2640 pCanonic->append(
2641 OUString::number(nNumber, 16));
2642 pCanonic->append(':');
2643 }
2644 eState = STATE_IP6_HEXSEQ1_COLON;
2645 }
2646 else if (*p == '.')
2647 {
2648 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2649 + (nNumber & 15);
2650 if (pCanonic)
2651 {
2652 pCanonic->append(
2653 OUString::number(nNumber));
2654 pCanonic->append('.');
2655 }
2656 nOctets = 2;
2657 eState = STATE_IP6_IP4_DOT;
2658 }
2659 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2660 {
2661 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2662 ++nDigits;
2663 }
2664 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2665 {
2666 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2667 ++nDigits;
2668 eState = STATE_IP6_HEXSEQ1;
2669 }
2670 else
2671 goto done;
2672 break;
2673
// Inside a hex group after the "::".
2674 case STATE_IP6_HEXSEQ2:
2675 if (*p == ']')
2676 {
2677 if (pCanonic)
2678 pCanonic->append(
2679 OUString::number(nNumber, 16));
2680 eState = STATE_IP6_DONE;
2681 }
2682 else if (*p == ':')
2683 {
2684 if (pCanonic)
2685 {
2686 pCanonic->append(
2687 OUString::number(nNumber, 16));
2688 pCanonic->append(':');
2689 }
2690 eState = STATE_IP6_HEXSEQ2_COLON;
2691 }
2692 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2693 {
2694 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2695 ++nDigits;
2696 }
2697 else
2698 goto done;
2699 break;
2700
// After the ':' that ended a hex group behind the "::"; another ':' is not
// accepted here.
2701 case STATE_IP6_HEXSEQ2_COLON:
2702 if (rtl::isAsciiDigit(*p))
2703 {
2704 nNumber = INetMIME::getWeight(*p);
2705 nDigits = 1;
2706 eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2707 }
2708 else if (rtl::isAsciiHexDigit(*p))
2709 {
2710 nNumber = INetMIME::getHexWeight(*p);
2711 nDigits = 1;
2712 eState = STATE_IP6_HEXSEQ2;
2713 }
2714 else
2715 goto done;
2716 break;
2717
// Same ambiguity handling as STATE_IP6_HEXSEQ1_MAYBE_IP4, for groups behind
// the "::".
2718 case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2719 if (*p == ']')
2720 {
2721 if (pCanonic)
2722 pCanonic->append(
2723 OUString::number(nNumber, 16));
2724 eState = STATE_IP6_DONE;
2725 }
2726 else if (*p == ':')
2727 {
2728 if (pCanonic)
2729 {
2730 pCanonic->append(
2731 OUString::number(nNumber, 16));
2732 pCanonic->append(':');
2733 }
2734 eState = STATE_IP6_HEXSEQ2_COLON;
2735 }
2736 else if (*p == '.')
2737 {
2738 nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2739 + (nNumber & 15);
2740 if (pCanonic)
2741 {
2742 pCanonic->append(
2743 OUString::number(nNumber));
2744 pCanonic->append('.');
2745 }
2746 nOctets = 2;
2747 eState = STATE_IP6_IP4_DOT;
2748 }
2749 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2750 {
2751 nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2752 ++nDigits;
2753 }
2754 else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2755 {
2756 nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2757 ++nDigits;
2758 eState = STATE_IP6_HEXSEQ2;
2759 }
2760 else
2761 goto done;
2762 break;
2763
// Inside a decimal group of the embedded IPv4 part; ']' is only accepted
// after the fourth group.
2764 case STATE_IP6_IP4:
2765 if (*p == ']')
2766 if (nOctets == 4)
2767 {
2768 if (pCanonic)
2769 pCanonic->append(
2770 OUString::number(nNumber));
2771 eState = STATE_IP6_DONE;
2772 }
2773 else
2774 goto done;
2775 else if (*p == '.')
2776 if (nOctets < 4)
2777 {
2778 if (pCanonic)
2779 {
2780 pCanonic->append(
2781 OUString::number(nNumber));
2782 pCanonic->append('.');
2783 }
2784 ++nOctets;
2785 eState = STATE_IP6_IP4_DOT;
2786 }
2787 else
2788 goto done;
2789 else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2790 {
2791 nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2792 ++nDigits;
2793 }
2794 else
2795 goto done;
2796 break;
2797
2798 case STATE_IP6_IP4_DOT:
2799 if (rtl::isAsciiDigit(*p))
2800 {
2801 nNumber = INetMIME::getWeight(*p);
2802 nDigits = 1;
2803 eState = STATE_IP6_IP4;
2804 }
2805 else
2806 goto done;
2807 break;
2808
// The closing ']' has been read; the host ends here.
2809 case STATE_IP6_DONE:
2810 goto done;
2811 }
// Scanning stopped (end of input or first character that does not fit):
// decide from the final state whether what was read is a complete host.
2812 done:
2813 switch (eState)
2814 {
// Name: the canonic form is the scanned text itself, so any numeric output
// produced while the text still looked like an IPv4 address is dropped first.
2815 case STATE_LABEL:
2816 case STATE_TOPLABEL:
2817 case STATE_TOPLABEL_DOT:
2818 if (pCanonic)
2819 {
2820 pCanonic->setLength(nOriginalCanonicLength);
2821 pCanonic->append(rBegin, p - rBegin);
2822 }
2823 rBegin = p;
2824 return true;
2825
// IPv4: only valid with exactly four groups; the last group has not been
// emitted yet.
2826 case STATE_IP4:
2827 if (nOctets == 4)
2828 {
2829 if (pCanonic)
2830 pCanonic->append(
2831 OUString::number(nNumber));
2832 rBegin = p;
2833 return true;
2834 }
2835 if (pCanonic)
2836 pCanonic->setLength(nOriginalCanonicLength);
2837 return false;
2838
2839 case STATE_IP6_DONE:
2840 if (pCanonic)
2841 pCanonic->append(']');
2842 rBegin = p;
2843 return true;
2844
// Every other state means the host is incomplete or malformed.
2845 default:
2846 if (pCanonic)
2847 pCanonic->setLength(nOriginalCanonicLength);
2848 return false;
2849 }
2850}
2851
2852// static
// Validates the text [pBegin, pEnd) as a host and, if pCanonic is given,
// appends its canonic form. An empty range is accepted as is. Otherwise the
// text is accepted if parseHost() consumes all of it; failing that, and only
// if bNetBiosName is set, it is accepted if every character is visible (per
// INetMIME::isVisible) and is none of the characters listed in the switch
// below. On failure pCanonic is truncated back to its incoming length.
// NOTE(review): the first declarator line of this function is absent from
// this listing.
2854 sal_Unicode const * pBegin, sal_Unicode const * pEnd,
2855 EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2856 OUStringBuffer* pCanonic)
2857{
2858 if (pBegin >= pEnd)
2859 return true;
2860 sal_Int32 nOriginalCanonicLength = pCanonic ? pCanonic->getLength() : 0;
// Regular host syntax first; it must cover the complete range.
2861 if (sal_Unicode const* p = pBegin; parseHost(p, pEnd, pCanonic) && p == pEnd)
2862 return true;
2863 if (pCanonic)
2864 pCanonic->setLength(nOriginalCanonicLength); // discard parseHost results
2865 if (!bNetBiosName)
2866 return false;
// Fallback: check the text character by character; getUTF32() advances pBegin.
2867 while (pBegin < pEnd)
2868 {
2869 EscapeType eEscapeType;
2870 switch (sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd, eMechanism, eCharset, eEscapeType))
2871 {
2872 default:
2873 if (INetMIME::isVisible(nUTF32))
2874 {
2875 if (pCanonic)
2876 appendUCS4(*pCanonic, nUTF32, eEscapeType, PART_URIC, eCharset, true);
2877 break;
2878 }
// Non-visible characters are rejected like the explicitly listed ones.
2879 [[fallthrough]];
2880 case '"':
2881 case '*':
2882 case '+':
2883 case ',':
2884 case '/':
2885 case ':':
2886 case ';':
2887 case '<':
2888 case '=':
2889 case '>':
2890 case '?':
2891 case '[':
2892 case '\\':
2893 case ']':
2894 case '`':
2895 case '|':
2896 if (pCanonic)
2897 pCanonic->setLength(nOriginalCanonicLength);
2898 return false;
2899 }
2900 }
2901 return true;
2902}
2903
// Replaces the host part of the URL with rTheHost.
//
// Returns false if the scheme has no host part, if the host is empty where
// the scheme does not allow that, or if the host text fails validation;
// otherwise the URL is updated and the components behind the host are
// shifted.
// NOTE(review): the line that opens the validation call (listing line 2932)
// is absent from this listing.
2904bool INetURLObject::setHost(std::u16string_view rTheHost,
2905 rtl_TextEncoding eCharset)
2906{
2907 if (!getSchemeInfo().m_bHost)
2908 return false;
2909 OUStringBuffer aSynHost(rTheHost);
2910 bool bNetBiosName = false;
2911 switch (m_eScheme)
2912 {
// file: "localhost" (any ASCII case) is stored as an empty host, and the
// relaxed NetBIOS-name validation is enabled.
2913 case INetProtocol::File:
2914 {
2915 if (OUString::unacquired(aSynHost).equalsIgnoreAsciiCase("localhost"))
2916 {
2917 aSynHost.setLength(0);
2918 }
2919 bNetBiosName = true;
2920 }
2921 break;
// ldap: an empty host is only rejected while a port is present.
2922 case INetProtocol::Ldap:
2923 if (aSynHost.isEmpty() && m_aPort.isPresent())
2924 return false;
2925 break;
2926
// All other schemes: the host must not be empty.
2927 default:
2928 if (aSynHost.isEmpty())
2929 return false;
2930 break;
2931 }
2933 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2934 EncodeMechanism::WasEncoded, eCharset, bNetBiosName, &aSynHost))
2935 return false;
2936 sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost);
2937 m_aPort += nDelta;
2938 m_aPath += nDelta;
2939 m_aQuery += nDelta;
2940 m_aFragment += nDelta;
2941 return true;
2942}
2943
2944// static
// Parses the path of a URI according to the rules of the given scheme.
// Reading starts at *pBegin and never goes past pEnd; the re-encoded path is
// appended to rSynPath. On success *pBegin is set to the first character that
// was not consumed and true is returned. On failure rSynPath is cut back to
// its incoming length, *pBegin is left unchanged and false is returned.
//
// The delimiter parameters are code points compared against the input
// characters; setPath() in this file passes 0x80000000 for the delimiters it
// does not want to be recognised.
// NOTE(review): the first declarator line and a number of case labels are
// absent from this listing, so some branches below cannot be attributed to a
// scheme from here.
2946 sal_Unicode const ** pBegin,
2947 sal_Unicode const * pEnd,
2948 EncodeMechanism eMechanism,
2949 rtl_TextEncoding eCharset,
2950 bool bSkippedInitialSlash,
2951 sal_uInt32 nSegmentDelimiter,
2952 sal_uInt32 nAltSegmentDelimiter,
2953 sal_uInt32 nQueryDelimiter,
2954 sal_uInt32 nFragmentDelimiter,
2955 OUStringBuffer &rSynPath)
2956{
2957 DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2958
2959 sal_Unicode const * pPos = *pBegin;
// Length of rSynPath on entry; used to detect an empty result and to undo
// partial output on failure.
2960 const sal_Int32 nSynPathBeforeLen = rSynPath.getLength();
2961 switch (eScheme)
2962 {
2964 return false;
2965
// ftp: path must be empty or start with '/', runs up to the fragment
// delimiter; an empty path becomes "/".
2966 case INetProtocol::Ftp:
2967 if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2968 goto failed;
2969 while (pPos < pEnd && *pPos != nFragmentDelimiter)
2970 {
2971 EscapeType eEscapeType;
2972 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2973 eCharset, eEscapeType);
2974 appendUCS4(rSynPath, nUTF32, eEscapeType,
2975 PART_HTTP_PATH, eCharset, true);
2976 }
2977 if (rSynPath.getLength() - nSynPathBeforeLen == 0)
2978 rSynPath.append('/');
2979 break;
2980
// http-like schemes: as above, but the path also ends at the query delimiter.
2981 case INetProtocol::Http:
2984 case INetProtocol::Smb:
2985 case INetProtocol::Cmis:
2986 if (pPos < pEnd && *pPos != '/' && *pPos != nQueryDelimiter
2987 && *pPos != nFragmentDelimiter)
2988 goto failed;
2989 while (pPos < pEnd && *pPos != nQueryDelimiter
2990 && *pPos != nFragmentDelimiter)
2991 {
2992 EscapeType eEscapeType;
2993 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2994 eCharset, eEscapeType);
2995 appendUCS4(rSynPath, nUTF32, eEscapeType,
2996 PART_HTTP_PATH, eCharset, true);
2997 }
2998 if (rSynPath.getLength() - nSynPathBeforeLen == 0)
2999 rSynPath.append('/');
3000 break;
3001
// file: unescaped segment delimiters (primary or alternative) are normalised
// to '/'; a leading '/' is supplied if the caller already skipped it.
3002 case INetProtocol::File:
3003 {
3004 if (bSkippedInitialSlash)
3005 rSynPath.append('/');
3006 else if (pPos < pEnd
3007 && *pPos != nSegmentDelimiter
3008 && *pPos != nAltSegmentDelimiter)
3009 goto failed;
3010 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3011 {
3012 EscapeType eEscapeType;
3013 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3014 eCharset, eEscapeType);
3015 if (eEscapeType == EscapeType::NONE)
3016 {
3017 if (nUTF32 == nSegmentDelimiter
3018 || nUTF32 == nAltSegmentDelimiter)
3019 {
3020 rSynPath.append('/');
3021 continue;
3022 }
3023 else if (nUTF32 == '|'
3024 && (pPos == pEnd
3025 || *pPos == nFragmentDelimiter
3026 || *pPos == nSegmentDelimiter
3027 || *pPos == nAltSegmentDelimiter)
3028 && rSynPath.getLength() - nSynPathBeforeLen == 2
3029 && rtl::isAsciiAlpha(rSynPath[nSynPathBeforeLen + 1]))
3030 {
3031 // A first segment of <ALPHA "|"> is translated to
3032 // <ALPHA ":">:
3033 rSynPath.append(':');
3034 continue;
3035 }
3036 }
3037 appendUCS4(rSynPath, nUTF32, eEscapeType,
3038 PART_PCHAR, eCharset, true);
3039 }
3040 if (rSynPath.getLength() - nSynPathBeforeLen == 0)
3041 rSynPath.append('/');
3042 break;
3043 }
3044
// Case label missing in this listing; the path is encoded as PART_MAILTO up to
// the query or fragment delimiter.
3046 while (pPos < pEnd && *pPos != nQueryDelimiter
3047 && *pPos != nFragmentDelimiter)
3048 {
3049 EscapeType eEscapeType;
3050 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3051 eCharset, eEscapeType);
3052 appendUCS4(rSynPath, nUTF32, eEscapeType,
3053 PART_MAILTO, eCharset, true);
3054 }
3055 break;
3056
3057
// Opaque path up to the query or fragment delimiter, no leading '/' required.
3059 case INetProtocol::Slot:
3060 case INetProtocol::Hid:
3062 case INetProtocol::Uno:
3064 case INetProtocol::Ldap:
3065 while (pPos < pEnd && *pPos != nQueryDelimiter
3066 && *pPos != nFragmentDelimiter)
3067 {
3068 EscapeType eEscapeType;
3069 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3070 eCharset, eEscapeType);
3071 appendUCS4(rSynPath, nUTF32, eEscapeType,
3072 PART_PATH_BEFORE_QUERY, eCharset, true);
3073 }
3074 break;
3075
// Case label missing in this listing; an empty path becomes "/", a non-empty
// one must start with '/'.
3077 if (pPos == pEnd
3078 || *pPos == nQueryDelimiter
3079 || *pPos == nFragmentDelimiter)
3080 rSynPath.append('/');
3081 else
3082 {
3083 if (*pPos != '/')
3084 goto failed;
3085 while (pPos < pEnd && *pPos != nQueryDelimiter
3086 && *pPos != nFragmentDelimiter)
3087 {
3088 EscapeType eEscapeType;
3089 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
3090 eMechanism,
3091 eCharset, eEscapeType);
3092 appendUCS4(rSynPath, nUTF32, eEscapeType,
3093 PART_HTTP_PATH, eCharset, true);
3094 }
3095 }
3096 break;
3097
// Opaque path up to the fragment delimiter, encoded as PART_URIC.
3099 case INetProtocol::Data:
3100 case INetProtocol::Cid:
3101 case INetProtocol::Db:
3102 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3103 {
3104 EscapeType eEscapeType;
3105 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3106 eCharset, eEscapeType);
3107 appendUCS4(rSynPath, nUTF32, eEscapeType,
3108 PART_URIC, eCharset, true);
3109 }
3110 break;
3111
// Case label(s) missing in this listing; hierarchical path in which unescaped
// '/' is kept as a separator and everything else is encoded as PART_PCHAR. An
// empty path becomes "/".
3114 if (pPos < pEnd && *pPos != '/'
3115 && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3116 goto failed;
3117 while (pPos < pEnd && *pPos != nQueryDelimiter
3118 && *pPos != nFragmentDelimiter)
3119 {
3120 EscapeType eEscapeType;
3121 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3122 eCharset, eEscapeType);
3123 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3124 rSynPath.append('/');
3125 else
3126 appendUCS4(rSynPath, nUTF32, eEscapeType,
3127 PART_PCHAR, eCharset, false);
3128 }
3129 if (rSynPath.getLength() - nSynPathBeforeLen == 0)
3130 rSynPath.append('/');
3131 break;
3132
// Case label(s) missing in this listing; the path must be non-empty. The
// first character is encoded as PART_URIC_NO_SLASH, the rest as PART_URIC.
3135 {
3136 if (pPos == pEnd || *pPos == nFragmentDelimiter)
3137 goto failed;
3138 Part ePart = PART_URIC_NO_SLASH;
3139 while (pPos != pEnd && *pPos != nFragmentDelimiter)
3140 {
3141 EscapeType eEscapeType;
3142 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3143 eCharset, eEscapeType);
3144 appendUCS4(rSynPath, nUTF32, eEscapeType, ePart,
3145 eCharset, true);
3146 ePart = PART_URIC;
3147 }
3148 break;
3149 }
3150
// Case label missing in this listing; only an empty path or a single "/" is
// accepted, and the result is always "/".
3152 if (pPos < pEnd)
3153 {
3154 if (*pPos != '/' || pEnd - pPos > 1)
3155 goto failed;
3156 ++pPos;
3157 }
3158 rSynPath.append('/');
3159 break;
3160
// Case label missing in this listing; the path must start with '/' and runs
// up to the fragment delimiter.
3162 if (pPos == pEnd || *pPos != '/')
3163 goto failed;
3164 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3165 {
3166 EscapeType eEscapeType;
3167 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3168 eCharset, eEscapeType);
3169 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3170 rSynPath.append('/');
3171 else
3172 appendUCS4(rSynPath, nUTF32, eEscapeType,
3173 PART_PCHAR, eCharset, false);
3174 }
3175 break;
3176
// Opaque path up to the fragment delimiter. Note that the emptiness check
// looks at the whole of rSynPath, not only at what this call appended.
3178 case INetProtocol::Sftp:
3179 while (pPos < pEnd && *pPos != nFragmentDelimiter)
3180 {
3181 EscapeType eEscapeType;
3182 sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3183 eCharset, eEscapeType);
3184 appendUCS4(rSynPath, nUTF32, eEscapeType,
3185 PART_URIC, eCharset, true);
3186 }
3187 if (rSynPath.isEmpty())
3188 goto failed;
3189 break;
// Unhandled scheme: asserts, then falls through to the success path.
3190 default:
3191 OSL_ASSERT(false);
3192 break;
3193 }
3194
3195 *pBegin = pPos;
3196 return true;
// Common failure exit: undo everything appended to rSynPath by this call.
3197failed:
3198 rSynPath.setLength(nSynPathBeforeLen);
3199 return false;
3200}
3201
3202bool INetURLObject::setPath(std::u16string_view rThePath,
3203 EncodeMechanism eMechanism,
3204 rtl_TextEncoding eCharset)
3205{
3206 OUStringBuffer aSynPath(256);
3207 sal_Unicode const * p = rThePath.data();
3208 sal_Unicode const * pEnd = p + rThePath.size();
3209 if (!parsePath(m_eScheme, &p, pEnd, eMechanism, eCharset, false,
3210 '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3211 || p != pEnd)
3212 return false;
3213 sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath);
3214 m_aQuery += nDelta;
3215 m_aFragment += nDelta;
3216 return true;
3217}
3218
// Fragment of INetURLObject::checkHierarchical() (name taken from the
// diagnostic text below). The visible branch reports a failure through
// OSL_FAIL and still returns true.
// NOTE(review): the declarator, the condition guarding this branch and the
// body of the else branch are absent from this listing.
3221 OSL_FAIL(
3222 "INetURLObject::checkHierarchical vnd.sun.star.expand");
3223 return true;
3224 } else {
3226 }
3227}
3228
3229bool INetURLObject::Append(std::u16string_view rTheSegment,
3230 EncodeMechanism eMechanism,
3231 rtl_TextEncoding eCharset)
3232{
3233 return insertName(rTheSegment, false, LAST_SEGMENT, eMechanism, eCharset);
3234}
3235
// Locates one segment of a hierarchical path and returns it as a SubString of
// m_aAbsURIRef. nIndex is either LAST_SEGMENT or a zero-based segment number.
// An empty SubString is returned if checkHierarchical() fails or the
// requested segment does not exist. For the zero-based case the returned
// range starts at the '/' that introduces the segment.
// NOTE(review): the first declarator line and the opening of the index
// assertion are absent from this listing.
3237 bool bIgnoreFinalSlash)
3238 const
3239{
3241 "INetURLObject::getSegment(): Bad index");
3242
3243 if (!checkHierarchical())
3244 return SubString();
3245
3246 sal_Unicode const * pPathBegin
3247 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3248 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3249 sal_Unicode const * pSegBegin;
3250 sal_Unicode const * pSegEnd;
3251 if (nIndex == LAST_SEGMENT)
3252 {
// Start from the end of the path, optionally ignoring one trailing '/', and
// walk back to the preceding '/' (or the start of the path).
3253 pSegEnd = pPathEnd;
3254 if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3255 --pSegEnd;
3256 if (pSegEnd <= pPathBegin)
3257 return SubString();
3258 pSegBegin = pSegEnd - 1;
3259 while (pSegBegin > pPathBegin && *pSegBegin != '/')
3260 --pSegBegin;
3261 }
3262 else
3263 {
// Skip nIndex segments by advancing to the next '/' each time; running off
// the end of the path means the segment does not exist.
3264 pSegBegin = pPathBegin;
3265 while (nIndex-- > 0)
3266 do
3267 {
3268 ++pSegBegin;
3269 if (pSegBegin >= pPathEnd)
3270 return SubString();
3271 }
3272 while (*pSegBegin != '/');
// The segment extends up to, but not including, the next '/'.
3273 pSegEnd = pSegBegin + 1;
3274 while (pSegEnd < pPathEnd && *pSegEnd != '/')
3275 ++pSegEnd;
3276 }
3277
3278 return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3279 pSegEnd - pSegBegin);
3280}
3281
// Inserts rTheName as a new segment into a hierarchical path.
//
// rTheName           text of the new segment; encoded as PART_PCHAR
// bAppendFinalSlash  whether a '/' is added behind the new segment when it
//                    ends up at the end of the path (or an empty path)
// nIndex             LAST_SEGMENT to append, 0 to prepend, otherwise the
//                    zero-based position at which to insert
// return             false if checkHierarchical() fails, if nIndex lies
//                    beyond the end of the path, or if setPath() rejects the
//                    rebuilt path
//
// The new path is assembled as <prefix> "/" <name> ["/"] <suffix> and then
// installed through setPath().
// NOTE(review): the opening of the index assertion (listing line 3287) is
// absent from this listing.
3282bool INetURLObject::insertName(std::u16string_view rTheName,
3283 bool bAppendFinalSlash, sal_Int32 nIndex,
3284 EncodeMechanism eMechanism,
3285 rtl_TextEncoding eCharset)
3286{
3288 "INetURLObject::insertName(): Bad index");
3289
3290 if (!checkHierarchical())
3291 return false;
3292
3293 sal_Unicode const * pPathBegin
3294 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3295 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
// [pPathBegin, pPrefixEnd) is kept in front of the new segment,
// [pSuffixBegin, pPathEnd) is kept behind it.
3296 sal_Unicode const * pPrefixEnd;
3297 bool bInsertSlash;
3298 sal_Unicode const * pSuffixBegin;
3299 if (nIndex == LAST_SEGMENT)
3300 {
// Append: the prefix is the whole path without a trailing '/', the suffix is
// empty.
3301 pPrefixEnd = pPathEnd;
3302 if (pPrefixEnd > pPathBegin &&
3303 pPrefixEnd[-1] == '/')
3304 {
3305 --pPrefixEnd;
3306 }
3307 bInsertSlash = bAppendFinalSlash;
3308 pSuffixBegin = pPathEnd;
3309 }
3310 else if (nIndex == 0)
3311 {
// Prepend: the prefix is empty. A '/' is needed behind the name if the old
// path does not start with one, or if the path is empty and a final slash was
// requested. A path consisting of just "/" is dropped from the suffix unless
// a final slash was requested.
3312 pPrefixEnd = pPathBegin;
3313 bInsertSlash =
3314 (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3315 (pPathBegin == pPathEnd && bAppendFinalSlash);
3316 pSuffixBegin =
3317 (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3318 !bAppendFinalSlash)
3319 ? pPathEnd : pPathBegin;
3320 }
3321 else
3322 {
// Insert in the middle: walk pPrefixEnd forward over nIndex segments,
// ignoring one trailing '/' of the path (pEnd).
3323 pPrefixEnd = pPathBegin;
3324 sal_Unicode const * pEnd = pPathEnd;
3325 if (pEnd > pPathBegin && pEnd[-1] == '/')
3326 --pEnd;
// bSkip is false only for the very first step when the path does not start
// with '/', so that the first character is examined rather than skipped.
3327 bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3328 bInsertSlash = false;
3329 pSuffixBegin = pPathEnd;
3330 while (nIndex-- > 0)
3331 for (;;)
3332 {
3333 if (bSkip)
3334 ++pPrefixEnd;
3335 bSkip = true;
// Reached the end of the path: fine only if this was the last segment to
// skip (the name is then appended), otherwise the index is out of range.
3336 if (pPrefixEnd >= pEnd)
3337 {
3338 if (nIndex == 0)
3339 {
3340 bInsertSlash = bAppendFinalSlash;
3341 break;
3342 }
3343 else
3344 return false;
3345 }
// Found the '/' that starts the next segment; it becomes the start of the
// suffix if this is where the name gets inserted.
3346 if (*pPrefixEnd == '/')
3347 {
3348 pSuffixBegin = pPrefixEnd;
3349 break;
3350 }
3351 }
3352 }
3353
3354 OUStringBuffer aNewPath(256);
3355 aNewPath.append(
3356 OUString::Concat(std::u16string_view(pPathBegin, pPrefixEnd - pPathBegin))
3357 + "/");
3358 encodeText(aNewPath, rTheName, PART_PCHAR,
3359 eMechanism, eCharset, true);
3360 if (bInsertSlash) {
3361 aNewPath.append('/');
3362 }
3363 aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3364
// The rebuilt path is already encoded, hence NotCanonical / UTF-8 here
// instead of the caller's mechanism and charset.
3365 return setPath(aNewPath, EncodeMechanism::NotCanonical,
3366 RTL_TEXTENCODING_UTF8);
3367}
3368
3370{
3371 if (HasError())
3372 return;
3373 if (m_aQuery.isPresent())
3374 {
3375 lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3376 m_aQuery.getLength() + 1);
3377 m_aFragment += m_aQuery.clear() - 1;
3378 }
3379}
3380
3381bool INetURLObject::setQuery(std::u16string_view rTheQuery,
3382 EncodeMechanism eMechanism,
3383 rtl_TextEncoding eCharset)
3384{
3385 if (!getSchemeInfo().m_bQuery)
3386 return false;
3387 OUStringBuffer aNewQuery;
3388 encodeText(aNewQuery, rTheQuery, PART_URIC,
3389 eMechanism, eCharset, true);
3390 sal_Int32 nDelta;
3391 if (m_aQuery.isPresent())
3392 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3393 else
3394 {
3395 m_aAbsURIRef.insert(m_aPath.getEnd(), u'?');
3396 nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3397 + 1;
3398 }
3399 m_aFragment += nDelta;
3400 return true;
3401}
3402
3404{
3405 if (HasError())
3406 return false;
3407 if (m_aFragment.isPresent())
3408 {
3409 m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3411 }
3412 return true;
3413}
3414
3415bool INetURLObject::setFragment(std::u16string_view rTheFragment,
3416 EncodeMechanism eMechanism,
3417 rtl_TextEncoding eCharset)
3418{
3419 if (HasError())
3420 return false;
3421 OUStringBuffer aNewFragment;
3422 encodeText(aNewFragment, rTheFragment, PART_URIC,
3423 eMechanism, eCharset, true);
3424 if (m_aFragment.isPresent())
3425 m_aFragment.set(m_aAbsURIRef, aNewFragment);
3426 else
3427 {
3428 m_aAbsURIRef.append('#');
3429 m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3430 }
3431 return true;
3432}
3433
3435{
3436 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3437 return (eStyle & FSysStyle::Dos)
3438 && m_aPath.getLength() >= 3
3439 && p[0] == '/'
3440 && rtl::isAsciiAlpha(p[1])
3441 && p[2] == ':'
3442 && (m_aPath.getLength() == 3 || p[3] == '/');
3443}
3444
3445// static
3446void INetURLObject::encodeText( OUStringBuffer& rOutputBuffer,
3447 sal_Unicode const * pBegin,
3448 sal_Unicode const * pEnd,
3449 Part ePart, EncodeMechanism eMechanism,
3450 rtl_TextEncoding eCharset,
3451 bool bKeepVisibleEscapes)
3452{
3453 while (pBegin < pEnd)
3454 {
3455 EscapeType eEscapeType;
3456 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3457 eMechanism, eCharset, eEscapeType);
3458 appendUCS4(rOutputBuffer, nUTF32, eEscapeType, ePart,
3459 eCharset, bKeepVisibleEscapes);
3460 }
3461}
3462
3463// static
3464OUString INetURLObject::decode(sal_Unicode const * pBegin,
3465 sal_Unicode const * pEnd,
3466 DecodeMechanism eMechanism,
3467 rtl_TextEncoding eCharset)
3468{
3469 switch (eMechanism)
3470 {
3472 return OUString(pBegin, pEnd - pBegin);
3473
3475 eCharset = RTL_TEXTENCODING_UTF8;
3476 break;
3477
3478 default:
3479 break;
3480 }
3481 OUStringBuffer aResult(static_cast<int>(pEnd-pBegin));
3482 while (pBegin < pEnd)
3483 {
3484 EscapeType eEscapeType;
3485 sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3486 EncodeMechanism::WasEncoded, eCharset, eEscapeType);
3487 switch (eEscapeType)
3488 {
3489 case EscapeType::NONE:
3490 aResult.appendUtf32(nUTF32);
3491 break;
3492
3493 case EscapeType::Octet:
3494 appendEscape(aResult, nUTF32);
3495 break;
3496
3497 case EscapeType::Utf32:
3498 if (
3499 rtl::isAscii(nUTF32) &&
3500 (
3501 eMechanism == DecodeMechanism::ToIUri ||
3502 (
3503 eMechanism == DecodeMechanism::Unambiguous &&
3504 mustEncode(nUTF32, PART_UNAMBIGUOUS)
3505 )
3506 )
3507 )
3508 {
3509 appendEscape(aResult, nUTF32);
3510 }
3511 else
3512 aResult.appendUtf32(nUTF32);
3513 break;
3514 }
3515 }
3516 return aResult.makeStringAndClear();
3517}
3518
3520 rtl_TextEncoding eCharset) const
3521{
3522 INetURLObject aTemp(*this);
3523 aTemp.clearPassword();
3524 return aTemp.GetMainURL(eMechanism, eCharset);
3525}
3526
3528 rtl_TextEncoding eCharset) const
3529{
3530 INetURLObject aTemp(*this);
3531 aTemp.clearFragment();
3532 return aTemp.GetMainURL(eMechanism, eCharset);
3533}
3534
3535OUString
3537 uno::Reference< util::XStringWidth > const & rStringWidth,
3538 sal_Int32 nWidth,
3539 DecodeMechanism eMechanism,
3540 rtl_TextEncoding eCharset)
3541 const
3542{
3543 OSL_ENSURE(rStringWidth.is(), "specification violation");
3544 OUStringBuffer aBuffer;
3545 // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_sScheme
3546 // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3548 {
3549 aBuffer.append(getSchemeInfo().m_sScheme.asView());
3550 }
3551 else
3552 {
3553 if (!m_aAbsURIRef.isEmpty())
3554 {
3555 sal_Unicode const * pSchemeBegin
3556 = m_aAbsURIRef.getStr();
3557 sal_Unicode const * pSchemeEnd = pSchemeBegin;
3558
3559 while (pSchemeEnd[0] != ':')
3560 {
3561 ++pSchemeEnd;
3562 }
3563 aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3564 }
3565 }
3566 aBuffer.append(':');
3567 bool bAuthority = getSchemeInfo().m_bAuthority;
3568 sal_Unicode const * pCoreBegin
3569 = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3570 m_aPath.getBegin());
3571 sal_Unicode const * pCoreEnd
3572 = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
3573 bool bSegment = false;
3575 {
3576 OUString aRest;
3577 if (m_aQuery.isPresent())
3578 aRest = "?...";
3579 else if (m_aFragment.isPresent())
3580 aRest = "#...";
3581 OUStringBuffer aTrailer;
3582 sal_Unicode const * pBegin = pCoreBegin;
3583 sal_Unicode const * pEnd = pCoreEnd;
3584 sal_Unicode const * pPrefixBegin = pBegin;
3585 sal_Unicode const * pSuffixEnd = pEnd;
3586 bool bPrefix = true;
3587 bool bSuffix = true;
3588 do
3589 {
3590 if (bSuffix)
3591 {
3592 sal_Unicode const * p = pSuffixEnd - 1;
3593 if (pSuffixEnd == pCoreEnd && *p == '/')
3594 --p;
3595 while (*p != '/')
3596 --p;
3597 if (bAuthority && p == pCoreBegin + 1)
3598 --p;
3599 OUString
3600 aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3601 1 : 0),
3602 pSuffixEnd,
3603 eMechanism,
3604 eCharset));
3605 pSuffixEnd = p;
3606 OUStringBuffer aResult(aBuffer);
3607 if (pSuffixEnd != pBegin)
3608 aResult.append("...");
3609 aResult.append(aSegment + aTrailer + aRest);
3610 if (rStringWidth->
3611 queryStringWidth(aResult.makeStringAndClear())
3612 <= nWidth)
3613 {
3614 aTrailer.insert(0, aSegment);
3615 bSegment = true;
3616 pEnd = pSuffixEnd;
3617 }
3618 else
3619 bSuffix = false;
3620 if (pPrefixBegin > pSuffixEnd)
3621 pPrefixBegin = pSuffixEnd;
3622 if (pBegin == pEnd)
3623 break;
3624 }
3625 if (bPrefix)
3626 {
3627 sal_Unicode const * p
3628 = pPrefixBegin
3629 + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3630 1);
3631 OSL_ASSERT(p <= pEnd);
3632 while (p < pEnd && *p != '/')
3633 ++p;
3634 if (p == pCoreEnd - 1 && *p == '/')
3635 ++p;
3636 OUString
3637 aSegment(decode(pPrefixBegin
3638 + (pPrefixBegin == pCoreBegin ? 0 :
3639 1),
3640 p == pEnd ? p : p + 1,
3641 eMechanism,
3642 eCharset));
3643 pPrefixBegin = p;
3644 OUStringBuffer aResult(aBuffer + aSegment);
3645 if (pPrefixBegin != pEnd)
3646 aResult.append("...");
3647 aResult.append(aTrailer + aRest);
3648 if (rStringWidth->
3649 queryStringWidth(aResult.makeStringAndClear())
3650 <= nWidth)
3651 {
3652 aBuffer.append(aSegment);
3653 bSegment = true;
3654 pBegin = pPrefixBegin;
3655 }
3656 else
3657 bPrefix = false;
3658 if (pPrefixBegin > pSuffixEnd)
3659 pSuffixEnd = pPrefixBegin;
3660 if (pBegin == pEnd)
3661 break;
3662 }
3663 }
3664 while (bPrefix || bSuffix);
3665 if (bSegment)
3666 {
3667 if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3668 aBuffer.append("...");
3669 aBuffer.append(aTrailer);
3670 }
3671 }
3672 if (!bSegment)
3673 aBuffer.append(decode(pCoreBegin,
3674 pCoreEnd,
3675 eMechanism,
3676 eCharset));
3677 if (m_aQuery.isPresent())
3678 {
3679 aBuffer.append("?" + decode(m_aQuery, eMechanism, eCharset));
3680 }
3681 if (m_aFragment.isPresent())
3682 {
3683 aBuffer.append("#" + decode(m_aFragment, eMechanism, eCharset));
3684 }
3685 if (!aBuffer.isEmpty())
3686 {
3687 OUStringBuffer aResult(aBuffer);
3688 if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3689 > nWidth)
3690 for (sal_Int32 i = aBuffer.getLength();;)
3691 {
3692 if (i == 0)
3693 {
3694 aBuffer.setLength(aBuffer.getLength() - 1);
3695 if (aBuffer.isEmpty())
3696 break;
3697 }
3698 else
3699 {
3700 aBuffer.setLength(--i);
3701 aBuffer.append("...");
3702 }
3703 aResult = aBuffer;
3704 if (rStringWidth->
3705 queryStringWidth(aResult.makeStringAndClear())
3706 <= nWidth)
3707 break;
3708 }
3709 }
3710 return aBuffer.makeStringAndClear();
3711}
3712
3714{
3715 if (m_eScheme != rObject.m_eScheme)
3716 return false;
3718 return std::u16string_view(m_aAbsURIRef) == std::u16string_view(rObject.m_aAbsURIRef);
3719 if ((m_aScheme.compare(
3720 rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
3721 != 0)
3726 || GetPort() != rObject.GetPort()
3727 || HasParam() != rObject.HasParam()
3728 || GetParam() != rObject.GetParam())
3729 return false;
3730 OUString aPath1(GetURLPath(DecodeMechanism::NONE));
3731 OUString aPath2(rObject.GetURLPath(DecodeMechanism::NONE));
3732 switch (m_eScheme)
3733 {
3734 case INetProtocol::File:
3735 {
3736 // If the URL paths of two file URLs only differ in that one has a
3737 // final '/' and the other has not, take the two paths as
3738 // equivalent (this could be useful for other schemes, too):
3739 sal_Int32 nLength = aPath1.getLength();
3740 switch (nLength - aPath2.getLength())
3741 {
3742 case -1:
3743 if (aPath2[nLength] != '/')
3744 return false;
3745 break;
3746
3747 case 0:
3748 break;
3749
3750 case 1:
3751 if (aPath1[--nLength] != '/')
3752 return false;
3753 break;
3754
3755 default:
3756 return false;
3757 }
3758 return aPath1.compareTo(aPath2, nLength) == 0;
3759 }
3760
3761 default:
3762 return aPath1 == aPath2;
3763 }
3764}
3765
3767 std::u16string_view rTheUser,
3768 std::u16string_view rThePassword,
3769 std::u16string_view rTheHost,
3770 sal_uInt32 nThePort,
3771 std::u16string_view rThePath)
3772{
3773 setInvalid();
3774 m_eScheme = eTheScheme;
3776 return false;
3777 m_aAbsURIRef.setLength(0);
3778 m_aAbsURIRef.append(getSchemeInfo().m_sScheme.asView());
3779 m_aAbsURIRef.append(':');
3780 if (getSchemeInfo().m_bAuthority)
3781 {
3782 m_aAbsURIRef.append("//");
3783 bool bUserInfo = false;
3784 if (getSchemeInfo().m_bUser)
3785 {
3786 if (!rTheUser.empty())
3787 {
3788 OUStringBuffer aNewUser;
3789 encodeText(aNewUser, rTheUser, PART_USER_PASSWORD,
3790 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false);
3791 m_aUser.set(m_aAbsURIRef, aNewUser, m_aAbsURIRef.getLength());
3792 bUserInfo = true;
3793 }
3794 }
3795 else if (!rTheUser.empty())
3796 {
3797 setInvalid();
3798 return false;
3799 }
3800 if (!rThePassword.empty())
3801 {
3802 if (getSchemeInfo().m_bPassword)
3803 {
3804 m_aAbsURIRef.append(':');
3805 OUStringBuffer aNewAuth;
3806 encodeText(aNewAuth, rThePassword, PART_USER_PASSWORD,
3807 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false);
3808 m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aAbsURIRef.getLength());
3809 bUserInfo = true;
3810 }
3811 else
3812 {
3813 setInvalid();
3814 return false;
3815 }
3816 }
3817 if (bUserInfo && getSchemeInfo().m_bHost)
3818 m_aAbsURIRef.append('@');
3819 if (getSchemeInfo().m_bHost)
3820 {
3821 OUStringBuffer aSynHost(rTheHost);
3822 bool bNetBiosName = false;
3823 switch (m_eScheme)
3824 {
3825 case INetProtocol::File:
3826 {
3827 if (OUString::unacquired(aSynHost).equalsIgnoreAsciiCase( "localhost" ))
3828 {
3829 aSynHost.setLength(0);
3830 }
3831 bNetBiosName = true;
3832 }
3833 break;
3834
3835 case INetProtocol::Ldap:
3836 if (aSynHost.isEmpty() && nThePort != 0)
3837 {
3838 setInvalid();
3839 return false;
3840 }
3841 break;
3842
3843 default:
3844 if (aSynHost.isEmpty())
3845 {
3846 setInvalid();
3847 return false;
3848 }
3849 break;
3850 }
3852 aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
3853 EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, bNetBiosName, &aSynHost))
3854 {
3855 setInvalid();
3856 return false;
3857 }
3858 m_aHost.set(m_aAbsURIRef, aSynHost, m_aAbsURIRef.getLength());
3859 if (nThePort != 0)
3860 {
3861 if (getSchemeInfo().m_bPort)
3862 {
3863 m_aAbsURIRef.append(':');
3865 OUString::number(nThePort),
3866 m_aAbsURIRef.getLength());
3867 }
3868 else
3869 {
3870 setInvalid();
3871 return false;
3872 }
3873 }
3874 }
3875 else if (!rTheHost.empty() || nThePort != 0)
3876 {
3877 setInvalid();
3878 return false;
3879 }
3880 }
3881 OUStringBuffer aSynPath(256);
3882 sal_Unicode const * p = rThePath.data();
3883 sal_Unicode const * pEnd = p + rThePath.size();
3884 if (!parsePath(m_eScheme, &p, pEnd, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false, '/',
3885 0x80000000, 0x80000000, 0x80000000, aSynPath)
3886 || p != pEnd)
3887 {
3888 setInvalid();
3889 return false;
3890 }
3891 m_aPath.set(m_aAbsURIRef, aSynPath, m_aAbsURIRef.getLength());
3892 return true;
3893}
3894
3895// static
3896OUString INetURLObject::GetAbsURL(std::u16string_view rTheBaseURIRef,
3897 OUString const & rTheRelURIRef,
3898 EncodeMechanism eEncodeMechanism,
3899 DecodeMechanism eDecodeMechanism,
3900 rtl_TextEncoding eCharset)
3901{
3902 // Backwards compatibility:
3903 if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#')
3904 return rTheRelURIRef;
3905
3906 INetURLObject aTheAbsURIRef;
3907 bool bWasAbsolute;
3908 return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
3909 convertRelToAbs(rTheRelURIRef, aTheAbsURIRef,
3910 bWasAbsolute, eEncodeMechanism,
3911 eCharset, false, false,
3912 false, FSysStyle::Detect)
3913 || eEncodeMechanism != EncodeMechanism::WasEncoded
3914 || eDecodeMechanism != DecodeMechanism::ToIUri
3915 || eCharset != RTL_TEXTENCODING_UTF8 ?
3916 aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
3917 rTheRelURIRef;
3918}
3919
3921{
3922 OUString aTheExtURIRef;
3924 m_aAbsURIRef, aTheExtURIRef);
3925 return aTheExtURIRef;
3926}
3927
3928bool INetURLObject::isSchemeEqualTo(std::u16string_view scheme) const {
3929 return m_aScheme.isPresent()
3930 && (rtl_ustr_compareIgnoreAsciiCase_WithLength(
3931 scheme.data(), scheme.size(),
3932 m_aAbsURIRef.getStr() + m_aScheme.getBegin(),
3934 == 0);
3935}
3936
3938 return ( isSchemeEqualTo( INetProtocol::Http ) ||
3941 isSchemeEqualTo( u"vnd.sun.star.webdavs" ) ||
3942 isSchemeEqualTo( u"webdav" ) ||
3943 isSchemeEqualTo( u"webdavs" ));
3944}
3945
3946// static
3948{
3949 return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
3950}
3951
3952// static
3953const OUString & INetURLObject::GetSchemeName(INetProtocol eTheScheme)
3954{
3955 return getSchemeInfo(eTheScheme).m_sScheme;
3956}
3957
3958// static
3959INetProtocol INetURLObject::CompareProtocolScheme(std::u16string_view aTheAbsURIRef)
3960{
3961 sal_Unicode const * p = aTheAbsURIRef.data();
3962 PrefixInfo const * pPrefix = getPrefix(p, p + aTheAbsURIRef.size());
3963 return pPrefix ? pPrefix->m_eScheme : INetProtocol::NotValid;
3964}
3965
3967 rtl_TextEncoding eCharset) const
3968{
3969 // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
3970 // PROT_VND_SUN_STAR_PKG misuse m_aHost:
3971 if (!getSchemeInfo().m_bHost)
3972 return OUString();
3973 OUStringBuffer aHostPort(decode(m_aHost, eMechanism, eCharset));
3974 if (m_aPort.isPresent())
3975 {
3976 aHostPort.append(":" + decode(m_aPort, eMechanism, eCharset));
3977 }
3978 return aHostPort.makeStringAndClear();
3979}
3980
3981sal_uInt32 INetURLObject::GetPort() const
3982{
3983 if (m_aPort.isPresent())
3984 {
3985 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
3986 sal_Unicode const * pEnd = p + m_aPort.getLength();
3987 sal_uInt32 nThePort;
3988 if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
3989 return nThePort;
3990 }
3991 return 0;
3992}
3993
3994bool INetURLObject::SetPort(sal_uInt32 nThePort)
3995{
3996 if (getSchemeInfo().m_bPort && m_aHost.isPresent())
3997 {
3998 sal_Int32 nDelta;
3999 if (m_aPort.isPresent())
4000 nDelta = m_aPort.set(m_aAbsURIRef, OUString::number(nThePort));
4001 else
4002 {
4003 m_aAbsURIRef.insert(m_aHost.getEnd(), u':');
4004 nDelta = m_aPort.set(m_aAbsURIRef, OUString::number(nThePort), m_aHost.getEnd() + 1)
4005 + 1;
4006 }
4007 m_aPath += nDelta;
4008 m_aQuery += nDelta;
4009 m_aFragment += nDelta;
4010 return true;
4011 }
4012 return false;
4013}
4014
4015sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
4016{
4017 if (!checkHierarchical())
4018 return 0;
4019
4020 sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4021 sal_Unicode const * pEnd = p + m_aPath.getLength();
4022 if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
4023 --pEnd;
4024 sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
4025 while (p != pEnd)
4026 if (*p++ == '/')
4027 ++n;
4028 return n;
4029}
4030
4031bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4032{
4033 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4034 if (!aSegment.isPresent())
4035 return false;
4036
4037 OUStringBuffer aNewPath(m_aPath.getLength());
4038 aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
4039 aSegment.getBegin() - m_aPath.getBegin());
4040 if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
4041 aNewPath.append('/');
4042 else
4043 aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
4044 m_aPath.getEnd() - aSegment.getEnd());
4045 if (aNewPath.isEmpty() && !aSegment.isEmpty() &&
4046 m_aAbsURIRef[aSegment.getBegin()] == '/')
4047 {
4048 aNewPath.append('/');
4049 }
4050
4051 return setPath(aNewPath, EncodeMechanism::NotCanonical,
4052 RTL_TEXTENCODING_UTF8);
4053}
4054
4055OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4056 DecodeMechanism eMechanism,
4057 rtl_TextEncoding eCharset) const
4058{
4059 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4060 if (!aSegment.isPresent())
4061 return OUString();
4062
4063 sal_Unicode const * pSegBegin
4064 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4065 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4066
4067 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4068 ++pSegBegin;
4069 sal_Unicode const * p = pSegBegin;
4070 while (p != pSegEnd && *p != ';')
4071 ++p;
4072
4073 return decode(pSegBegin, p, eMechanism, eCharset);
4074}
4075
4076bool INetURLObject::setName(std::u16string_view rTheName, EncodeMechanism eMechanism,
4077 rtl_TextEncoding eCharset)
4078{
4079 SubString aSegment(getSegment(LAST_SEGMENT, true));
4080 if (!aSegment.isPresent())
4081 return false;
4082
4083 sal_Unicode const * pPathBegin
4084 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4085 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4086 sal_Unicode const * pSegBegin
4087 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4088 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4089
4090 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4091 ++pSegBegin;
4092 sal_Unicode const * p = pSegBegin;
4093 while (p != pSegEnd && *p != ';')
4094 ++p;
4095
4096 OUStringBuffer aNewPath(256);
4097 aNewPath.append(std::u16string_view(pPathBegin, pSegBegin - pPathBegin));
4098 encodeText(aNewPath, rTheName, PART_PCHAR, eMechanism, eCharset, true);
4099 aNewPath.append(std::u16string_view(p, pPathEnd - p));
4100 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4101}
4102
4104 const
4105{
4106 SubString aSegment(getSegment(LAST_SEGMENT, true/*bIgnoreFinalSlash*/));
4107 if (!aSegment.isPresent())
4108 return false;
4109
4110 sal_Unicode const * pSegBegin
4111 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4112 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4113
4114 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4115 ++pSegBegin;
4116 for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4117 if (*p == '.' && p != pSegBegin)
4118 return true;
4119 return false;
4120}
4121
4122OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4123 DecodeMechanism eMechanism,
4124 rtl_TextEncoding eCharset) const
4125{
4126 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4127 if (!aSegment.isPresent())
4128 return OUString();
4129
4130 sal_Unicode const * pSegBegin
4131 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4132 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4133
4134 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4135 ++pSegBegin;
4136 sal_Unicode const * pExtension = nullptr;
4137 sal_Unicode const * p = pSegBegin;
4138 for (; p != pSegEnd && *p != ';'; ++p)
4139 if (*p == '.' && p != pSegBegin)
4140 pExtension = p;
4141 if (!pExtension)
4142 pExtension = p;
4143
4144 return decode(pSegBegin, pExtension, eMechanism, eCharset);
4145}
4146
4147bool INetURLObject::setBase(std::u16string_view rTheBase, sal_Int32 nIndex,
4148 EncodeMechanism eMechanism,
4149 rtl_TextEncoding eCharset)
4150{
4151 SubString aSegment(getSegment(nIndex, true/*bIgnoreFinalSlash*/));
4152 if (!aSegment.isPresent())
4153 return false;
4154
4155 sal_Unicode const * pPathBegin
4156 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4157 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4158 sal_Unicode const * pSegBegin
4159 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4160 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4161
4162 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4163 ++pSegBegin;
4164 sal_Unicode const * pExtension = nullptr;
4165 sal_Unicode const * p = pSegBegin;
4166 for (; p != pSegEnd && *p != ';'; ++p)
4167 if (*p == '.' && p != pSegBegin)
4168 pExtension = p;
4169 if (!pExtension)
4170 pExtension = p;
4171
4172 OUStringBuffer aNewPath(256);
4173 aNewPath.append(std::u16string_view(pPathBegin, pSegBegin - pPathBegin));
4174 encodeText(aNewPath, rTheBase, PART_PCHAR, eMechanism, eCharset, true);
4175 aNewPath.append(std::u16string_view(pExtension, pPathEnd - pExtension));
4176 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4177}
4178
4179OUString INetURLObject::getExtension(sal_Int32 nIndex,
4180 bool bIgnoreFinalSlash,
4181 DecodeMechanism eMechanism,
4182 rtl_TextEncoding eCharset) const
4183{
4184 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4185 if (!aSegment.isPresent())
4186 return OUString();
4187
4188 sal_Unicode const * pSegBegin
4189 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4190 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4191
4192 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4193 ++pSegBegin;
4194 sal_Unicode const * pExtension = nullptr;
4195 sal_Unicode const * p = pSegBegin;
4196 for (; p != pSegEnd && *p != ';'; ++p)
4197 if (*p == '.' && p != pSegBegin)
4198 pExtension = p;
4199
4200 if (!pExtension)
4201 return OUString();
4202
4203 return decode(pExtension + 1, p, eMechanism, eCharset);
4204}
4205
4206bool INetURLObject::setExtension(std::u16string_view rTheExtension,
4207 sal_Int32 nIndex, bool bIgnoreFinalSlash,
4208 rtl_TextEncoding eCharset)
4209{
4210 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4211 if (!aSegment.isPresent())
4212 return false;
4213
4214 sal_Unicode const * pPathBegin
4215 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4216 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4217 sal_Unicode const * pSegBegin
4218 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4219 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4220
4221 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4222 ++pSegBegin;
4223 sal_Unicode const * pExtension = nullptr;
4224 sal_Unicode const * p = pSegBegin;
4225 for (; p != pSegEnd && *p != ';'; ++p)
4226 if (*p == '.' && p != pSegBegin)
4227 pExtension = p;
4228 if (!pExtension)
4229 pExtension = p;
4230
4231 OUStringBuffer aNewPath(256);
4232 aNewPath.append(OUString::Concat(std::u16string_view(pPathBegin, pExtension - pPathBegin)) + ".");
4233 encodeText(aNewPath, rTheExtension, PART_PCHAR, EncodeMechanism::WasEncoded, eCharset, true);
4234 aNewPath.append(std::u16string_view(p, pPathEnd - p));
4235 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4236}
4237
4238bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4239{
4240 SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4241 if (!aSegment.isPresent())
4242 return false;
4243
4244 sal_Unicode const * pPathBegin
4245 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4246 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4247 sal_Unicode const * pSegBegin
4248 = m_aAbsURIRef.getStr() + aSegment.getBegin();
4249 sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4250
4251 if (pSegBegin < pSegEnd && *pSegBegin == '/')
4252 ++pSegBegin;
4253 sal_Unicode const * pExtension = nullptr;
4254 sal_Unicode const * p = pSegBegin;
4255 for (; p != pSegEnd && *p != ';'; ++p)
4256 if (*p == '.' && p != pSegBegin)
4257 pExtension = p;
4258 if (!pExtension)
4259 return true;
4260
4261 OUString aNewPath =
4262 OUString::Concat(std::u16string_view(pPathBegin, pExtension - pPathBegin)) +
4263 std::u16string_view(p, pPathEnd - p);
4264
4265 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4266}
4267
4269{
4270 if (!checkHierarchical())
4271 return false;
4272
4273 sal_Unicode const * pPathBegin
4274 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4275 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4276 return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4277}
4278
4280{
4281 if (!checkHierarchical())
4282 return false;
4283
4284 sal_Unicode const * pPathBegin
4285 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4286 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4287 if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4288 return true;
4289
4290 OUString aNewPath
4291 = OUString::Concat(std::u16string_view(pPathBegin, pPathEnd - pPathBegin)) + "/";
4292
4293 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4294}
4295
4297{
4298 if (!checkHierarchical())
4299 return false;
4300
4301 sal_Unicode const * pPathBegin
4302 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4303 sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4304 if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4305 return true;
4306
4307 --pPathEnd;
4308 if (pPathEnd == pPathBegin && *pPathBegin == '/')
4309 return false;
4310 OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4311
4312 return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4313}
4314
4316 sal_Unicode * pDelimiter) const
4317{
4319 return OUString();
4320
4321 if (((eStyle & FSysStyle::Vos) ? 1 : 0)
4322 + ((eStyle & FSysStyle::Unix) ? 1 : 0)
4323 + ((eStyle & FSysStyle::Dos) ? 1 : 0)
4324 > 1)
4325 {
4326 if(eStyle & FSysStyle::Vos && m_aHost.isPresent() && m_aHost.getLength() > 0)
4327 {
4328 eStyle= FSysStyle::Vos;
4329 }
4330 else
4331 {
4332 if(hasDosVolume(eStyle) || ((eStyle & FSysStyle::Dos) && m_aHost.isPresent() && m_aHost.getLength() > 0))
4333 {
4334 eStyle = FSysStyle::Dos;
4335 }
4336 else
4337 {
4338 if(eStyle & FSysStyle::Unix && (!m_aHost.isPresent() || m_aHost.getLength() == 0))
4339 {
4340 eStyle = FSysStyle::Unix;
4341 }
4342 else
4343 {
4344 eStyle= FSysStyle(0);
4345 }
4346 }
4347 }
4348 }
4349
4350 switch (eStyle)
4351 {
4352 case FSysStyle::Vos:
4353 {
4354 if (pDelimiter)
4355 *pDelimiter = '/';
4356
4357 OUStringBuffer aSynFSysPath("//");
4358 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4359 aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4360 RTL_TEXTENCODING_UTF8));
4361 else
4362 aSynFSysPath.append('.');
4363 aSynFSysPath.append(decode(m_aPath, DecodeMechanism::WithCharset,
4364 RTL_TEXTENCODING_UTF8));
4365 return aSynFSysPath.makeStringAndClear();
4366 }
4367
4368 case FSysStyle::Unix:
4369 {
4370 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4371 return OUString();
4372
4373 if (pDelimiter)
4374 *pDelimiter = '/';
4375
4376 return decode(m_aPath, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8);
4377 }
4378
4379 case FSysStyle::Dos:
4380 {
4381 if (pDelimiter)
4382 *pDelimiter = '\\';
4383
4384 OUStringBuffer aSynFSysPath(64);
4385 if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4386 {
4387 aSynFSysPath.append("\\\\"
4388 + decode(m_aHost, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8)
4389 + "\\");
4390 }
4391 sal_Unicode const * p
4392 = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4393 sal_Unicode const * pEnd = p + m_aPath.getLength();
4394 DBG_ASSERT(p < pEnd && *p == '/',
4395 "INetURLObject::getFSysPath(): Bad path");
4396 ++p;
4397 while (p < pEnd)
4398 {
4399 EscapeType eEscapeType;
4400 sal_uInt32 nUTF32 = getUTF32(p, pEnd, EncodeMechanism::WasEncoded,
4401 RTL_TEXTENCODING_UTF8,
4402 eEscapeType);
4403 if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
4404 aSynFSysPath.append('\\');
4405 else
4406 aSynFSysPath.appendUtf32(nUTF32);
4407 }
4408 return aSynFSysPath.makeStringAndClear();
4409 }
4410
4411 default:
4412 return OUString();
4413 }
4414}
4415
4416// static
4417void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText,
4418 sal_uInt32 nUCS4)
4419{
4420 DBG_ASSERT(nUCS4 < 0x80000000,
4421 "INetURLObject::appendUCS4Escape(): Bad char");
4422 if (nUCS4 < 0x80)
4423 appendEscape(rTheText, nUCS4);
4424 else if (nUCS4 < 0x800)
4425 {
4426 appendEscape(rTheText, nUCS4 >> 6 | 0xC0);
4427 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4428 }
4429 else if (nUCS4 < 0x10000)
4430 {
4431 appendEscape(rTheText, nUCS4 >> 12 | 0xE0);
4432 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4433 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4434 }
4435 else if (nUCS4 < 0x200000)
4436 {
4437 appendEscape(rTheText, nUCS4 >> 18 | 0xF0);
4438 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4439 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4440 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4441 }
4442 else if (nUCS4 < 0x4000000)
4443 {
4444 appendEscape(rTheText, nUCS4 >> 24 | 0xF8);
4445 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4446 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4447 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4448 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4449 }
4450 else
4451 {
4452 appendEscape(rTheText, nUCS4 >> 30 | 0xFC);
4453 appendEscape(rTheText, (nUCS4 >> 24 & 0x3F) | 0x80);
4454 appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4455 appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4456 appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4457 appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4458 }
4459}
4460
4461// static
4462void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4,
4463 EscapeType eEscapeType,
4464 Part ePart, rtl_TextEncoding eCharset,
4465 bool bKeepVisibleEscapes)
4466{
4467 bool bEscape;
4468 rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
4469 switch (eEscapeType)
4470 {
4471 case EscapeType::NONE:
4472 if (mustEncode(nUCS4, ePart))
4473 {
4474 bEscape = true;
4475 eTargetCharset = RTL_TEXTENCODING_UTF8;
4476 }
4477 else
4478 bEscape = false;
4479 break;
4480
4481 case EscapeType::Octet:
4482 bEscape = true;
4483 eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
4484 break;
4485
4486 case EscapeType::Utf32:
4487 if (mustEncode(nUCS4, ePart))
4488 {
4489 bEscape = true;
4490 eTargetCharset = eCharset;
4491 }
4492 else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
4493 {
4494 bEscape = true;
4495 eTargetCharset = RTL_TEXTENCODING_ASCII_US;
4496 }
4497 else
4498 bEscape = false;
4499 break;
4500 default:
4501 bEscape = false;
4502 }
4503
4504 if (bEscape)
4505 {
4506 switch (eTargetCharset)
4507 {
4508 default:
4509 OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
4510 [[fallthrough]];
4511 case RTL_TEXTENCODING_ASCII_US:
4512 case RTL_TEXTENCODING_ISO_8859_1:
4513 appendEscape(rTheText, nUCS4);
4514 break;
4515 case RTL_TEXTENCODING_UTF8:
4516 appendUCS4Escape(rTheText, nUCS4);
4517 break;
4518 }
4519 }
4520 else
4521 rTheText.append(sal_Unicode(nUCS4));
4522}
4523
4524// static
4525sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
4526 sal_Unicode const * pEnd,
4527 EncodeMechanism eMechanism,
4528 rtl_TextEncoding eCharset,
4529 EscapeType & rEscapeType)
4530{
4531 DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
4532 sal_uInt32 nUTF32 = INetMIME::getUTF32Character(rBegin, pEnd);
4533 switch (eMechanism)
4534 {
4536 rEscapeType = EscapeType::NONE;
4537 break;
4538
4540 {
4541 int nWeight1;
4542 int nWeight2;
4543 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4544 && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
4545 && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
4546 {
4547 rBegin += 2;
4548 nUTF32 = nWeight1 << 4 | nWeight2;
4549 switch (eCharset)
4550 {
4551 default:
4552 OSL_FAIL(
4553 "INetURLObject::getUTF32(): Unsupported charset");
4554 [[fallthrough]];
4555 case RTL_TEXTENCODING_ASCII_US:
4556 rEscapeType = rtl::isAscii(nUTF32) ?
4558 break;
4559
4560 case RTL_TEXTENCODING_ISO_8859_1:
4561 rEscapeType = EscapeType::Utf32;
4562 break;
4563
4564 case RTL_TEXTENCODING_UTF8:
4565 if (rtl::isAscii(nUTF32))
4566 rEscapeType = EscapeType::Utf32;
4567 else
4568 {
4569 if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
4570 {
4571 sal_uInt32 nEncoded;
4572 int nShift;
4573 sal_uInt32 nMin;
4574 if (nUTF32 <= 0xDF)
4575 {
4576 nEncoded = (nUTF32 & 0x1F) << 6;
4577 nShift = 0;
4578 nMin = 0x80;
4579 }
4580 else if (nUTF32 <= 0xEF)
4581 {
4582 nEncoded = (nUTF32 & 0x0F) << 12;
4583 nShift = 6;
4584 nMin = 0x800;
4585 }
4586 else
4587 {
4588 nEncoded = (nUTF32 & 0x07) << 18;
4589 nShift = 12;
4590 nMin = 0x10000;
4591 }
4592 sal_Unicode const * p = rBegin;
4593 bool bUTF8 = true;
4594 for (;;)
4595 {
4596 if (pEnd - p < 3
4597 || p[0] != '%'
4598 || (nWeight1
4600 < 8
4601 || nWeight1 > 11
4602 || (nWeight2
4604 < 0)
4605 {
4606 bUTF8 = false;
4607 break;
4608 }
4609 p += 3;
4610 nEncoded
4611 |= ((nWeight1 & 3) << 4 | nWeight2)
4612 << nShift;
4613 if (nShift == 0)
4614 break;
4615 nShift -= 6;
4616 }
4617 if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
4618 && nEncoded >= nMin)
4619 {
4620 rBegin = p;
4621 nUTF32 = nEncoded;
4622 rEscapeType = EscapeType::Utf32;
4623 break;
4624 }
4625 }
4626 rEscapeType = EscapeType::Octet;
4627 }
4628 break;
4629 }
4630 }
4631 else
4632 rEscapeType = EscapeType::NONE;
4633 break;
4634 }
4635
4637 {
4638 int nWeight1;
4639 int nWeight2;
4640 if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4641 && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
4642 && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
4643 {
4644 rBegin += 2;
4645 nUTF32 = nWeight1 << 4 | nWeight2;
4646 rEscapeType = EscapeType::Octet;
4647 }
4648 else
4649 rEscapeType = EscapeType::NONE;
4650 break;
4651 }
4652 }
4653 return nUTF32;
4654}
4655
4656// static
4657sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
4658 sal_Unicode const * pEnd,
4659 bool bEager)
4660{
4661 enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
4662 State eState = STATE_DOT;
4663 sal_Int32 nLabels = 0;
4664 sal_Unicode const * pLastAlphanumeric = nullptr;
4665 for (sal_Unicode const * p = rBegin;; ++p)
4666 switch (eState)
4667 {
4668 case STATE_DOT:
4669 if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_'))
4670 {
4671 ++nLabels;
4672 eState = STATE_LABEL;
4673 break;
4674 }
4675 if (bEager || nLabels == 0)
4676 return 0;
4677 rBegin = p - 1;
4678 return nLabels;
4679
4680 case STATE_LABEL:
4681 if (p != pEnd)
4682 {
4683 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4684 break;
4685 else if (*p == '.')
4686 {
4687 eState = STATE_DOT;
4688 break;
4689 }
4690 else if (*p == '-')
4691 {
4692 pLastAlphanumeric = p;
4693 eState = STATE_HYPHEN;
4694 break;
4695 }
4696 }
4697 rBegin = p;
4698 return nLabels;
4699
4700 case STATE_HYPHEN:
4701 if (p != pEnd)
4702 {
4703 if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4704 {
4705 eState = STATE_LABEL;
4706 break;
4707 }
4708 else if (*p == '-')
4709 break;
4710 }
4711 if (bEager)
4712 return 0;
4713 rBegin = pLastAlphanumeric;
4714 return nLabels;
4715 }
4716}
4717
4718// static
4720 sal_Unicode const * pEnd)
4721{
4722 if (rBegin != pEnd && *rBegin == '[') {
4723 sal_Unicode const * p = rBegin + 1;
4724 //TODO: check for valid IPv6address (RFC 2373):
4725 while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.'))
4726 {
4727 ++p;
4728 }
4729 if (p != pEnd && *p == ']') {
4730 rBegin = p + 1;
4731 return true;
4732 }
4733 }
4734 return false;
4735}
4736
4738 const
4739{
4740 if (!checkHierarchical())
4741 return OUString();
4742 INetURLObject aTemp(*this);
4743 aTemp.clearFragment();
4744 aTemp.clearQuery();
4745 aTemp.removeSegment(LAST_SEGMENT, false);
4746 aTemp.setFinalSlash();
4747 return aTemp.GetMainURL(DecodeMechanism::ToIUri);
4748}
4749
4751 rtl_TextEncoding eCharset) const
4752{
4753 return getName(LAST_SEGMENT, true, eMechanism, eCharset);
4754}
4755
4757{
4758 return getExtension(LAST_SEGMENT, false);
4759}
4760
4762{
4763 INetURLObject aTemp(*this);
4764 aTemp.clearFragment();
4765 aTemp.clearQuery();
4766 if (!aTemp.removeSegment(LAST_SEGMENT, false))
4767 return;
4768 *this = aTemp;
4769}
4770
4772{
4774 return OUString();
4775 OUString aSystemPath;
4776 if (osl::FileBase::getSystemPathFromFileURL(
4777 decode(m_aAbsURIRef.getStr(),
4778 m_aAbsURIRef.getStr() + m_aPath.getEnd(),
4779 DecodeMechanism::NONE, RTL_TEXTENCODING_UTF8),
4780 aSystemPath)
4781 != osl::FileBase::E_None)
4782 return OUString();
4783 return aSystemPath;
4784}
4785
4787{
4788 INetURLObject aTemp(*this);
4789 aTemp.removeFinalSlash();
4790 return aTemp.PathToFileName();
4791}
4792
4794{
4795 INetURLObject aTemp(*this);
4796 aTemp.removeSegment();
4797 aTemp.removeFinalSlash();
4798 return aTemp.PathToFileName();
4799}
4800
4801void INetURLObject::SetBase(std::u16string_view rTheBase)
4802{
4804}
4805
4807{
4809}
4810
4811void INetURLObject::SetExtension(std::u16string_view rTheExtension)
4812{
4813 setExtension(rTheExtension, LAST_SEGMENT, false);
4814}
4815
4817{
4818 OUString aTheExtension(getExtension(LAST_SEGMENT, false));
4819 return removeExtension(LAST_SEGMENT, false)
4820 ? aTheExtension : OUString();
4821}
4822
4823/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
const sal_Int32 m_nLength
static bool isVisible(sal_uInt32 nChar)
Check for US-ASCII visible character.
Definition: inetmime.hxx:207
static sal_uInt32 getUTF32Character(const sal_Unicode *&rBegin, const sal_Unicode *pEnd)
Get the UTF-32 character at the head of a UTF-16 encoded string.
Definition: inetmime.hxx:227
static bool equalIgnoreCase(const sal_Unicode *pBegin1, const sal_Unicode *pEnd1, const char *pString2)
Check two US-ASCII strings for equality, ignoring case.
Definition: inetmime.cxx:969
static int getHexWeight(sal_uInt32 nChar)
Get the hexadecimal digit weight of a US-ASCII character.
Definition: inetmime.hxx:219
static int getWeight(sal_uInt32 nChar)
Get the digit weight of a US-ASCII character.
Definition: inetmime.hxx:213
static bool isIMAPAtomChar(sal_uInt32 nChar)
Check whether some character is valid within an RFC 2060 <atom>.
Definition: inetmime.cxx:945
static sal_Unicode const * scanContentType(std::u16string_view rStr, OUString *pType=nullptr, OUString *pSubType=nullptr, INetContentTypeParameterList *pParameters=nullptr)
Parse the body of an RFC 2045 Content-Type header field.
Definition: inetmime.cxx:1009
static bool scanUnsigned(const sal_Unicode *&rBegin, const sal_Unicode *pEnd, bool bLeadingZeroes, sal_uInt32 &rValue)
Definition: inetmime.cxx:986
sal_Int32 getEnd() const
Definition: urlobj.hxx:939
sal_Int32 set(OUStringBuffer &rString, std::u16string_view rSubString, sal_Int32 nTheBegin)
Definition: urlobj.cxx:257
sal_Int32 getLength() const
Definition: urlobj.hxx:937
bool isPresent() const
Definition: urlobj.hxx:931
bool isEmpty() const
Definition: urlobj.hxx:933
int compare(SubString const &rOther, OUStringBuffer const &rThisString, OUStringBuffer const &rOtherString) const
Definition: urlobj.cxx:271
void operator+=(sal_Int32 nDelta)
Definition: urlobj.cxx:265
sal_Int32 getBegin() const
Definition: urlobj.hxx:935
OUString GetHostPort(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3966
SubString m_aPath
Definition: urlobj.hxx:966
OUString getExternalURL() const
Definition: urlobj.cxx:3920
OUString getName(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the name of a segment of the hierarchical path.
Definition: urlobj.cxx:4055
static OUString decode(std::u16string_view rText, DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Decode some text.
Definition: urlobj.hxx:1302
bool isAnyKnownWebDAVScheme() const
Check if the scheme is one of the WebDAV scheme we know about.
Definition: urlobj.cxx:3937
sal_uInt32 GetPort() const
Definition: urlobj.cxx:3981
static OUString GetScheme(INetProtocol eTheScheme)
Return the URL 'prefix' for a given scheme.
Definition: urlobj.cxx:3947
bool hasExtension() const
Determine whether the name of the last segment has an extension.
Definition: urlobj.cxx:4103
bool clearFragment()
Definition: urlobj.cxx:3403
std::unique_ptr< SvMemoryStream > getData() const
Definition: urlobj.cxx:628
bool removeExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true)
Remove the extension of the name of a segment.
Definition: urlobj.cxx:4238
void SetExtension(std::u16string_view rTheExtension)
Definition: urlobj.cxx:4811
DecodeMechanism
The way strings that represent (parts of) URIs are returned from get- methods.
Definition: urlobj.hxx:224
@ WithCharset
All (sequences of) escape sequences that represent characters from the specified character set,...
@ Unambiguous
All (sequences of) escape sequences that represent characters from the specified character set,...
@ ToIUri
All sequences of escape sequences that represent UTF-8 coded UTF-32 characters with a numerical value...
@ NONE
The (part of the) URI is returned unchanged.
static TOOLS_DLLPRIVATE void appendEscape(OUStringBuffer &rTheText, sal_uInt32 nOctet)
Definition: urlobj.cxx:459
void CutLastName()
Definition: urlobj.cxx:4761
INetProtocol m_eSmartScheme
Definition: urlobj.hxx:970
SubString m_aHost
Definition: urlobj.hxx:964
static TOOLS_DLLPRIVATE PrefixInfo const * getPrefix(sal_Unicode const *&rBegin, sal_Unicode const *pEnd)
Definition: urlobj.cxx:2125
bool setAbsURIRef(std::u16string_view rTheAbsURIRef, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart, FSysStyle eStyle)
Definition: urlobj.cxx:725
void changeScheme(INetProtocol eTargetScheme)
Definition: urlobj.cxx:1501
bool setPath(std::u16string_view rThePath, EncodeMechanism eMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:3202
OUString GetMainURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:262
INetProtocol m_eScheme
Definition: urlobj.hxx:969
static const OUString & GetSchemeName(INetProtocol eTheScheme)
Return the human-readable name for a given scheme.
Definition: urlobj.cxx:3953
SubString m_aScheme
Definition: urlobj.hxx:961
bool SetPort(sal_uInt32 nThePort)
Definition: urlobj.cxx:3994
bool removeFinalSlash()
Remove a final slash from the hierarchical path.
Definition: urlobj.cxx:4296
bool setPassword(std::u16string_view rThePassword, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2338
OUString GetFileExtension() const
Get the 'extension' of the last segment in the path.
Definition: urlobj.cxx:4756
OUString getExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the extension of the name of a segment.
Definition: urlobj.cxx:4179
SubString m_aUser
Definition: urlobj.hxx:962
SubString m_aFragment
Definition: urlobj.hxx:968
static sal_uInt32 getUTF32(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, EscapeType &rEscapeType)
Definition: urlobj.cxx:4525
static bool translateToExternal(std::u16string_view rTheIntURIRef, OUString &rTheExtURIRef, DecodeMechanism eDecodeMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Definition: urlobj.hxx:1231
TOOLS_DLLPRIVATE bool checkHierarchical() const
Definition: urlobj.cxx:3219
void SetBase(std::u16string_view rTheBase)
Definition: urlobj.cxx:4801
static TOOLS_DLLPRIVATE bool parseHostOrNetBiosName(sal_Unicode const *pBegin, sal_Unicode const *pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName, OUStringBuffer *pCanonic)
Definition: urlobj.cxx:2853
static bool convertIntToExt(std::u16string_view rTheIntURIRef, OUString &rTheExtURIRef, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2079
bool HasError() const
Definition: urlobj.hxx:260
static bool convertExtToInt(std::u16string_view rTheExtURIRef, OUString &rTheIntURIRef, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2102
bool convertRelToAbs(OUString const &rTheRelURIRef, INetURLObject &rTheAbsURIRef, bool &rWasAbsolute, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs, FSysStyle eStyle) const
Definition: urlobj.cxx:1523
bool removeSegment(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true)
Remove a segment from the hierarchical path.
Definition: urlobj.cxx:4031
bool clearPassword()
Definition: urlobj.cxx:2320
bool setUser(std::u16string_view rTheUser, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2277
OUStringBuffer m_aAbsURIRef
Definition: urlobj.hxx:960
TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const
Definition: urlobj.cxx:3434
bool setFinalSlash()
Make the hierarchical path end in a final slash (if it does not already do so).
Definition: urlobj.cxx:4279
OUString GetPath() const
Definition: urlobj.cxx:4793
SubString m_aPort
Definition: urlobj.hxx:965
OUString GetPass(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:420
OUString GetURLNoMark(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3527
static void appendUCS4(OUStringBuffer &rTheText, sal_uInt32 nUCS4, EscapeType eEscapeType, Part ePart, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes)
Definition: urlobj.cxx:4462
SubString m_aQuery
Definition: urlobj.hxx:967
OUString getBase(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the base of the name of a segment.
Definition: urlobj.cxx:4122
TOOLS_DLLPRIVATE void setInvalid()
Definition: urlobj.cxx:596
bool HasParam() const
Definition: urlobj.hxx:732
OUString CutExtension()
Definition: urlobj.cxx:4816
OUString GetLastName(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the last segment in the path.
Definition: urlobj.cxx:4750
bool ConcatData(INetProtocol eTheScheme, std::u16string_view rTheUser, std::u16string_view rThePassword, std::u16string_view rTheHost, sal_uInt32 nThePort, std::u16string_view rThePath)
Definition: urlobj.cxx:3766
static INetProtocol CompareProtocolScheme(std::u16string_view aTheAbsURIRef)
Definition: urlobj.cxx:3959
static void encodeText(OUStringBuffer &rOutputBuffer, sal_Unicode const *pBegin, sal_Unicode const *pEnd, Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes)
Definition: urlobj.cxx:3446
OUString GetUser(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:415
static sal_uInt32 scanDomain(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, bool bEager=true)
Definition: urlobj.cxx:4657
bool setBase(std::u16string_view rTheBase, sal_Int32 nIndex=LAST_SEGMENT, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Set the base of the name of a segment (preserving the extension).
Definition: urlobj.cxx:4147
bool operator==(INetURLObject const &rObject) const
Definition: urlobj.cxx:3713
OUString GetPartBeforeLastName() const
Definition: urlobj.cxx:4737
void clearQuery()
Definition: urlobj.cxx:3369
sal_Int32 getSegmentCount(bool bIgnoreFinalSlash=true) const
The number of segments in the hierarchical path.
Definition: urlobj.cxx:4015
TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const
Definition: urlobj.cxx:2249
static TOOLS_DLLPRIVATE bool scanIPv6reference(sal_Unicode const *&rBegin, sal_Unicode const *pEnd)
Definition: urlobj.cxx:4719
OUString GetURLPath(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:456
bool setFragment(std::u16string_view rTheMark, EncodeMechanism eMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:3415
OUString GetBase() const
Definition: urlobj.cxx:4806
bool insertName(std::u16string_view rTheName, bool bAppendFinalSlash=false, sal_Int32 nIndex=LAST_SEGMENT, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Insert a new segment into the hierarchical path.
Definition: urlobj.cxx:3282
bool setHost(std::u16string_view rTheHost, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2904
OUString GetParam(rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:734
static TOOLS_DLLPRIVATE bool parseHost(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, OUStringBuffer *pCanonic)
Definition: urlobj.cxx:2380
bool setName(std::u16string_view rTheName, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Set the name of the last segment (preserving any parameters and any query or fragment part).
Definition: urlobj.cxx:4076
OUString PathToFileName() const
Definition: urlobj.cxx:4771
bool hasFinalSlash() const
Determine whether the hierarchical path ends in a final slash.
Definition: urlobj.cxx:4268
@ PART_UNAMBIGUOUS
Definition: urlobj.hxx:797
@ PART_PATH_BEFORE_QUERY
Definition: urlobj.hxx:792
@ PART_USER_PASSWORD
Definition: urlobj.hxx:784
@ PART_UNO_PARAM_VALUE
Definition: urlobj.hxx:796
@ PART_MESSAGE_ID_PATH
Definition: urlobj.hxx:790
@ PART_URIC_NO_SLASH
Definition: urlobj.hxx:798
@ PART_VISIBLE_NONSPECIAL
Definition: urlobj.hxx:795
@ PART_REL_SEGMENT_EXTRA
Definition: urlobj.hxx:787
OUString GetHost(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:437
static OUString GetAbsURL(std::u16string_view rTheBaseURIRef, OUString const &rTheRelURIRef, EncodeMechanism eEncodeMechanism=EncodeMechanism::WasEncoded, DecodeMechanism eDecodeMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
@descr If rTheRelURIRef cannot be converted to an absolute URL (because of syntactic reasons),...
Definition: urlobj.cxx:3896
bool convertAbsToRel(OUString const &rTheAbsURIRef, OUString &rTheRelURIRef, EncodeMechanism eEncodeMechanism, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset, FSysStyle eStyle) const
Definition: urlobj.cxx:1921
static void appendUCS4Escape(OUStringBuffer &rTheText, sal_uInt32 nUCS4)
Definition: urlobj.cxx:4417
INetProtocol GetProtocol() const
Definition: urlobj.hxx:380
TOOLS_DLLPRIVATE SubString getSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash) const
Definition: urlobj.cxx:3236
bool setQuery(std::u16string_view rTheQuery, EncodeMechanism eMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:3381
bool setExtension(std::u16string_view rTheExtension, sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Set the extension of the name of a segment (replacing an already existing extension).
Definition: urlobj.cxx:4206
OUString GetFull() const
Definition: urlobj.cxx:4786
OUString getFSysPath(FSysStyle eStyle, sal_Unicode *pDelimiter=nullptr) const
Return the file system path represented by a file URL (ignoring any fragment part).
Definition: urlobj.cxx:4315
SubString m_aAuth
Definition: urlobj.hxx:963
bool Append(std::u16string_view rTheSegment, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Definition: urlobj.cxx:3229
static TOOLS_DLLPRIVATE bool parsePath(INetProtocol eScheme, sal_Unicode const **pBegin, sal_Unicode const *pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSkippedInitialSlash, sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter, sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter, OUStringBuffer &rSynPath)
Definition: urlobj.cxx:2945
TOOLS_DLLPRIVATE SchemeInfo const & getSchemeInfo() const
Definition: urlobj.cxx:440
EncodeMechanism
The way input strings that represent (parts of) URIs are interpreted in set-methods.
Definition: urlobj.hxx:195
@ WasEncoded
Sequences of escape sequences, that represent characters from the specified character set and that ca...
@ NotCanonical
All escape sequences that are already present are copied verbatim (but using upper case hex digits).
@ All
All escape sequences that are already present are ignored, and are interpreted as literal sequences o...
OUString GetURLNoPass(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3519
TOOLS_DLLPRIVATE SubString getAuthority() const
Definition: urlobj.cxx:2266
bool isSchemeEqualTo(INetProtocol scheme) const
Definition: urlobj.hxx:382
OUString getAbbreviated(css::uno::Reference< css::util::XStringWidth > const &rStringWidth, sal_Int32 nWidth, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3536
static std::size_t decodeSomeChars(css::uno::Sequence< sal_Int8 > &aPass, std::u16string_view sBuffer)
#define DBG_ASSERT(sCon, aError)
Definition: debug.hxx:57
float u
sal_Int32 nIndex
void * p
sal_Int64 n
#define SAL_WARN_IF(condition, area, stream)
JAVASCRIPT
void set(css::uno::UnoInterfaceReference const &value)
void replaceAt(OUStringBuffer &rIn, sal_Int32 nIndex, sal_Int32 nCount, std::u16string_view newStr)
int i
bool equalsIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
enumrange< T >::Iterator begin(enumrange< T >)
end
constexpr OUStringLiteral EMPTY
State
HashMap_OWString_Interface aMap
std::map< OUString, rtl::Reference< Entity > > map
@ READ
allow read accesses
char const * m_pTranslatedPrefix
Definition: urlobj.cxx:312
INetProtocol m_eScheme
Definition: urlobj.cxx:313
char const * m_pPrefix
Definition: urlobj.cxx:311
char const * m_pPrefix
Definition: urlobj.cxx:296
rtl::OUStringConstExpr m_sScheme
Definition: urlobj.cxx:295
PRIVATE
sal_uInt16 sal_Unicode
INetProtocol
Definition: urlobj.hxx:51
FSysStyle
The supported notations for file system paths.
Definition: urlobj.hxx:87
@ Detect
Detect the used notation.
@ Unix
Unix notation (e.g., "/dir/file").
@ Dos
DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
@ Vos
VOS notation (e.g., "//server/dir/file").
std::unique_ptr< char[]> aBuffer
sal_Int32 nLength