LibreOffice Module tools (master)  1
urlobj.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  * This file incorporates work covered by the following license notice:
10  *
11  * Licensed to the Apache Software Foundation (ASF) under one or more
12  * contributor license agreements. See the NOTICE file distributed
13  * with this work for additional information regarding copyright
14  * ownership. The ASF licenses this file to you under the Apache
15  * License, Version 2.0 (the "License"); you may not use this file
16  * except in compliance with the License. You may obtain a copy of
17  * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18  */
19 
20 #include <sal/config.h>
21 
22 #include <tools/urlobj.hxx>
23 #include <tools/debug.hxx>
24 #include <tools/inetmime.hxx>
25 #include <tools/stream.hxx>
26 #include <com/sun/star/uno/Reference.hxx>
27 #include <com/sun/star/util/XStringWidth.hpp>
28 #include <o3tl/enumarray.hxx>
29 #include <osl/diagnose.h>
30 #include <osl/file.hxx>
31 #include <rtl/character.hxx>
32 #include <rtl/string.h>
33 #include <rtl/textenc.h>
34 #include <rtl/ustring.hxx>
35 #include <sal/log.hxx>
36 #include <sal/types.h>
37 
38 #include <algorithm>
39 #include <cassert>
40 #include <limits>
41 #include <memory>
42 
43 #include <string.h>
44 
45 #include <com/sun/star/uno/Sequence.hxx>
46 #include <comphelper/base64.hxx>
47 
48 using namespace css;
49 
50 // INetURLObject
51 
52 /* The URI grammar (using RFC 2234 conventions).
53 
54  Constructs of the form
55  {reference <rule1> using rule2}
56  stand for a rule matching the given rule1 specified in the given reference,
57  encoded to URI syntax using rule2 (as specified in this URI grammar).
58 
59 
60  ; RFC 1738, RFC 2396, RFC 2732, private
61  login = [user [":" password] "@"] hostport
62  user = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
63  password = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ";" / "=" / "_" / "~")
64  hostport = host [":" port]
65  host = incomplete-hostname / hostname / IPv4address / IPv6reference
66  incomplete-hostname = *(domainlabel ".") domainlabel
67  hostname = *(domainlabel ".") toplabel ["."]
68  domainlabel = alphanum [*(alphanum / "-") alphanum]
69  toplabel = ALPHA [*(alphanum / "-") alphanum]
70  IPv4address = 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT "." 1*3DIGIT
71  IPv6reference = "[" hexpart [":" IPv4address] "]"
72  hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
73  hexseq = hex4 *(":" hex4)
74  hex4 = 1*4HEXDIG
75  port = *DIGIT
76  escaped = "%" HEXDIG HEXDIG
77  reserved = "$" / "&" / "+" / "," / "/" / ":" / ";" / "=" / "?" / "@" / "[" / "]"
78  mark = "!" / "'" / "(" / ")" / "*" / "-" / "." / "_" / "~"
79  alphanum = ALPHA / DIGIT
80  unreserved = alphanum / mark
81  uric = escaped / reserved / unreserved
82  pchar = escaped / unreserved / "$" / "&" / "+" / "," / ":" / "=" / "@"
83 
84 
85  ; RFC 1738, RFC 2396
86  ftp-url = "FTP://" login ["/" segment *("/" segment) [";TYPE=" ("A" / "D" / "I")]]
87  segment = *pchar
88 
89 
90  ; RFC 1738, RFC 2396
91  http-url = "HTTP://" hostport ["/" segment *("/" segment) ["?" *uric]]
92  segment = *(pchar / ";")
93 
94 
95  ; RFC 1738, RFC 2396, <http://support.microsoft.com/default.aspx?scid=KB;EN-US;Q188997&>
96  file-url = "FILE://" [host / "LOCALHOST" / netbios-name] ["/" segment *("/" segment)]
97  segment = *pchar
98  netbios-name = 1*{<alphanum / "!" / "#" / "$" / "%" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "^" / "_" / "{" / "}" / "~"> using (escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "-" / "." / "@" / "_" / "~")}
99 
100 
101  ; RFC 2368, RFC 2396
102  mailto-url = "MAILTO:" [to] [headers]
103  to = {RFC 822 <#mailbox> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
104  headers = "?" header *("&" header)
105  header = hname "=" hvalue
106  hname = {RFC 822 <field-name> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")} / "BODY"
107  hvalue = {RFC 822 <field-body> using *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
108 
109 
110  ; private (see RFC 1738, RFC 2396)
111  vnd-sun-star-webdav-url = "VND.SUN.STAR.WEBDAV://" hostport ["/" segment *("/" segment) ["?" *uric]]
112  segment = *(pchar / ";")
113 
114 
115  ; private
116  private-url = "PRIVATE:" path ["?" *uric]
117  path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
118 
119 
120  ; private
121  vnd-sun-star-help-url = "VND.SUN.STAR.HELP://" name *("/" segment) ["?" *uric]
122  name = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
123  segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
124 
125 
126  ; private
127  https-url = "HTTPS://" hostport ["/" segment *("/" segment) ["?" *uric]]
128  segment = *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
129 
130 
131  ; private
132  slot-url = "SLOT:" path ["?" *uric]
133  path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
134 
135 
136  ; private
137  macro-url = "MACRO:" path ["?" *uric]
138  path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
139 
140 
141  ; private
142  javascript-url = "JAVASCRIPT:" *uric
143 
144 
145  ; RFC 2397
146  data-url = "DATA:" [mediatype] [";BASE64"] "," *uric
147  mediatype = [type "/" subtype] *(";" attribute "=" value)
148  type = {RFC 2045 <type> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
149  subtype = {RFC 2045 <subtype> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
150  attribute = {RFC 2045 <attribute> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
151  value = {RFC 2045 <value> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / ":" / "?" / "@" / "_" / "~")}
152 
153 
154  ; RFC 2392, RFC 2396
155  cid-url = "CID:" {RFC 822 <addr-spec> using *uric}
156 
157 
158  ; private
159  vnd-sun-star-hier-url = "VND.SUN.STAR.HIER:" ["//"reg_name] *("/" *pchar)
160  reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
161 
162 
163  ; private
164  uno-url = ".UNO:" path ["?" *uric]
165  path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
166 
167 
168  ; private
169  component-url = ".COMPONENT:" path ["?" *uric]
170  path = *(escaped / alphanum / "!" / "$" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")
171 
172 
173  ; private
174  vnd-sun-star-pkg-url = "VND.SUN.STAR.PKG://" reg_name *("/" *pchar) ["?" *uric]
175  reg_name = 1*(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / ":" / ";" / "=" / "@" / "_" / "~")
176 
177 
178  ; RFC 2255
179  ldap-url = "LDAP://" [hostport] ["/" [dn ["?" [attrdesc *("," attrdesc)] ["?" ["base" / "one" / "sub"] ["?" [filter] ["?" extension *("," extension)]]]]]]
180  dn = {RFC 2253 <distinguishedName> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
181  attrdesc = {RFC 2251 <AttributeDescription> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
182  filter = {RFC 2254 <filter> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
183  extension = ["!"] ["X-"] extoken ["=" exvalue]
184  extoken = {RFC 2252 <oid> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "@" / "_" / "~")}
185  exvalue = {RFC 2251 <LDAPString> using *(escaped / alphanum / "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "-" / "." / "/" / ":" / ";" / "=" / "@" / "_" / "~")}
186 
187 
188  ; private
189  db-url = "DB:" *uric
190 
191 
192  ; private
193  vnd-sun-star-cmd-url = "VND.SUN.STAR.CMD:" opaque_part
194  opaque_part = uric_no_slash *uric
195  uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
196 
197 
198  ; RFC 1738
199  telnet-url = "TELNET://" login ["/"]
200 
201 
202  ; private
203  vnd-sun-star-expand-url = "VND.SUN.STAR.EXPAND:" opaque_part
204  opaque_part = uric_no_slash *uric
205  uric_no_slash = unreserved / escaped / ";" / "?" / ":" / "@" / "&" / "=" / "+" / "$" / ","
206 
207 
208  ; private
209  vnd-sun-star-tdoc-url = "VND.SUN.STAR.TDOC:/" segment *("/" segment)
210  segment = *pchar
211 
212 
213  ; private
214  unknown-url = scheme ":" 1*uric
215  scheme = ALPHA *(alphanum / "+" / "-" / ".")
216 
217 
218  ; private (http://ubiqx.org/cifs/Appendix-D.html):
219  smb-url = "SMB://" login ["/" segment *("/" segment) ["?" *uric]]
220  segment = *(pchar / ";")
221  */
222 
224 {
225  sal_Int32 nDelta = -m_nLength;
226  m_nBegin = -1;
227  m_nLength = 0;
228  return nDelta;
229 }
230 
231 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
232  OUString const & rSubString)
233 {
234  sal_Int32 nDelta = rSubString.getLength() - m_nLength;
235 
236  rString.remove(m_nBegin, m_nLength);
237  rString.insert(m_nBegin, rSubString);
238 
239  m_nLength = rSubString.getLength();
240  return nDelta;
241 }
242 
243 inline sal_Int32 INetURLObject::SubString::set(OUString & rString,
244  OUString const & rSubString)
245 {
246  sal_Int32 nDelta = rSubString.getLength() - m_nLength;
247 
248  rString = rString.replaceAt(m_nBegin, m_nLength, rSubString);
249 
250  m_nLength = rSubString.getLength();
251  return nDelta;
252 }
253 
254 inline sal_Int32 INetURLObject::SubString::set(OUStringBuffer & rString,
255  OUString const & rSubString,
256  sal_Int32 nTheBegin)
257 {
258  m_nBegin = nTheBegin;
259  return set(rString, rSubString);
260 }
261 
262 inline void INetURLObject::SubString::operator +=(sal_Int32 nDelta)
263 {
264  if (isPresent())
265  m_nBegin = m_nBegin + nDelta;
266 }
267 
269  OUStringBuffer const & rThisString,
270  OUStringBuffer const & rOtherString) const
271 {
272  sal_Int32 len = std::min(m_nLength, rOther.m_nLength);
273  sal_Unicode const * p1 = rThisString.getStr() + m_nBegin;
274  sal_Unicode const * end = p1 + len;
275  sal_Unicode const * p2 = rOtherString.getStr() + rOther.m_nBegin;
276  while (p1 != end) {
277  if (*p1 < *p2) {
278  return -1;
279  } else if (*p1 > *p2) {
280  return 1;
281  }
282  ++p1;
283  ++p2;
284  }
285  return m_nLength < rOther.m_nLength ? -1
286  : m_nLength > rOther.m_nLength ? 1
287  : 0;
288 }
289 
291 {
292  char const * m_pScheme;
293  char const * m_pPrefix;
295  bool m_bUser;
296  bool m_bAuth;
298  bool m_bHost;
299  bool m_bPort;
301  bool m_bQuery;
302 };
303 
305 {
306  enum class Kind { Official, Internal, External }; // order is important!
307 
308  char const * m_pPrefix;
309  char const * m_pTranslatedPrefix;
312 };
313 
314 // static
315 inline INetURLObject::SchemeInfo const &
317 {
319  SchemeInfo{
320  "", "", false, false, false, false, false, false, false, false},
321  SchemeInfo{
322  "ftp", "ftp://", true, true, false, true, true, true, true,
323  false},
324  SchemeInfo{
325  "http", "http://", true, false, false, false, true, true, true,
326  true},
327  SchemeInfo{
328  "file", "file://", true, false, false, false, true, false, true,
329  false},
330  SchemeInfo{
331  "mailto", "mailto:", false, false, false, false, false, false,
332  false, true},
333  SchemeInfo{
334  "vnd.sun.star.webdav", "vnd.sun.star.webdav://", true, false,
335  false, false, true, true, true, true},
336  SchemeInfo{
337  "private", "private:", false, false, false, false, false, false,
338  false, true},
339  SchemeInfo{
340  "vnd.sun.star.help", "vnd.sun.star.help://", true, false, false,
341  false, false, false, true, true},
342  SchemeInfo{
343  "https", "https://", true, false, false, false, true, true,
344  true, true},
345  SchemeInfo{
346  "slot", "slot:", false, false, false, false, false, false, false,
347  true},
348  SchemeInfo{
349  "macro", "macro:", false, false, false, false, false, false,
350  false, true},
351  SchemeInfo{
352  "javascript", "javascript:", false, false, false, false, false,
353  false, false, false},
354  SchemeInfo{
355  "data", "data:", false, false, false, false, false, false, false,
356  false},
357  SchemeInfo{
358  "cid", "cid:", false, false, false, false, false, false, false,
359  false},
360  SchemeInfo{
361  "vnd.sun.star.hier", "vnd.sun.star.hier:", true, false, false,
362  false, false, false, true, false},
363  SchemeInfo{
364  ".uno", ".uno:", false, false, false, false, false, false, false,
365  true},
366  SchemeInfo{
367  ".component", ".component:", false, false, false, false, false,
368  false, false, true},
369  SchemeInfo{
370  "vnd.sun.star.pkg", "vnd.sun.star.pkg://", true, false, false,
371  false, false, false, true, true},
372  SchemeInfo{
373  "ldap", "ldap://", true, false, false, false, true, true,
374  false, true},
375  SchemeInfo{
376  "db", "db:", false, false, false, false, false, false, false,
377  false},
378  SchemeInfo{
379  "vnd.sun.star.cmd", "vnd.sun.star.cmd:", false, false, false,
380  false, false, false, false, false},
381  SchemeInfo{
382  "telnet", "telnet://", true, true, false, true, true, true,
383  true, false},
384  SchemeInfo{
385  "vnd.sun.star.expand", "vnd.sun.star.expand:", false, false,
386  false, false, false, false, false, false},
387  SchemeInfo{
388  "vnd.sun.star.tdoc", "vnd.sun.star.tdoc:", false, false, false,
389  false, false, false, true, false},
390  SchemeInfo{
391  "", "", false, false, false, false, true, true, true, false },
392  SchemeInfo{
393  "smb", "smb://", true, true, false, true, true, true, true,
394  true},
395  SchemeInfo{
396  "hid", "hid:", false, false, false, false, false, false, false,
397  true},
398  SchemeInfo{
399  "sftp", "sftp://", true, true, false, true, true, true, true,
400  true},
401  SchemeInfo{
402  "vnd.libreoffice.cmis", "vnd.libreoffice.cmis://", true, true,
403  false, false, true, false, true, true} };
404  return map[eTheScheme];
405 };
406 
408 {
409  return getSchemeInfo(m_eScheme);
410 }
411 
412 namespace {
413 
414 sal_Unicode getHexDigit(sal_uInt32 nWeight)
415 {
416  assert(nWeight < 16);
417  static const sal_Unicode aDigits[16]
418  = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C',
419  'D', 'E', 'F' };
420  return aDigits[nWeight];
421 }
422 
423 }
424 
425 // static
426 inline void INetURLObject::appendEscape(OUStringBuffer & rTheText,
427  sal_uInt32 nOctet)
428 {
429  rTheText.append( '%' );
430  rTheText.append( getHexDigit(nOctet >> 4) );
431  rTheText.append( getHexDigit(nOctet & 15) );
432 }
433 
434 namespace {
435 
436 enum
437 {
454 };
455 
// Lookup table with one entry per ASCII code point (index 0..127); the
// comment at the start of each row names the character that row describes.
// Each entry is a sum of the PA..PR constants, used as a bit mask: mustEncode()
// below reports that a character needs encoding in a given part when the
// entry has none of that part's bits set, i.e. a set bit means "may appear
// unencoded in that part".
// NOTE(review): PA..PR are presumably shorthands for INetURLObject::Part
// values declared in the enum above; its enumerators are not visible here,
// so confirm the mapping there.
// The first 32 entries (control characters) and the last one (DEL) are 0, so
// those characters are reported as needing encoding for every part.
456 sal_uInt32 const aMustEncodeMap[128]
457  = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
458  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 /* */ PP,
460 /* ! */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
461 /* " */ PM+PN +PP,
462 /* # */ PM,
463 /* $ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
464 /* % */ PM,
465 /* & */ PA +PD+PE+PF+PG+PH+PI +PK+PL+PM+PN+PO +PQ+PR,
466 /* ' */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
467 /* ( */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
468 /* ) */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
469 /* * */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
470 /* + */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
471 /* , */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN +PQ+PR,
472 /* - */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
473 /* . */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
474 /* / */ +PD +PG+PH+PI+PJ+PK +PM+PN+PO,
475 /* 0 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
476 /* 1 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
477 /* 2 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
478 /* 3 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
479 /* 4 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
480 /* 5 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
481 /* 6 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
482 /* 7 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
483 /* 8 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
484 /* 9 */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
485 /* : */ +PD+PE +PG+PH+PI+PJ+PK+PL+PM+PN+PO +PQ+PR,
486 /* ; */ PA +PE+PF+PG+PH+PI+PJ+PK +PM +PQ+PR,
487 /* < */ +PI +PM+PN +PP,
488 /* = */ PA +PD+PE+PF+PG+PH +PK+PL+PM+PN +PQ+PR,
489 /* > */ +PI +PM+PN +PP,
490 /* ? */ +PG +PM +PO +PQ,
491 /* @ */ +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
492 /* A */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
493 /* B */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
494 /* C */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
495 /* D */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
496 /* E */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
497 /* F */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
498 /* G */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
499 /* H */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
500 /* I */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
501 /* J */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
502 /* K */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
503 /* L */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
504 /* M */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
505 /* N */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
506 /* O */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
507 /* P */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
508 /* Q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
509 /* R */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
510 /* S */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
511 /* T */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
512 /* U */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
513 /* V */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
514 /* W */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
515 /* X */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
516 /* Y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
517 /* Z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
518 /* [ */ PG +PM+PN+PO,
519 /* \ */ +PM+PN +PP,
520 /* ] */ PG +PM+PN+PO,
521 /* ^ */ PM+PN +PP,
522 /* _ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
523 /* ` */ PM+PN +PP,
524 /* a */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
525 /* b */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
526 /* c */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
527 /* d */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
528 /* e */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
529 /* f */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
530 /* g */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
531 /* h */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
532 /* i */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
533 /* j */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
534 /* k */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
535 /* l */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
536 /* m */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
537 /* n */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
538 /* o */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
539 /* p */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
540 /* q */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
541 /* r */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
542 /* s */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
543 /* t */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
544 /* u */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
545 /* v */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
546 /* w */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
547 /* x */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
548 /* y */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
549 /* z */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ+PR,
550 /* { */ PM+PN +PP,
551 /* | */ +PM+PN +PP,
552 /* } */ PM+PN +PP,
553 /* ~ */ PA +PD+PE+PF+PG+PH+PI+PJ+PK+PL+PM+PN+PO+PP+PQ,
554  0 };
555 
556 bool mustEncode(sal_uInt32 nUTF32, INetURLObject::Part ePart)
557 {
558  return !rtl::isAscii(nUTF32) || !(aMustEncodeMap[nUTF32] & ePart);
559 }
560 
561 }
562 
564 {
565  m_aAbsURIRef.setLength(0);
566  m_eScheme = INetProtocol::NotValid;
567  m_aScheme.clear();
568  m_aUser.clear();
569  m_aAuth.clear();
570  m_aHost.clear();
571  m_aPort.clear();
572  m_aPath.clear();
573  m_aQuery.clear();
574  m_aFragment.clear();
575 }
576 
577 namespace {
578 
579 std::unique_ptr<SvMemoryStream> memoryStream(
580  void const * data, sal_Int32 length)
581 {
582  std::unique_ptr<char[]> b(
583  new char[length]);
584  memcpy(b.get(), data, length);
585  std::unique_ptr<SvMemoryStream> s(
586  new SvMemoryStream(b.get(), length, StreamMode::READ));
587  s->ObjectOwnsMemory(true);
588  b.release();
589  return s;
590 }
591 
592 }
593 
594 std::unique_ptr<SvMemoryStream> INetURLObject::getData() const
595 {
596  if( GetProtocol() != INetProtocol::Data )
597  {
598  return nullptr;
599  }
600 
601  OUString sURLPath = GetURLPath( DecodeMechanism::WithCharset, RTL_TEXTENCODING_ISO_8859_1 );
602  sal_Unicode const * pSkippedMediatype = INetMIME::scanContentType( sURLPath );
603  sal_Int32 nCharactersSkipped = pSkippedMediatype == nullptr
604  ? 0 : pSkippedMediatype-sURLPath.getStr();
605  if (sURLPath.match(",", nCharactersSkipped))
606  {
607  nCharactersSkipped += strlen(",");
608  OString sURLEncodedData(
609  sURLPath.getStr() + nCharactersSkipped,
610  sURLPath.getLength() - nCharactersSkipped,
611  RTL_TEXTENCODING_ISO_8859_1, OUSTRING_TO_OSTRING_CVTFLAGS);
612  return memoryStream(
613  sURLEncodedData.getStr(), sURLEncodedData.getLength());
614  }
615  else if (sURLPath.matchIgnoreAsciiCase(";base64,", nCharactersSkipped))
616  {
617  nCharactersSkipped += strlen(";base64,");
618  OUString sBase64Data = sURLPath.copy( nCharactersSkipped );
619  css::uno::Sequence< sal_Int8 > aDecodedData;
620  if (comphelper::Base64::decodeSomeChars(aDecodedData, sBase64Data)
621  == sBase64Data.getLength())
622  {
623  return memoryStream(
624  aDecodedData.getArray(), aDecodedData.getLength());
625  }
626  }
627  return nullptr;
628 }
629 
630 namespace {
631 
632 FSysStyle guessFSysStyleByCounting(sal_Unicode const * pBegin,
633  sal_Unicode const * pEnd,
634  FSysStyle eStyle)
635 {
636  DBG_ASSERT(eStyle
637  & (FSysStyle::Unix
638  | FSysStyle::Dos),
639  "guessFSysStyleByCounting(): Bad style");
640  DBG_ASSERT(std::numeric_limits< sal_Int32 >::min() < pBegin - pEnd
641  && pEnd - pBegin <= std::numeric_limits< sal_Int32 >::max(),
642  "guessFSysStyleByCounting(): Too big");
643  sal_Int32 nSlashCount
644  = (eStyle & FSysStyle::Unix) ?
645  0 : std::numeric_limits< sal_Int32 >::min();
646  sal_Int32 nBackslashCount
647  = (eStyle & FSysStyle::Dos) ?
648  0 : std::numeric_limits< sal_Int32 >::min();
649  while (pBegin != pEnd)
650  switch (*pBegin++)
651  {
652  case '/':
653  ++nSlashCount;
654  break;
655 
656  case '\\':
657  ++nBackslashCount;
658  break;
659  }
660  return nSlashCount >= nBackslashCount ?
662 }
663 
664 OUString parseScheme(
665  sal_Unicode const ** begin, sal_Unicode const * end,
666  sal_uInt32 fragmentDelimiter)
667 {
668  sal_Unicode const * p = *begin;
669  if (p != end && rtl::isAsciiAlpha(*p)) {
670  do {
671  ++p;
672  } while (p != end
673  && (rtl::isAsciiAlphanumeric(*p) || *p == '+' || *p == '-'
674  || *p == '.'));
675  // #i34835# To avoid problems with Windows file paths like "C:\foo",
676  // do not accept generic schemes that are only one character long:
677  if (end - p > 1 && p[0] == ':' && p[1] != fragmentDelimiter
678  && p - *begin >= 2)
679  {
680  OUString scheme(
681  OUString(*begin, p - *begin).toAsciiLowerCase());
682  *begin = p + 1;
683  return scheme;
684  }
685  }
686  return OUString();
687 }
688 
689 }
690 
691 bool INetURLObject::setAbsURIRef(OUString const & rTheAbsURIRef,
692  EncodeMechanism eMechanism,
693  rtl_TextEncoding eCharset,
694  bool bSmart,
695  FSysStyle eStyle)
696 {
697  sal_Unicode const * pPos = rTheAbsURIRef.getStr();
698  sal_Unicode const * pEnd = pPos + rTheAbsURIRef.getLength();
699 
700  setInvalid();
701 
702  sal_uInt32 nFragmentDelimiter = '#';
703 
704  OUStringBuffer aSynAbsURIRef(rTheAbsURIRef.getLength()*2);
705 
706  // Parse <scheme>:
707  sal_Unicode const * p = pPos;
708  PrefixInfo const * pPrefix = getPrefix(p, pEnd);
709  if (pPrefix)
710  {
711  pPos = p;
712  m_eScheme = pPrefix->m_eScheme;
713 
714  OUString sTemp(OUString::createFromAscii(pPrefix->m_eKind
715  >= PrefixInfo::Kind::External ?
716  pPrefix->m_pTranslatedPrefix :
717  pPrefix->m_pPrefix));
718  aSynAbsURIRef.append(sTemp);
719  m_aScheme = SubString( 0, sTemp.indexOf(':') );
720  }
721  else
722  {
723  if (bSmart)
724  {
725  // For scheme detection, the first (if any) of the following
726  // productions that matches the input string (and for which the
727  // appropriate style bit is set in eStyle, if applicable)
728  // determines the scheme. The productions use the auxiliary rules
729 
730  // domain = label *("." label)
731  // label = alphanum [*(alphanum / "-") alphanum]
732  // alphanum = ALPHA / DIGIT
733  // IPv6reference = "[" IPv6address "]"
734  // IPv6address = hexpart [":" IPv4address]
735  // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
736  // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
737  // hexseq = hex4 *(":" hex4)
738  // hex4 = 1*4HEXDIG
739  // UCS4 = <any UCS4 character>
740 
741  // 1st Production (known scheme; handled by the "if (pPrefix)" branch above):
742  // <one of the known schemes, ignoring case> ":" *UCS4
743  // 2nd Production (mailto):
744  // domain "@" domain
745  // 3rd Production (ftp):
746  // "FTP" 2*("." label) ["/" *UCS4]
747  // 4th Production (http):
748  // label 2*("." label) ["/" *UCS4]
749  // 5th Production (file):
750  // "//" (domain / IPv6reference) ["/" *UCS4]
751  // 6th Production (Unix file):
752  // "/" *UCS4
753  // 7th Production (UNC file; FSysStyle::Dos only):
754  // "\\" domain ["\" *UCS4]
755  // 8th Production (Unix-like DOS file; FSysStyle::Dos only):
756  // ALPHA ":" ["/" *UCS4]
757  // 9th Production (DOS file; FSysStyle::Dos only):
758  // ALPHA ":" ["\" *UCS4]
759  // 10th Production (any scheme; handled by the "m_eScheme = INetProtocol::Generic;" code
760  // after this else branch):
761  // <any scheme> ":" *UCS4
762 
763  // For the 'non URL' file productions 6--9, the interpretation of
764  // the input as a (degenerate) URI is turned off, i.e., escape
765  // sequences and fragments are never detected as such, but are
766  // taken as literal characters.
767 
768  sal_Unicode const * p1 = pPos;
769  if (eStyle & FSysStyle::Dos
770  && pEnd - p1 >= 2
771  && rtl::isAsciiAlpha(p1[0])
772  && p1[1] == ':'
773  && (pEnd - p1 == 2 || p1[2] == '/' || p1[2] == '\\'))
774  {
775  m_eScheme = INetProtocol::File; // 8th, 9th
776  eMechanism = EncodeMechanism::All;
777  nFragmentDelimiter = 0x80000000;
778  }
779  else if (pEnd - p1 >= 2 && p1[0] == '/' && p1[1] == '/')
780  {
781  p1 += 2;
782  if ((scanDomain(p1, pEnd) > 0 || scanIPv6reference(p1, pEnd))
783  && (p1 == pEnd || *p1 == '/'))
784  m_eScheme = INetProtocol::File; // 5th
785  }
786  else if (p1 != pEnd && *p1 == '/')
787  {
788  m_eScheme = INetProtocol::File; // 6th
789  eMechanism = EncodeMechanism::All;
790  nFragmentDelimiter = 0x80000000;
791  }
792  else if (eStyle & FSysStyle::Dos
793  && pEnd - p1 >= 2
794  && p1[0] == '\\'
795  && p1[1] == '\\')
796  {
797  p1 += 2;
798  sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
799  p1, pEnd - p1, '\\');
800  sal_Unicode const * pe = n == -1 ? pEnd : p1 + n;
801  if (
802  parseHostOrNetBiosName(
803  p1, pe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
804  true, nullptr) ||
805  (scanDomain(p1, pe) > 0 && p1 == pe)
806  )
807  {
808  m_eScheme = INetProtocol::File; // 7th
809  eMechanism = EncodeMechanism::All;
810  nFragmentDelimiter = 0x80000000;
811  }
812  }
813  else
814  {
815  sal_Unicode const * pDomainEnd = p1;
816  sal_uInt32 nLabels = scanDomain(pDomainEnd, pEnd);
817  if (nLabels > 0 && pDomainEnd != pEnd && *pDomainEnd == '@')
818  {
819  ++pDomainEnd;
820  if (scanDomain(pDomainEnd, pEnd) > 0
821  && pDomainEnd == pEnd)
822  m_eScheme = INetProtocol::Mailto; // 2nd
823  }
824  else if (nLabels >= 3
825  && (pDomainEnd == pEnd || *pDomainEnd == '/'))
826  m_eScheme
827  = pDomainEnd - p1 >= 4
828  && (p1[0] == 'f' || p1[0] == 'F')
829  && (p1[1] == 't' || p1[1] == 'T')
830  && (p1[2] == 'p' || p1[2] == 'P')
831  && p1[3] == '.' ?
833  }
834  }
835 
836  OUString aSynScheme;
837  if (m_eScheme == INetProtocol::NotValid) {
838  sal_Unicode const * p1 = pPos;
839  aSynScheme = parseScheme(&p1, pEnd, nFragmentDelimiter);
840  if (!aSynScheme.isEmpty())
841  {
842  m_eScheme = INetProtocol::Generic;
843  pPos = p1;
844  }
845  }
846 
847  if (bSmart && m_eScheme == INetProtocol::NotValid && pPos != pEnd
848  && *pPos != nFragmentDelimiter)
849  {
850  m_eScheme = m_eSmartScheme;
851  }
852 
853  if (m_eScheme == INetProtocol::NotValid)
854  {
855  setInvalid();
856  return false;
857  }
858 
859  if (m_eScheme != INetProtocol::Generic) {
860  aSynScheme = OUString::createFromAscii(getSchemeInfo().m_pScheme);
861  }
862  m_aScheme.set(aSynAbsURIRef, aSynScheme, aSynAbsURIRef.getLength());
863  aSynAbsURIRef.append(':');
864  }
865 
866  sal_uInt32 nSegmentDelimiter = '/';
867  sal_uInt32 nAltSegmentDelimiter = 0x80000000;
868  bool bSkippedInitialSlash = false;
869 
870  // Parse //<user>;AUTH=<auth>@<host>:<port> or
871  // //<user>:<password>@<host>:<port> or
872  // //<reg_name>
873  if (getSchemeInfo().m_bAuthority)
874  {
875  sal_Unicode const * pUserInfoBegin = nullptr;
876  sal_Unicode const * pUserInfoEnd = nullptr;
877  sal_Unicode const * pHostPortBegin = nullptr;
878  sal_Unicode const * pHostPortEnd = nullptr;
879 
880  switch (m_eScheme)
881  {
883  {
884  if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
885  {
886  setInvalid();
887  return false;
888  }
889  aSynAbsURIRef.append("//");
890  OUStringBuffer aSynAuthority;
891  while (pPos < pEnd
892  && *pPos != '/' && *pPos != '?'
893  && *pPos != nFragmentDelimiter)
894  {
895  EscapeType eEscapeType;
896  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
897  eMechanism,
898  eCharset, eEscapeType);
899  appendUCS4(aSynAuthority, nUTF32, eEscapeType,
900  PART_AUTHORITY, eCharset, false);
901  }
902  m_aHost.set(aSynAbsURIRef,
903  aSynAuthority.makeStringAndClear(),
904  aSynAbsURIRef.getLength());
905  // misusing m_aHost to store the authority
906  break;
907  }
908 
910  {
911  if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
912  {
913  pPos += 2;
914  aSynAbsURIRef.append("//");
915  OUStringBuffer aSynAuthority;
916  while (pPos < pEnd
917  && *pPos != '/' && *pPos != '?'
918  && *pPos != nFragmentDelimiter)
919  {
920  EscapeType eEscapeType;
921  sal_uInt32 nUTF32 = getUTF32(pPos,
922  pEnd,
923  eMechanism,
924  eCharset,
925  eEscapeType);
926  appendUCS4(aSynAuthority,
927  nUTF32,
928  eEscapeType,
929  PART_AUTHORITY,
930  eCharset,
931  false);
932  }
933  if (aSynAuthority.isEmpty())
934  {
935  setInvalid();
936  return false;
937  }
938  m_aHost.set(aSynAbsURIRef,
939  aSynAuthority.makeStringAndClear(),
940  aSynAbsURIRef.getLength());
941  // misusing m_aHost to store the authority
942  }
943  break;
944  }
945 
947  case INetProtocol::Cmis:
948  {
949  if (pEnd - pPos < 2 || *pPos++ != '/' || *pPos++ != '/')
950  {
951  setInvalid();
952  return false;
953  }
954  aSynAbsURIRef.append("//");
955  OUStringBuffer aSynUser(128);
956 
957  bool bHasUser = false;
958  while (pPos < pEnd && *pPos != '@'
959  && *pPos != '/' && *pPos != '?'
960  && *pPos != nFragmentDelimiter)
961  {
962  EscapeType eEscapeType;
963  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
964  eMechanism,
965  eCharset, eEscapeType);
966  appendUCS4(aSynUser, nUTF32, eEscapeType,
967  PART_USER_PASSWORD, eCharset, false);
968 
969  bHasUser = *pPos == '@';
970  }
971 
972  OUStringBuffer aSynAuthority(64);
973  if ( !bHasUser )
974  {
975  aSynAuthority = aSynUser;
976  }
977  else
978  {
979  m_aUser.set(aSynAbsURIRef,
980  aSynUser.makeStringAndClear(),
981  aSynAbsURIRef.getLength());
982  aSynAbsURIRef.append("@");
983  ++pPos;
984 
985  while (pPos < pEnd
986  && *pPos != '/' && *pPos != '?'
987  && *pPos != nFragmentDelimiter)
988  {
989  EscapeType eEscapeType;
990  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
991  eMechanism,
992  eCharset, eEscapeType);
993  appendUCS4(aSynAuthority, nUTF32, eEscapeType,
994  PART_AUTHORITY, eCharset, false);
995  }
996  }
997  if (aSynAuthority.isEmpty())
998  {
999  setInvalid();
1000  return false;
1001  }
1002  m_aHost.set(aSynAbsURIRef,
1003  aSynAuthority.makeStringAndClear(),
1004  aSynAbsURIRef.getLength());
1005  // misusing m_aHost to store the authority
1006  break;
1007  }
1008 
1009  case INetProtocol::File:
1010  if (bSmart)
1011  {
1012  // The first of the following nine productions that
1013  // matches the rest of the input string (and for which the
1014  // appropriate style bit is set in eStyle, if applicable)
1015  // determines the used notation. The productions use the
1016  // auxiliary rules
1017 
1018  // domain = label *("." label)
1019  // label = alphanum [*(alphanum / "-") alphanum]
1020  // alphanum = ALPHA / DIGIT
1021  // IPv6reference = "[" IPv6address "]"
1022  // IPv6address = hexpart [":" IPv4address]
1023  // IPv4address = 1*3DIGIT 3("." 1*3DIGIT)
1024  // hexpart = (hexseq ["::" [hexseq]]) / ("::" [hexseq])
1025  // hexseq = hex4 *(":" hex4)
1026  // hex4 = 1*4HEXDIG
1027  // path = <any UCS4 character except "#">
1028  // UCS4 = <any UCS4 character>
1029 
1030  // 1st Production (URL):
1031  // "//" [domain / IPv6reference] ["/" *path]
1032  // ["#" *UCS4]
1033  // becomes
1034  // "file://" domain "/" *path ["#" *UCS4]
1035  if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1036  {
1037  sal_Unicode const * p1 = pPos + 2;
1038  while (p1 != pEnd && *p1 != '/' &&
1039  *p1 != nFragmentDelimiter)
1040  {
1041  ++p1;
1042  }
1043  if (parseHostOrNetBiosName(
1044  pPos + 2, p1, EncodeMechanism::All,
1045  RTL_TEXTENCODING_DONTKNOW, true, nullptr))
1046  {
1047  aSynAbsURIRef.append("//");
1048  pHostPortBegin = pPos + 2;
1049  pHostPortEnd = p1;
1050  pPos = p1;
1051  break;
1052  }
1053  }
1054 
1055  // 2nd Production (MS IE generated 1; FSysStyle::Dos only):
1056  // "//" ALPHA ":" ["/" *path] ["#" *UCS4]
1057  // becomes
1058  // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1059  // replacing "\" by "/" within <*path>
1060  // 3rd Production (MS IE generated 2; FSysStyle::Dos only):
1061  // "//" ALPHA ":" ["\" *path] ["#" *UCS4]
1062  // becomes
1063  // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1064  // replacing "\" by "/" within <*path>
1065  // 4th Production (miscounted slashes):
1066  // "//" *path ["#" *UCS4]
1067  // becomes
1068  // "file:///" *path ["#" *UCS4]
1069  if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1070  {
1071  aSynAbsURIRef.append("//");
1072  pPos += 2;
1073  bSkippedInitialSlash = true;
1074  if ((eStyle & FSysStyle::Dos)
1075  && pEnd - pPos >= 2
1076  && rtl::isAsciiAlpha(pPos[0])
1077  && pPos[1] == ':'
1078  && (pEnd - pPos == 2
1079  || pPos[2] == '/' || pPos[2] == '\\'))
1080  nAltSegmentDelimiter = '\\';
1081  break;
1082  }
1083 
1084  // 5th Production (Unix):
1085  // "/" *path ["#" *UCS4]
1086  // becomes
1087  // "file:///" *path ["#" *UCS4]
1088  if (pPos < pEnd && *pPos == '/')
1089  {
1090  aSynAbsURIRef.append("//");
1091  break;
1092  }
1093 
1094  // 6th Production (UNC; FSysStyle::Dos only):
1095  // "\\" domain ["\" *path] ["#" *UCS4]
1096  // becomes
1097  // "file://" domain "/" *path ["#" *UCS4]
1098  // replacing "\" by "/" within <*path>
1099  if (eStyle & FSysStyle::Dos
1100  && pEnd - pPos >= 2
1101  && pPos[0] == '\\'
1102  && pPos[1] == '\\')
1103  {
1104  sal_Unicode const * p1 = pPos + 2;
1105  sal_Unicode const * pe = p1;
1106  while (pe < pEnd && *pe != '\\' &&
1107  *pe != nFragmentDelimiter)
1108  {
1109  ++pe;
1110  }
1111  if (
1112  parseHostOrNetBiosName(
1113  p1, pe, EncodeMechanism::All,
1114  RTL_TEXTENCODING_DONTKNOW, true, nullptr) ||
1115  (scanDomain(p1, pe) > 0 && p1 == pe)
1116  )
1117  {
1118  aSynAbsURIRef.append("//");
1119  pHostPortBegin = pPos + 2;
1120  pHostPortEnd = pe;
1121  pPos = pe;
1122  nSegmentDelimiter = '\\';
1123  break;
1124  }
1125  }
1126 
1127  // 7th Production (Unix-like DOS; FSysStyle::Dos only):
1128  // ALPHA ":" ["/" *path] ["#" *UCS4]
1129  // becomes
1130  // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1131  // replacing "\" by "/" within <*path>
1132  // 8th Production (DOS; FSysStyle::Dos only):
1133  // ALPHA ":" ["\" *path] ["#" *UCS4]
1134  // becomes
1135  // "file:///" ALPHA ":" ["/" *path] ["#" *UCS4]
1136  // replacing "\" by "/" within <*path>
1137  if (eStyle & FSysStyle::Dos
1138  && pEnd - pPos >= 2
1139  && rtl::isAsciiAlpha(pPos[0])
1140  && pPos[1] == ':'
1141  && (pEnd - pPos == 2
1142  || pPos[2] == '/'
1143  || pPos[2] == '\\'))
1144  {
1145  aSynAbsURIRef.append("//");
1146  nAltSegmentDelimiter = '\\';
1147  bSkippedInitialSlash = true;
1148  break;
1149  }
1150 
1151  // 9th Production (any):
1152  // *path ["#" *UCS4]
1153  // becomes
1154  // "file:///" *path ["#" *UCS4]
1155  // replacing the delimiter by "/" within <*path>. The
1156  // delimiter is that character from the set { "/", "\"}
1157  // which appears most often in <*path> (if FSysStyle::Unix
1158  // is not among the style bits, "/" is removed from the
1159  // set; if FSysStyle::Dos is not among the style bits, "\" is
1160  // removed from the set). If two or
1161  // more characters appear the same number of times, the
1162  // character mentioned first in that set is chosen. If
1163  // the first character of <*path> is the delimiter, that
1164  // character is not copied
1165  if (eStyle & (FSysStyle::Unix | FSysStyle::Dos))
1166  {
1167  aSynAbsURIRef.append("//");
1168  switch (guessFSysStyleByCounting(pPos, pEnd, eStyle))
1169  {
1170  case FSysStyle::Unix:
1171  nSegmentDelimiter = '/';
1172  break;
1173 
1174  case FSysStyle::Dos:
1175  nSegmentDelimiter = '\\';
1176  break;
1177 
1178  default:
1179  OSL_FAIL(
1180  "INetURLObject::setAbsURIRef():"
1181  " Bad guessFSysStyleByCounting");
1182  break;
1183  }
1184  bSkippedInitialSlash
1185  = pPos != pEnd && *pPos != nSegmentDelimiter;
1186  break;
1187  }
1188  }
1189  [[fallthrough]];
1190  default:
1191  {
1192  // For INetProtocol::File, allow an empty authority ("//") to be
1193  // missing if the following path starts with an explicit "/"
1194  // (Java is notorious in generating such file URLs, so be
1195  // liberal here):
1196  if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
1197  pPos += 2;
1198  else if (!bSmart
1199  && !(m_eScheme == INetProtocol::File
1200  && pPos != pEnd && *pPos == '/'))
1201  {
1202  setInvalid();
1203  return false;
1204  }
1205  aSynAbsURIRef.append("//");
1206 
1207  sal_Unicode const * pAuthority = pPos;
1208  sal_uInt32 c = getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1209  while (pPos < pEnd && *pPos != '/' && *pPos != c
1210  && *pPos != nFragmentDelimiter)
1211  ++pPos;
1212  if (getSchemeInfo().m_bUser)
1213  if (getSchemeInfo().m_bHost)
1214  {
1215  sal_Unicode const * p1 = pAuthority;
1216  while (p1 < pPos && *p1 != '@')
1217  ++p1;
1218  if (p1 == pPos)
1219  {
1220  pHostPortBegin = pAuthority;
1221  pHostPortEnd = pPos;
1222  }
1223  else
1224  {
1225  pUserInfoBegin = pAuthority;
1226  pUserInfoEnd = p1;
1227  pHostPortBegin = p1 + 1;
1228  pHostPortEnd = pPos;
1229  }
1230  }
1231  else
1232  {
1233  pUserInfoBegin = pAuthority;
1234  pUserInfoEnd = pPos;
1235  }
1236  else if (getSchemeInfo().m_bHost)
1237  {
1238  pHostPortBegin = pAuthority;
1239  pHostPortEnd = pPos;
1240  }
1241  else if (pPos != pAuthority)
1242  {
1243  setInvalid();
1244  return false;
1245  }
1246  break;
1247  }
1248  }
1249 
1250  if (pUserInfoBegin)
1251  {
1252  Part ePart = PART_USER_PASSWORD;
1253  bool bSupportsPassword = getSchemeInfo().m_bPassword;
1254  bool bSupportsAuth
1255  = !bSupportsPassword && getSchemeInfo().m_bAuth;
1256  bool bHasAuth = false;
1257  OUStringBuffer aSynUser;
1258  sal_Unicode const * p1 = pUserInfoBegin;
1259  while (p1 < pUserInfoEnd)
1260  {
1261  EscapeType eEscapeType;
1262  sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1263  eMechanism, eCharset, eEscapeType);
1264  if (eEscapeType == EscapeType::NONE)
1265  {
1266  if (nUTF32 == ':' && bSupportsPassword)
1267  {
1268  bHasAuth = true;
1269  break;
1270  }
1271  else if (nUTF32 == ';' && bSupportsAuth
1272  && pUserInfoEnd - p1
1273  > RTL_CONSTASCII_LENGTH("auth=")
1275  p1,
1276  p1 + RTL_CONSTASCII_LENGTH("auth="),
1277  "auth="))
1278  {
1279  p1 += RTL_CONSTASCII_LENGTH("auth=");
1280  bHasAuth = true;
1281  break;
1282  }
1283  }
1284  appendUCS4(aSynUser, nUTF32, eEscapeType, ePart,
1285  eCharset, false);
1286  }
1287  m_aUser.set(aSynAbsURIRef, aSynUser.makeStringAndClear(),
1288  aSynAbsURIRef.getLength());
1289  if (bHasAuth)
1290  {
1291  if (bSupportsPassword)
1292  {
1293  aSynAbsURIRef.append(':');
1294  OUStringBuffer aSynAuth;
1295  while (p1 < pUserInfoEnd)
1296  {
1297  EscapeType eEscapeType;
1298  sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1299  eMechanism, eCharset,
1300  eEscapeType);
1301  appendUCS4(aSynAuth, nUTF32, eEscapeType,
1302  ePart, eCharset, false);
1303  }
1304  m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1305  aSynAbsURIRef.getLength());
1306  }
1307  else
1308  {
1309  aSynAbsURIRef.append(";AUTH=");
1310  OUStringBuffer aSynAuth;
1311  while (p1 < pUserInfoEnd)
1312  {
1313  EscapeType eEscapeType;
1314  sal_uInt32 nUTF32 = getUTF32(p1, pUserInfoEnd,
1315  eMechanism, eCharset,
1316  eEscapeType);
1317  if (!INetMIME::isIMAPAtomChar(nUTF32))
1318  {
1319  setInvalid();
1320  return false;
1321  }
1322  appendUCS4(aSynAuth, nUTF32, eEscapeType,
1323  ePart, eCharset, false);
1324  }
1325  m_aAuth.set(aSynAbsURIRef, aSynAuth.makeStringAndClear(),
1326  aSynAbsURIRef.getLength());
1327  }
1328  }
1329  if (pHostPortBegin)
1330  aSynAbsURIRef.append('@');
1331  }
1332 
1333  if (pHostPortBegin)
1334  {
1335  sal_Unicode const * pPort = pHostPortEnd;
1336  if ( getSchemeInfo().m_bPort && pHostPortBegin < pHostPortEnd )
1337  {
1338  sal_Unicode const * p1 = pHostPortEnd - 1;
1339  while (p1 > pHostPortBegin && rtl::isAsciiDigit(*p1))
1340  --p1;
1341  if (*p1 == ':')
1342  pPort = p1;
1343  }
1344  bool bNetBiosName = false;
1345  switch (m_eScheme)
1346  {
1347  case INetProtocol::File:
1348  // If the host equals "LOCALHOST" (unencoded and ignoring
1349  // case), turn it into an empty host:
1350  if (INetMIME::equalIgnoreCase(pHostPortBegin, pPort,
1351  "localhost"))
1352  pHostPortBegin = pPort;
1353  bNetBiosName = true;
1354  break;
1355 
1356  case INetProtocol::Ldap:
1357  case INetProtocol::Smb:
1358  if (pHostPortBegin == pPort && pPort != pHostPortEnd)
1359  {
1360  setInvalid();
1361  return false;
1362  }
1363  break;
1364  default:
1365  if (pHostPortBegin == pPort)
1366  {
1367  setInvalid();
1368  return false;
1369  }
1370  break;
1371  }
1372  OUStringBuffer aSynHost(64);
1373  if (!parseHostOrNetBiosName(
1374  pHostPortBegin, pPort, eMechanism, eCharset,
1375  bNetBiosName, &aSynHost))
1376  {
1377  setInvalid();
1378  return false;
1379  }
1380  m_aHost.set(aSynAbsURIRef, aSynHost.makeStringAndClear(),
1381  aSynAbsURIRef.getLength());
1382  if (pPort != pHostPortEnd)
1383  {
1384  aSynAbsURIRef.append(':');
1385  m_aPort.set(aSynAbsURIRef,
1386  OUString(pPort + 1, pHostPortEnd - (pPort + 1)),
1387  aSynAbsURIRef.getLength());
1388  }
1389  }
1390  }
1391 
1392  // Parse <path>
1393  OUStringBuffer aSynPath;
1394  if (!parsePath(m_eScheme, &pPos, pEnd, eMechanism, eCharset,
1395  bSkippedInitialSlash, nSegmentDelimiter,
1396  nAltSegmentDelimiter,
1397  getSchemeInfo().m_bQuery ? '?' : 0x80000000,
1398  nFragmentDelimiter, aSynPath))
1399  {
1400  setInvalid();
1401  return false;
1402  }
1403  m_aPath.set(aSynAbsURIRef, aSynPath.makeStringAndClear(),
1404  aSynAbsURIRef.getLength());
1405 
1406  // Parse ?<query>
1407  if (getSchemeInfo().m_bQuery && pPos < pEnd && *pPos == '?')
1408  {
1409  aSynAbsURIRef.append('?');
1410  OUStringBuffer aSynQuery;
1411  for (++pPos; pPos < pEnd && *pPos != nFragmentDelimiter;)
1412  {
1413  EscapeType eEscapeType;
1414  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1415  eMechanism, eCharset, eEscapeType);
1416  appendUCS4(aSynQuery, nUTF32, eEscapeType,
1417  PART_URIC, eCharset, true);
1418  }
1419  m_aQuery.set(aSynAbsURIRef, aSynQuery.makeStringAndClear(),
1420  aSynAbsURIRef.getLength());
1421  }
1422 
1423  // Parse #<fragment>
1424  if (pPos < pEnd && *pPos == nFragmentDelimiter)
1425  {
1426  aSynAbsURIRef.append(sal_Unicode(nFragmentDelimiter));
1427  OUStringBuffer aSynFragment;
1428  for (++pPos; pPos < pEnd;)
1429  {
1430  EscapeType eEscapeType;
1431  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
1432  eMechanism, eCharset, eEscapeType);
1433  appendUCS4(aSynFragment, nUTF32, eEscapeType, PART_URIC,
1434  eCharset, true);
1435  }
1436  m_aFragment.set(aSynAbsURIRef, aSynFragment.makeStringAndClear(),
1437  aSynAbsURIRef.getLength());
1438  }
1439 
1440  if (pPos != pEnd)
1441  {
1442  setInvalid();
1443  return false;
1444  }
1445 
1446  m_aAbsURIRef = aSynAbsURIRef;
1447 
1448  return true;
1449 }
1450 
// NOTE(review): the line carrying this function's signature (listing line
// 1451) is absent from this listing. The body reads a parameter named
// eTargetScheme, so this is presumably INetURLObject's scheme-changing
// routine -- confirm against the original file.
//
// Replaces the scheme name at the front of m_aAbsURIRef with the name of
// eTargetScheme and shifts the stored component positions by the difference
// in length.

// Snapshot the current URI text and empty the buffer so it can be rebuilt.
1452  OUString aTmpStr=m_aAbsURIRef.toString();
1453  m_aAbsURIRef.setLength(0);
// Length of the scheme name currently at the front of the URI text: for a
// generic scheme it is taken from m_aScheme, otherwise from the scheme info.
1454  int oldSchemeLen = 0;
1455  if (m_eScheme == INetProtocol::Generic)
1456  oldSchemeLen = m_aScheme.getLength();
1457  else
1458  oldSchemeLen = strlen(getSchemeInfo().m_pScheme);
// From here on getSchemeInfo() is queried after m_eScheme has changed;
// presumably it then describes the target scheme -- confirm.
// NOTE(review): a comment elsewhere in this file states that m_pScheme is
// empty for INetProtocol::Generic, so a generic target would presumably yield
// an empty scheme name here -- confirm that callers never pass it.
1459  m_eScheme=eTargetScheme;
1460  int newSchemeLen=strlen(getSchemeInfo().m_pScheme);
// New text = new scheme name + everything that followed the old scheme name.
1461  m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1462  m_aAbsURIRef.append(aTmpStr.getStr()+oldSchemeLen);
// All components after the scheme moved by the same amount. m_aScheme itself
// is not modified anywhere in this body.
1463  int delta=newSchemeLen-oldSchemeLen;
1464  m_aUser+=delta;
1465  m_aAuth+=delta;
1466  m_aHost+=delta;
1467  m_aPort+=delta;
1468  m_aPath+=delta;
1469  m_aQuery+=delta;
1470  m_aFragment+=delta;
1471 }
1472 
// Resolves rTheRelURIRef against this object (the base URL) and stores the
// resulting absolute URL in rTheAbsURIRef.
//
// rTheRelURIRef     the reference to resolve; it may already be absolute
// rTheAbsURIRef     receives the result; assigned only when true is returned
// rWasAbsolute      set to true when the input was accepted as absolute (it
//                   carries a scheme, or bSmart recognised it as an absolute
//                   DOS/UNC file system path); false otherwise. A known prefix
//                   whose scheme equals the base's hierarchical scheme is
//                   ignored and does not count as absolute.
// eMechanism        passed on to getUTF32() and to the INetURLObject built for
//                   absolute input; replaced by EncodeMechanism::All when the
//                   input is handled as a relative non-URI
// eCharset          passed on to getUTF32() and appendUCS4()
// bIgnoreFragment   if true, the fragment of the input is left out of the
//                   result
// bSmart            if true, input without a scheme is additionally checked
//                   for file system path notations
// bRelativeNonURIs  only looked at when bSmart is set and no scheme was found:
//                   if true, '?' and '#' are not treated as delimiters
// eStyle            the file system notations to consider
//
// Returns true on success, false if no valid absolute URL could be built.
1473 bool INetURLObject::convertRelToAbs(OUString const & rTheRelURIRef,
1474  INetURLObject & rTheAbsURIRef,
1475  bool & rWasAbsolute,
1476  EncodeMechanism eMechanism,
1477  rtl_TextEncoding eCharset,
1478  bool bIgnoreFragment, bool bSmart,
1479  bool bRelativeNonURIs, FSysStyle eStyle)
1480  const
1481 {
1482  sal_Unicode const * p = rTheRelURIRef.getStr();
1483  sal_Unicode const * pEnd = p + rTheRelURIRef.getLength();
1484 
// Decide whether the input starts with a scheme: first via getPrefix()
// (presumably a lookup of known URL prefixes -- confirm), and if that finds
// nothing, via parseScheme(), which returns a non-empty string on success.
// Both work on the copy pPrefixBegin, so p itself is not moved here.
1485  sal_Unicode const * pPrefixBegin = p;
1486  PrefixInfo const * pPrefix = getPrefix(pPrefixBegin, pEnd);
1487  bool hasScheme = pPrefix != nullptr;
1488  if (!hasScheme) {
1489  pPrefixBegin = p;
1490  hasScheme = !parseScheme(&pPrefixBegin, pEnd, '#').isEmpty();
1491  }
1492 
// Delimiters used while scanning the input. 0x80000000 stands in for "no such
// delimiter" (presumably because it can never equal one of the UTF-16 code
// units it is compared against -- confirm).
1493  sal_uInt32 nSegmentDelimiter = '/';
1494  sal_uInt32 nQueryDelimiter
1495  = !bSmart || getSchemeInfo().m_bQuery ? '?' : 0x80000000;
1496  sal_uInt32 nFragmentDelimiter = '#';
1497  Part ePart = PART_VISIBLE;
1498 
1499  if (!hasScheme && bSmart)
1500  {
1501  // If the input matches any of the following productions (for which
1502  // the appropriate style bit is set in eStyle), it is assumed to be an
1503  // absolute file system path, rather than a relative URI reference.
1504  // (This is only a subset of the productions used for scheme detection
1505  // in INetURLObject::setAbsURIRef(), because most of those productions
1506  // interfere with the syntax of relative URI references.) The
1507  // productions use the auxiliary rules
1508 
1509  // domain = label *("." label)
1510  // label = alphanum [*(alphanum / "-") alphanum]
1511  // alphanum = ALPHA / DIGIT
1512  // UCS4 = <any UCS4 character>
1513 
1514  // 1st Production (UNC file; FSysStyle::Dos only):
1515  // "\\" domain ["\" *UCS4]
1516  // 2nd Production (Unix-like DOS file; FSysStyle::Dos only):
1517  // ALPHA ":" ["/" *UCS4]
1518  // 3rd Production (DOS file; FSysStyle::Dos only):
1519  // ALPHA ":" ["\" *UCS4]
1520  if (eStyle & FSysStyle::Dos)
1521  {
1522  bool bFSys = false;
1523  sal_Unicode const * q = p;
1524  if (pEnd - q >= 2
1525  && rtl::isAsciiAlpha(q[0])
1526  && q[1] == ':'
1527  && (pEnd - q == 2 || q[2] == '/' || q[2] == '\\'))
1528  bFSys = true; // 2nd, 3rd
1529  else if (pEnd - q >= 2 && q[0] == '\\' && q[1] == '\\')
1530  {
// The candidate host name runs from after the leading "\\" up to the next
// backslash, or to the end of the input if there is none.
1531  q += 2;
1532  sal_Int32 n = rtl_ustr_indexOfChar_WithLength(
1533  q, pEnd - q, '\\');
1534  sal_Unicode const * qe = n == -1 ? pEnd : q + n;
1535  if (parseHostOrNetBiosName(
1536  q, qe, EncodeMechanism::All, RTL_TEXTENCODING_DONTKNOW,
1537  true, nullptr))
1538  {
1539  bFSys = true; // 1st
1540  }
1541  }
1542  if (bFSys)
1543  {
// Let setAbsURIRef() parse the whole input (its fourth argument is passed as
// true here). If that fails, fall through and treat the input as a relative
// reference after all.
1544  INetURLObject aNewURI;
1545  aNewURI.setAbsURIRef(rTheRelURIRef, eMechanism,
1546  eCharset, true, eStyle);
1547  if (!aNewURI.HasError())
1548  {
1549  rTheAbsURIRef = aNewURI;
1550  rWasAbsolute = true;
1551  return true;
1552  }
1553  }
1554  }
1555 
1556  // When the base URL is a file URL, accept relative file system paths
1557  // using "\" or ":" as delimiter (and ignoring URI conventions for "%"
1558  // and "#"), as well as relative URIs using "/" as delimiter:
1559  if (m_eScheme == INetProtocol::File)
1560  switch (guessFSysStyleByCounting(p, pEnd, eStyle))
1561  {
1562  case FSysStyle::Unix:
1563  nSegmentDelimiter = '/';
1564  break;
1565 
1566  case FSysStyle::Dos:
1567  nSegmentDelimiter = '\\';
1568  bRelativeNonURIs = true;
1569  break;
1570 
1571  default:
1572  OSL_FAIL("INetURLObject::convertRelToAbs():"
1573  " Bad guessFSysStyleByCounting");
1574  break;
1575  }
1576 
// Relative non-URIs: encode everything and stop recognising '?' and '#' as
// delimiters.
1577  if (bRelativeNonURIs)
1578  {
1579  eMechanism = EncodeMechanism::All;
1580  nQueryDelimiter = 0x80000000;
1581  nFragmentDelimiter = 0x80000000;
1582  ePart = PART_VISIBLE_NONSPECIAL;
1583  }
1584  }
1585 
1586  // If the relative URI has the same scheme as the base URI, and that
1587  // scheme is hierarchical, then ignore its presence in the relative
1588  // URI in order to be backward compatible (cf. RFC 2396 section 5.2
1589  // step 3):
1590  if (pPrefix && pPrefix->m_eScheme == m_eScheme
1591  && getSchemeInfo().m_bHierarchical)
1592  {
1593  hasScheme = false;
// Advance p past the first ':' (or to the end of the input if there is none).
1594  while (p != pEnd && *p++ != ':') ;
1595  }
1596  rWasAbsolute = hasScheme;
1597 
1598  // Fast solution for non-relative URIs:
1599  if (hasScheme)
1600  {
1601  INetURLObject aNewURI(rTheRelURIRef, eMechanism, eCharset);
1602  if (aNewURI.HasError())
1603  {
1604  rWasAbsolute = false;
1605  return false;
1606  }
1607 
1608  if (bIgnoreFragment)
1609  aNewURI.clearFragment();
1610  rTheAbsURIRef = aNewURI;
1611  return true;
1612  }
1613 
// From here on the input is relative. The result is assembled in
// aSynAbsURIRef as scheme, authority, path, query and fragment; eState names
// the component that the remaining input (from p) belongs to next.
1614  enum State { STATE_AUTH, STATE_ABS_PATH, STATE_REL_PATH, STATE_FRAGMENT,
1615  STATE_DONE };
1616 
1617  OUStringBuffer aSynAbsURIRef(128);
1618  // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
1619  // is empty ("") in that case, so take the scheme from m_aAbsURIRef
1620  if (m_eScheme != INetProtocol::Generic)
1621  {
1622  aSynAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
1623  }
1624  else
1625  {
1626  sal_Unicode const * pSchemeBegin
1627  = m_aAbsURIRef.getStr();
1628  sal_Unicode const * pSchemeEnd = pSchemeBegin;
// NOTE(review): this scan has no end check; it relies on the base URL text
// containing a ':' -- confirm that this always holds for generic schemes.
1629  while (pSchemeEnd[0] != ':')
1630  {
1631  ++pSchemeEnd;
1632  }
1633  aSynAbsURIRef.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
1634  }
1635  aSynAbsURIRef.append(':');
1636 
// bSameDoc stays true only if the input turns out to be empty or to consist
// of nothing but a fragment; in that case the base's path and query are
// reused further below.
1637  State eState = STATE_AUTH;
1638  bool bSameDoc = true;
1639 
1640  if (getSchemeInfo().m_bAuthority)
1641  {
1642  if (pEnd - p >= 2 && p[0] == '/' && p[1] == '/')
1643  {
// The input brings its own authority: copy it up to the next unescaped
// segment or fragment delimiter. getUTF32() consumes the characters it
// returns, so p ends up behind the delimiter that stopped the loop.
1644  aSynAbsURIRef.append("//");
1645  p += 2;
1646  eState = STATE_ABS_PATH;
1647  bSameDoc = false;
1648  while (p != pEnd)
1649  {
1650  EscapeType eEscapeType;
1651  sal_uInt32 nUTF32
1652  = getUTF32(p, pEnd, eMechanism,
1653  eCharset, eEscapeType);
1654  if (eEscapeType == EscapeType::NONE)
1655  {
1656  if (nUTF32 == nSegmentDelimiter)
1657  break;
1658  else if (nUTF32 == nFragmentDelimiter)
1659  {
1660  eState = STATE_FRAGMENT;
1661  break;
1662  }
1663  }
1664  appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1665  PART_VISIBLE, eCharset, true);
1666  }
1667  }
1668  else
1669  {
// No authority in the input: take over the base's authority text unchanged.
1670  SubString aAuthority(getAuthority());
1671  aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1672  + aAuthority.getBegin(),
1673  aAuthority.getLength());
1674  }
1675  }
1676 
// Still in STATE_AUTH means the input had no authority of its own; classify
// what follows by its first character.
1677  if (eState == STATE_AUTH)
1678  {
1679  if (p == pEnd)
1680  eState = STATE_DONE;
1681  else if (*p == nFragmentDelimiter)
1682  {
1683  ++p;
1684  eState = STATE_FRAGMENT;
1685  }
1686  else if (*p == nSegmentDelimiter)
1687  {
1688  ++p;
1689  eState = STATE_ABS_PATH;
1690  bSameDoc = false;
1691  }
1692  else
1693  {
1694  eState = STATE_REL_PATH;
1695  bSameDoc = false;
1696  }
1697  }
1698 
1699  if (eState == STATE_ABS_PATH)
1700  {
// Absolute path in the input: nothing of the base path is used. The rest of
// the input is copied up to an unescaped fragment delimiter, with unescaped
// segment delimiters written as '/'.
1701  aSynAbsURIRef.append('/');
1702  eState = STATE_DONE;
1703  while (p != pEnd)
1704  {
1705  EscapeType eEscapeType;
1706  sal_uInt32 nUTF32
1707  = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1708  if (eEscapeType == EscapeType::NONE)
1709  {
1710  if (nUTF32 == nFragmentDelimiter)
1711  {
1712  eState = STATE_FRAGMENT;
1713  break;
1714  }
1715  else if (nUTF32 == nSegmentDelimiter)
1716  nUTF32 = '/';
1717  }
1718  appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1719  eCharset, true);
1720  }
1721  }
1722  else if (eState == STATE_REL_PATH)
1723  {
// A relative path can only be merged into a hierarchical base.
1724  if (!getSchemeInfo().m_bHierarchical)
1725  {
1726  // Detect cases where a relative input could not be made absolute
1727  // because the given base URL is broken (most probably because it is
1728  // empty):
1729  SAL_WARN_IF(
1730  HasError(), "tools.urlobj",
1731  "cannot make <" << rTheRelURIRef
1732  << "> absolute against broken base <"
1733  << GetMainURL(DecodeMechanism::NONE) << ">");
1734  rWasAbsolute = false;
1735  return false;
1736  }
1737 
// Cut the base path right after its last '/', i.e. drop its final segment.
// If the base path contains no '/', the range ends up empty.
1738  sal_Unicode const * pBasePathBegin
1739  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1740  sal_Unicode const * pBasePathEnd
1741  = pBasePathBegin + m_aPath.getLength();
1742  while (pBasePathEnd != pBasePathBegin)
1743  if (*(--pBasePathEnd) == '/')
1744  {
1745  ++pBasePathEnd;
1746  break;
1747  }
1748 
1749  sal_Int32 nPathBegin = aSynAbsURIRef.getLength();
1750  aSynAbsURIRef.append(pBasePathBegin, pBasePathEnd - pBasePathBegin);
1751  DBG_ASSERT(aSynAbsURIRef.getLength() > nPathBegin
1752  && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1753  "INetURLObject::convertRelToAbs(): Bad base path");
1754 
// Append the input's segments one per iteration. A "." segment is dropped. A
// ".." segment removes the last segment appended so far, unless the path
// built so far is only one character long; in that case ".." is copied like
// an ordinary segment by the code below.
1755  while (p != pEnd && *p != nQueryDelimiter && *p != nFragmentDelimiter)
1756  {
1757  if (*p == '.')
1758  {
1759  if (pEnd - p == 1
1760  || p[1] == nSegmentDelimiter
1761  || p[1] == nQueryDelimiter
1762  || p[1] == nFragmentDelimiter)
1763  {
1764  ++p;
1765  if (p != pEnd && *p == nSegmentDelimiter)
1766  ++p;
1767  continue;
1768  }
1769  else if (pEnd - p >= 2
1770  && p[1] == '.'
1771  && (pEnd - p == 2
1772  || p[2] == nSegmentDelimiter
1773  || p[2] == nQueryDelimiter
1774  || p[2] == nFragmentDelimiter)
1775  && aSynAbsURIRef.getLength() - nPathBegin > 1)
1776  {
1777  p += 2;
1778  if (p != pEnd && *p == nSegmentDelimiter)
1779  ++p;
1780 
// Start before the trailing '/' and walk back to the previous '/' (but not
// past the start of the path), then truncate right after it.
1781  sal_Int32 i = aSynAbsURIRef.getLength() - 2;
1782  while (i > nPathBegin && aSynAbsURIRef[i] != '/')
1783  --i;
1784  aSynAbsURIRef.setLength(i + 1);
1785  DBG_ASSERT(
1786  aSynAbsURIRef.getLength() > nPathBegin
1787  && aSynAbsURIRef[aSynAbsURIRef.getLength() - 1] == '/',
1788  "INetURLObject::convertRelToAbs(): Bad base path");
1789  continue;
1790  }
1791  }
1792 
// Ordinary segment: copy it, then write a following segment delimiter as '/'.
1793  while (p != pEnd
1794  && *p != nSegmentDelimiter
1795  && *p != nQueryDelimiter
1796  && *p != nFragmentDelimiter)
1797  {
1798  EscapeType eEscapeType;
1799  sal_uInt32 nUTF32
1800  = getUTF32(p, pEnd, eMechanism,
1801  eCharset, eEscapeType);
1802  appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1803  eCharset, true);
1804  }
1805  if (p != pEnd && *p == nSegmentDelimiter)
1806  {
1807  aSynAbsURIRef.append('/');
1808  ++p;
1809  }
1810  }
1811 
// Whatever is left before the fragment delimiter (the query part, including
// its leading delimiter) is copied as is.
1812  while (p != pEnd && *p != nFragmentDelimiter)
1813  {
1814  EscapeType eEscapeType;
1815  sal_uInt32 nUTF32
1816  = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1817  appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType, ePart,
1818  eCharset, true);
1819  }
1820 
1821  if (p == pEnd)
1822  eState = STATE_DONE;
1823  else
1824  {
// Step over the fragment delimiter itself.
1825  ++p;
1826  eState = STATE_FRAGMENT;
1827  }
1828  }
1829  else if (bSameDoc)
1830  {
// Empty or fragment-only input: reuse the base's path and, if present, its
// query. The query is copied starting one character before m_aQuery's begin
// (presumably the '?' that precedes it -- confirm).
1831  aSynAbsURIRef.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
1832  m_aPath.getLength());
1833  if (m_aQuery.isPresent())
1834  aSynAbsURIRef.append(m_aAbsURIRef.getStr()
1835  + m_aQuery.getBegin() - 1,
1836  m_aQuery.getLength() + 1);
1837  }
1838 
// Copy the fragment unless the caller asked for it to be ignored. The result
// always uses '#', whatever nFragmentDelimiter was set to.
1839  if (eState == STATE_FRAGMENT && !bIgnoreFragment)
1840  {
1841  aSynAbsURIRef.append('#');
1842  while (p != pEnd)
1843  {
1844  EscapeType eEscapeType;
1845  sal_uInt32 nUTF32
1846  = getUTF32(p, pEnd, eMechanism, eCharset, eEscapeType);
1847  appendUCS4(aSynAbsURIRef, nUTF32, eEscapeType,
1848  PART_VISIBLE, eCharset, true);
1849  }
1850  }
1851 
// Parse the assembled text into a fresh INetURLObject (using that
// constructor's default arguments) and reject it if the parse reports an
// error.
1852  INetURLObject aNewURI(aSynAbsURIRef.makeStringAndClear());
1853  if (aNewURI.HasError())
1854  {
1855  // Detect cases where a relative input could not be made absolute
1856  // because the given base URL is broken (most probably because it is
1857  // empty):
1858  SAL_WARN_IF(
1859  HasError(), "tools.urlobj",
1860  "cannot make <" << rTheRelURIRef
1861  << "> absolute against broken base <" << GetMainURL(DecodeMechanism::NONE)
1862  << ">");
1863  rWasAbsolute = false;
1864  return false;
1865  }
1866 
1867  rTheAbsURIRef = aNewURI;
1868  return true;
1869 }
1870 
// Expresses rTheAbsURIRef relative to this object (the base URL).
//
// rTheAbsURIRef     the URI reference to convert; it is first made absolute
//                   against this object with convertRelToAbs()
// rTheRelURIRef     always assigned: the relative reference on success,
//                   otherwise either the decoded input or the main URL of the
//                   absolute form of the input (see the individual exits)
// eEncodeMechanism  passed on to convertRelToAbs()
// eDecodeMechanism  used for every decode()/GetMainURL() call that produces
//                   output
// eCharset          passed on to convertRelToAbs(), decode() and GetMainURL()
// eStyle            passed on to convertRelToAbs() and hasDosVolume()
//
// Returns true only if a relative reference was produced.
1871 bool INetURLObject::convertAbsToRel(OUString const & rTheAbsURIRef,
1872  OUString & rTheRelURIRef,
1873  EncodeMechanism eEncodeMechanism,
1874  DecodeMechanism eDecodeMechanism,
1875  rtl_TextEncoding eCharset,
1876  FSysStyle eStyle) const
1877 {
1878  // Check for hierarchical base URL:
1879  if (!getSchemeInfo().m_bHierarchical)
1880  {
1881  rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1882  return false;
1883  }
1884 
1885  // Convert the input (absolute or relative URI ref) to an absolute URI
1886  // ref:
// (bIgnoreFragment, bSmart and bRelativeNonURIs are all passed as false.)
1887  INetURLObject aSubject;
1888  bool bWasAbsolute;
1889  if (!convertRelToAbs(rTheAbsURIRef, aSubject, bWasAbsolute,
1890  eEncodeMechanism, eCharset, false, false, false,
1891  eStyle))
1892  {
1893  rTheRelURIRef = decode(rTheAbsURIRef, eDecodeMechanism, eCharset);
1894  return false;
1895  }
1896 
1897  // Check for differing scheme or authority parts:
// Any difference means the subject cannot be written relative to the base;
// its full URL is returned instead.
1898  if ((m_aScheme.compare(
1899  aSubject.m_aScheme, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1900  != 0)
1901  || (m_aUser.compare(
1902  aSubject.m_aUser, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1903  != 0)
1904  || (m_aAuth.compare(
1905  aSubject.m_aAuth, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1906  != 0)
1907  || (m_aHost.compare(
1908  aSubject.m_aHost, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1909  != 0)
1910  || (m_aPort.compare(
1911  aSubject.m_aPort, m_aAbsURIRef, aSubject.m_aAbsURIRef)
1912  != 0))
1913  {
1914  rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1915  return false;
1916  }
1917 
1918  sal_Unicode const * pBasePathBegin
1919  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
1920  sal_Unicode const * pBasePathEnd = pBasePathBegin + m_aPath.getLength();
1921  sal_Unicode const * pSubjectPathBegin
1922  = aSubject.m_aAbsURIRef.getStr() + aSubject.m_aPath.getBegin();
1923  sal_Unicode const * pSubjectPathEnd
1924  = pSubjectPathBegin + aSubject.m_aPath.getLength();
1925 
1926  // Make nMatch point past the last matching slash, or past the end of the
1927  // paths, in case they are equal:
// (The position is tracked in pSlash, as a pointer into the base path, and
// turned into the offset nMatch after the loop. pSlash stays null if the two
// paths share no leading '/' and are not both empty.)
1928  sal_Unicode const * pSlash = nullptr;
1929  sal_Unicode const * p1 = pBasePathBegin;
1930  sal_Unicode const * p2 = pSubjectPathBegin;
1931  for (;;)
1932  {
1933  if (p1 == pBasePathEnd || p2 == pSubjectPathEnd)
1934  {
1935  if (p1 == pBasePathEnd && p2 == pSubjectPathEnd)
1936  pSlash = p1;
1937  break;
1938  }
1939 
1940  sal_Unicode c = *p1++;
1941  if (c != *p2++)
1942  break;
1943  if (c == '/')
1944  pSlash = p1;
1945  }
1946  if (!pSlash)
1947  {
1948  // One of the paths does not start with '/':
1949  rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1950  return false;
1951  }
// Length of the common prefix; used below as an offset into both paths.
1952  sal_Int32 nMatch = pSlash - pBasePathBegin;
1953 
1954  // If the two URLs are DOS file URLs starting with different volumes
1955  // (e.g., file:///a:/... and file:///b:/...), the subject is not made
1956  // relative (it could be, but some people do not like that):
1957  if (m_eScheme == INetProtocol::File
1958  && nMatch <= 1
1959  && hasDosVolume(eStyle)
1960  && aSubject.hasDosVolume(eStyle)) //TODO! ok to use eStyle for these?
1961  {
1962  rTheRelURIRef = aSubject.GetMainURL(eDecodeMechanism, eCharset);
1963  return false;
1964  }
1965 
1966  // For every slash in the base path after nMatch, a prefix of "../" is
1967  // added to the new relative URL (if the common prefix of the two paths is
1968  // only "/"---but see handling of file URLs above---, the complete subject
1969  // path could go into the new relative URL instead, but some people don't
1970  // like that):
1971  OUStringBuffer aSynRelURIRef;
1972  for (sal_Unicode const * p = pBasePathBegin + nMatch; p != pBasePathEnd;
1973  ++p)
1974  {
1975  if (*p == '/')
1976  aSynRelURIRef.append("../");
1977  }
1978 
1979  // If the new relative URL would start with "//" (i.e., it would be
1980  // mistaken for a relative URL starting with an authority part), or if the
1981  // new relative URL would neither be empty nor start with <"/"> nor start
1982  // with <1*rseg> (i.e., it could be mistaken for an absolute URL starting
1983  // with a scheme part), then the new relative URL is prefixed with "./":
// (Only relevant when no "../" was emitted above.)
1984  if (aSynRelURIRef.isEmpty())
1985  {
1986  if (pSubjectPathEnd - pSubjectPathBegin >= nMatch + 2
1987  && pSubjectPathBegin[nMatch] == '/'
1988  && pSubjectPathBegin[nMatch + 1] == '/')
1989  {
1990  aSynRelURIRef.append("./");
1991  }
1992  else
1993  {
// Inspect only the first remaining segment of the subject path: a single
// character for which mustEncode(..., PART_REL_SEGMENT_EXTRA) is true
// triggers the "./" prefix.
1994  for (sal_Unicode const * p = pSubjectPathBegin + nMatch;
1995  p != pSubjectPathEnd && *p != '/'; ++p)
1996  {
1997  if (mustEncode(*p, PART_REL_SEGMENT_EXTRA))
1998  {
1999  aSynRelURIRef.append("./");
2000  break;
2001  }
2002  }
2003  }
2004  }
2005 
2006  // The remainder of the subject path, starting at nMatch, is appended to
2007  // the new relative URL:
2008  aSynRelURIRef.append(decode(pSubjectPathBegin + nMatch, pSubjectPathEnd,
2009  eDecodeMechanism, eCharset));
2010 
2011  // If the subject has defined query or fragment parts, they are appended
2012  // to the new relative URL:
2013  if (aSubject.m_aQuery.isPresent())
2014  {
2015  aSynRelURIRef.append('?');
2016  aSynRelURIRef.append(aSubject.decode(aSubject.m_aQuery,
2017  eDecodeMechanism, eCharset));
2018  }
2019  if (aSubject.m_aFragment.isPresent())
2020  {
2021  aSynRelURIRef.append('#');
2022  aSynRelURIRef.append(aSubject.decode(aSubject.m_aFragment,
2023  eDecodeMechanism, eCharset));
2024  }
2025 
2026  rTheRelURIRef = aSynRelURIRef.makeStringAndClear();
2027  return true;
2028 }
2029 
2030 // static
2031 bool INetURLObject::convertIntToExt(OUString const & rTheIntURIRef,
2032  OUString & rTheExtURIRef,
2033  DecodeMechanism eDecodeMechanism,
2034  rtl_TextEncoding eCharset)
2035 {
2036  OUString aSynExtURIRef(encodeText(rTheIntURIRef, PART_VISIBLE,
2037  EncodeMechanism::NotCanonical, eCharset, true));
2038  sal_Unicode const * pBegin = aSynExtURIRef.getStr();
2039  sal_Unicode const * pEnd = pBegin + aSynExtURIRef.getLength();
2040  sal_Unicode const * p = pBegin;
2041  PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2042  bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::Internal;
2043  if (bConvert)
2044  {
2045  aSynExtURIRef =
2046  aSynExtURIRef.replaceAt(0, p - pBegin,
2047  OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2048  }
2049  rTheExtURIRef = decode(aSynExtURIRef, eDecodeMechanism, eCharset);
2050  return bConvert;
2051 }
2052 
2053 // static
2054 bool INetURLObject::convertExtToInt(OUString const & rTheExtURIRef,
2055  OUString & rTheIntURIRef,
2056  DecodeMechanism eDecodeMechanism,
2057  rtl_TextEncoding eCharset)
2058 {
2059  OUString aSynIntURIRef(encodeText(rTheExtURIRef, PART_VISIBLE,
2060  EncodeMechanism::NotCanonical, eCharset, true));
2061  sal_Unicode const * pBegin = aSynIntURIRef.getStr();
2062  sal_Unicode const * pEnd = pBegin + aSynIntURIRef.getLength();
2063  sal_Unicode const * p = pBegin;
2064  PrefixInfo const * pPrefix = getPrefix(p, pEnd);
2065  bool bConvert = pPrefix && pPrefix->m_eKind == PrefixInfo::Kind::External;
2066  if (bConvert)
2067  {
2068  aSynIntURIRef =
2069  aSynIntURIRef.replaceAt(0, p - pBegin,
2070  OUString::createFromAscii(pPrefix->m_pTranslatedPrefix));
2071  }
2072  rTheIntURIRef = decode(aSynIntURIRef, eDecodeMechanism, eCharset);
2073  return bConvert;
2074 }
2075 
// Looks up the longest entry of the sorted table aMap whose m_pPrefix is a
// prefix of the text [rBegin, pEnd); input characters are lowered with
// rtl::toAsciiLowerCase before comparison.
//
// On success rBegin is moved behind the matched prefix and a pointer to the
// table entry is returned.  If nothing matches, rBegin keeps its value and
// nullptr is returned.
//
// NOTE(review): the first line of the signature seems to be missing from
// this listing (the embedded numbering jumps from 2076 to 2078).
2076 // static
2078  sal_Unicode const * pEnd)
2079 {
2080  static PrefixInfo const aMap[]
2081  = { // dummy entry at front needed, because pLast may point here:
2082  { nullptr, nullptr, INetProtocol::NotValid, PrefixInfo::Kind::Internal },
2083  { ".component:", "staroffice.component:", INetProtocol::Component,
2084  PrefixInfo::Kind::Internal },
2085  { ".uno:", "staroffice.uno:", INetProtocol::Uno,
2086  PrefixInfo::Kind::Internal },
2087  { "cid:", nullptr, INetProtocol::Cid, PrefixInfo::Kind::Official },
2088  { "data:", nullptr, INetProtocol::Data, PrefixInfo::Kind::Official },
2089  { "db:", "staroffice.db:", INetProtocol::Db, PrefixInfo::Kind::Internal },
2090  { "file:", nullptr, INetProtocol::File, PrefixInfo::Kind::Official },
2091  { "ftp:", nullptr, INetProtocol::Ftp, PrefixInfo::Kind::Official },
2092  { "hid:", "staroffice.hid:", INetProtocol::Hid,
2093  PrefixInfo::Kind::Internal },
2094  { "http:", nullptr, INetProtocol::Http, PrefixInfo::Kind::Official },
2095  { "https:", nullptr, INetProtocol::Https, PrefixInfo::Kind::Official },
2096  { "javascript:", nullptr, INetProtocol::Javascript, PrefixInfo::Kind::Official },
2097  { "ldap:", nullptr, INetProtocol::Ldap, PrefixInfo::Kind::Official },
2098  { "macro:", "staroffice.macro:", INetProtocol::Macro,
2099  PrefixInfo::Kind::Internal },
2100  { "mailto:", nullptr, INetProtocol::Mailto, PrefixInfo::Kind::Official },
2101  { "private:", "staroffice.private:", INetProtocol::PrivSoffice,
2102  PrefixInfo::Kind::Internal },
2103  { "private:factory/", "staroffice.factory:",
2104  INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal },
2105  { "private:helpid/", "staroffice.helpid:", INetProtocol::PrivSoffice,
2106  PrefixInfo::Kind::Internal },
2107  { "private:java/", "staroffice.java:", INetProtocol::PrivSoffice,
2108  PrefixInfo::Kind::Internal },
2109  { "private:searchfolder:", "staroffice.searchfolder:",
2110  INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal },
2111  { "private:trashcan:", "staroffice.trashcan:",
2112  INetProtocol::PrivSoffice, PrefixInfo::Kind::Internal },
2113  { "sftp:", nullptr, INetProtocol::Sftp, PrefixInfo::Kind::Official },
2114  { "slot:", "staroffice.slot:", INetProtocol::Slot,
2115  PrefixInfo::Kind::Internal },
2116  { "smb:", nullptr, INetProtocol::Smb, PrefixInfo::Kind::Official },
2117  { "staroffice.component:", ".component:", INetProtocol::Component,
2118  PrefixInfo::Kind::External },
2119  { "staroffice.db:", "db:", INetProtocol::Db, PrefixInfo::Kind::External },
2120  { "staroffice.factory:", "private:factory/",
2121  INetProtocol::PrivSoffice, PrefixInfo::Kind::External },
2122  { "staroffice.helpid:", "private:helpid/", INetProtocol::PrivSoffice,
2123  PrefixInfo::Kind::External },
2124  { "staroffice.hid:", "hid:", INetProtocol::Hid,
2125  PrefixInfo::Kind::External },
2126  { "staroffice.java:", "private:java/", INetProtocol::PrivSoffice,
2127  PrefixInfo::Kind::External },
2128  { "staroffice.macro:", "macro:", INetProtocol::Macro,
2129  PrefixInfo::Kind::External },
2130  { "staroffice.private:", "private:", INetProtocol::PrivSoffice,
2131  PrefixInfo::Kind::External },
2132  { "staroffice.searchfolder:", "private:searchfolder:",
2133  INetProtocol::PrivSoffice, PrefixInfo::Kind::External },
2134  { "staroffice.slot:", "slot:", INetProtocol::Slot,
2135  PrefixInfo::Kind::External },
2136  { "staroffice.trashcan:", "private:trashcan:",
2137  INetProtocol::PrivSoffice, PrefixInfo::Kind::External },
2138  { "staroffice.uno:", ".uno:", INetProtocol::Uno,
2139  PrefixInfo::Kind::External },
2140  { "staroffice:", "private:", INetProtocol::PrivSoffice,
2141  PrefixInfo::Kind::External },
2142  { "telnet:", nullptr, INetProtocol::Telnet, PrefixInfo::Kind::Official },
2143  { "vnd.libreoffice.cmis:", nullptr, INetProtocol::Cmis, PrefixInfo::Kind::Internal },
2144  { "vnd.sun.star.cmd:", nullptr, INetProtocol::VndSunStarCmd,
2145  PrefixInfo::Kind::Official },
2146  { "vnd.sun.star.expand:", nullptr, INetProtocol::VndSunStarExpand,
2147  PrefixInfo::Kind::Official },
2148  { "vnd.sun.star.help:", nullptr, INetProtocol::VndSunStarHelp,
2149  PrefixInfo::Kind::Official },
2150  { "vnd.sun.star.hier:", nullptr, INetProtocol::VndSunStarHier,
2151  PrefixInfo::Kind::Official },
2152  { "vnd.sun.star.pkg:", nullptr, INetProtocol::VndSunStarPkg,
2153  PrefixInfo::Kind::Official },
2154  { "vnd.sun.star.tdoc:", nullptr, INetProtocol::VndSunStarTdoc,
2155  PrefixInfo::Kind::Official },
2156  { "vnd.sun.star.webdav:", nullptr, INetProtocol::VndSunStarWebdav,
2157  PrefixInfo::Kind::Official }
2158  };
2159 /* This list needs to be sorted, or you'll introduce serious bugs */
2160 
      // [pFirst, pLast] is the range of table entries that still agree with
      // the input on the first i characters; pMatch/pMatched remember the
      // longest entry matched completely so far and the input position
      // right behind it.
2161  PrefixInfo const * pFirst = aMap + 1;
2162  PrefixInfo const * pLast = aMap + sizeof aMap / sizeof (PrefixInfo) - 1;
2163  PrefixInfo const * pMatch = nullptr;
2164  sal_Unicode const * pMatched = rBegin;
2165  sal_Unicode const * p = rBegin;
2166  sal_Int32 i = 0;
2167  for (; pFirst < pLast; ++i)
2168  {
      // The first candidate ends here, so it is a complete match; record it
      // and continue looking for a longer one among the remaining entries.
2169  if (pFirst->m_pPrefix[i] == '\0')
2170  {
2171  pMatch = pFirst++;
2172  pMatched = p;
2173  }
2174  if (p >= pEnd)
2175  break;
2176  sal_uInt32 nChar = rtl::toAsciiLowerCase(*p++);
      // Drop candidates whose i-th character sorts before (from the front)
      // or after (from the back) the current input character.
2177  while (pFirst <= pLast && static_cast<unsigned char>(pFirst->m_pPrefix[i]) < nChar)
2178  ++pFirst;
2179  while (pFirst <= pLast && static_cast<unsigned char>(pLast->m_pPrefix[i]) > nChar)
2180  --pLast;
2181  }
      // Exactly one candidate left: compare the rest of its prefix against
      // the input directly.
2182  if (pFirst == pLast)
2183  {
2184  char const * q = pFirst->m_pPrefix + i;
2185  while (p < pEnd && *q != '\0'
2186  && rtl::toAsciiLowerCase(*p) == static_cast<unsigned char>(*q))
2187  {
2188  ++p;
2189  ++q;
2190  }
2191  if (*q == '\0')
2192  {
2193  rBegin = p;
2194  return pFirst;
2195  }
2196  }
      // Fall back to the longest complete match recorded above (possibly
      // none, in which case pMatched still equals the original rBegin).
2197  rBegin = pMatched;
2198  return pMatch;
2199 }
2200 
2202 {
      // Returns the index within m_aAbsURIRef of the "//" that introduces
      // the authority: the begin of the first present component (user, then
      // host, otherwise the path) minus the length of "//".
      // NOTE(review): the signature line seems to be missing from this
      // listing (embedded numbering jumps from 2200 to 2202).
2203  DBG_ASSERT(getSchemeInfo().m_bAuthority,
2204  "INetURLObject::getAuthority(): Bad scheme");
2205  sal_Int32 nBegin;
2206  if (m_aUser.isPresent())
2207  nBegin = m_aUser.getBegin();
2208  else if (m_aHost.isPresent())
2209  nBegin = m_aHost.getBegin();
2210  else
2211  nBegin = m_aPath.getBegin();
2212  nBegin -= RTL_CONSTASCII_LENGTH("//");
      // Debug-only sanity check that the two characters found really are "//".
2213  DBG_ASSERT(m_aAbsURIRef[nBegin] == '/' && m_aAbsURIRef[nBegin + 1] == '/',
2214  "INetURLObject::getAuthority(): Bad authority");
2215  return nBegin;
2216 }
2217 
2219 {
      // Returns the SubString spanning the authority, starting at its
      // leading "//" (see getAuthorityBegin()).  It ends at the end of the
      // first component found present when checking port, host, auth and
      // user in that order; if none is present it covers only the "//".
      // NOTE(review): the signature line seems to be missing from this
      // listing (embedded numbering jumps from 2217 to 2219).
2220  sal_Int32 nBegin = getAuthorityBegin();
2221  sal_Int32 nEnd = m_aPort.isPresent() ? m_aPort.getEnd() :
2222  m_aHost.isPresent() ? m_aHost.getEnd() :
2223  m_aAuth.isPresent() ? m_aAuth.getEnd() :
2224  m_aUser.isPresent() ? m_aUser.getEnd() :
2225  nBegin + RTL_CONSTASCII_LENGTH("//");
2226  return SubString(nBegin, nEnd - nBegin);
2227 }
2228 
2229 bool INetURLObject::setUser(OUString const & rTheUser,
2230  rtl_TextEncoding eCharset)
2231 {
2232  if (
2233  !getSchemeInfo().m_bUser
2234  )
2235  {
2236  return false;
2237  }
2238 
2239  OUString aNewUser(encodeText(rTheUser, PART_USER_PASSWORD,
2240  EncodeMechanism::WasEncoded, eCharset, false));
2241  sal_Int32 nDelta;
2242  if (m_aUser.isPresent())
2243  nDelta = m_aUser.set(m_aAbsURIRef, aNewUser);
2244  else if (m_aHost.isPresent())
2245  {
2246  m_aAbsURIRef.insert(m_aHost.getBegin(), u'@');
2247  nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aHost.getBegin()) + 1;
2248  }
2249  else if (getSchemeInfo().m_bHost)
2250  return false;
2251  else
2252  nDelta = m_aUser.set(m_aAbsURIRef, aNewUser, m_aPath.getBegin());
2253  m_aAuth += nDelta;
2254  m_aHost += nDelta;
2255  m_aPort += nDelta;
2256  m_aPath += nDelta;
2257  m_aQuery += nDelta;
2258  m_aFragment += nDelta;
2259  return true;
2260 }
2261 
2262 namespace
2263 {
2264  void lcl_Erase(OUStringBuffer &rBuf, sal_Int32 index, sal_Int32 count)
2265  {
2266  OUString sTemp(rBuf.makeStringAndClear());
2267  rBuf.append(sTemp.replaceAt(index, count, OUString()));
2268  }
2269 }
2270 
2272 {
      // Removes the auth (password) component from the URL, if present,
      // together with the single character in front of it, and shifts the
      // components that follow.  Returns false only if the scheme has no
      // password part; returns true even when there was nothing to remove.
      // NOTE(review): the signature line seems to be missing from this
      // listing (embedded numbering jumps from 2270 to 2272).
2273  if (!getSchemeInfo().m_bPassword)
2274  return false;
2275  if (m_aAuth.isPresent())
2276  {
      // The character at getBegin() - 1 is presumably the ':' separator
      // that setPassword() inserts in front of the auth text.
2277  lcl_Erase(m_aAbsURIRef, m_aAuth.getBegin() - 1,
2278  m_aAuth.getLength() + 1);
      // clear() presumably returns the (negative) length change of the
      // component itself -- TODO confirm; the extra -1 accounts for the
      // separator removed above.
2279  sal_Int32 nDelta = m_aAuth.clear() - 1;
2280  m_aHost += nDelta;
2281  m_aPort += nDelta;
2282  m_aPath += nDelta;
2283  m_aQuery += nDelta;
2284  m_aFragment += nDelta;
2285  }
2286  return true;
2287 }
2288 
2289 bool INetURLObject::setPassword(OUString const & rThePassword,
2290  rtl_TextEncoding eCharset)
2291 {
2292  if (!getSchemeInfo().m_bPassword)
2293  return false;
2294  OUString aNewAuth(encodeText(rThePassword, PART_USER_PASSWORD,
2295  EncodeMechanism::WasEncoded, eCharset, false));
2296  sal_Int32 nDelta;
2297  if (m_aAuth.isPresent())
2298  nDelta = m_aAuth.set(m_aAbsURIRef, aNewAuth);
2299  else if (m_aUser.isPresent())
2300  {
2301  m_aAbsURIRef.insert(m_aUser.getEnd(), u':');
2302  nDelta
2303  = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aUser.getEnd() + 1) + 1;
2304  }
2305  else if (m_aHost.isPresent())
2306  {
2307  m_aAbsURIRef.insert(m_aHost.getBegin(), ":@" );
2308  m_aUser.set(m_aAbsURIRef, OUString(), m_aHost.getBegin());
2309  nDelta
2310  = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aHost.getBegin() + 1) + 2;
2311  }
2312  else if (getSchemeInfo().m_bHost)
2313  return false;
2314  else
2315  {
2316  m_aAbsURIRef.insert(m_aPath.getBegin(), u':');
2317  m_aUser.set(m_aAbsURIRef, OUString(), m_aPath.getBegin());
2318  nDelta
2319  = m_aAuth.set(m_aAbsURIRef, aNewAuth, m_aPath.getBegin() + 1) + 1;
2320  }
2321  m_aHost += nDelta;
2322  m_aPort += nDelta;
2323  m_aPath += nDelta;
2324  m_aQuery += nDelta;
2325  m_aFragment += nDelta;
2326  return true;
2327 }
2328 
// Parses a host starting at rBegin and ending no later than pEnd, using a
// character-driven state machine.  Three forms are recognized:
//  - a name made of labels (ASCII alphanumerics, '_' and inner '-')
//    separated by '.';
//  - a dotted decimal IPv4 address with exactly four numbers;
//  - a bracketed IPv6 literal ("[" ... "]"), optionally ending in an
//    embedded dotted IPv4 address.
// Scanning stops at pEnd or at the first character that the current state
// cannot accept; the state reached at that point decides the outcome.
//
// On success, rBegin is set to the first unconsumed character, rCanonic
// receives the canonic form, and true is returned.  For names the canonic
// form is the consumed text verbatim; for IP addresses every number is
// re-rendered with OUString::number.  On failure false is returned and
// rBegin/rCanonic are not modified.
2329 // static
2330 bool INetURLObject::parseHost(sal_Unicode const *& rBegin, sal_Unicode const * pEnd,
2331  OUString & rCanonic)
2332 {
2333  // RFC 2373 is inconsistent about how to write an IPv6 address in which an
2334  // IPv4 address directly follows the abbreviating "::". The ABNF in
2335  // Appendix B suggests ":::13.1.68.3", while an example in 2.2/3 explicitly
2336  // mentions "::13:1.68.3". This algorithm accepts both variants:
2337  enum State { STATE_INITIAL, STATE_LABEL, STATE_LABEL_HYPHEN,
2338  STATE_LABEL_DOT, STATE_TOPLABEL, STATE_TOPLABEL_HYPHEN,
2339  STATE_TOPLABEL_DOT, STATE_IP4, STATE_IP4_DOT, STATE_IP6,
2340  STATE_IP6_COLON, STATE_IP6_2COLON, STATE_IP6_3COLON,
2341  STATE_IP6_HEXSEQ1, STATE_IP6_HEXSEQ1_COLON,
2342  STATE_IP6_HEXSEQ1_MAYBE_IP4, STATE_IP6_HEXSEQ2,
2343  STATE_IP6_HEXSEQ2_COLON, STATE_IP6_HEXSEQ2_MAYBE_IP4,
2344  STATE_IP6_IP4, STATE_IP6_IP4_DOT, STATE_IP6_DONE };
2345  OUStringBuffer aTheCanonic(32);
      // nNumber/nDigits: value and digit count of the number currently
      // being read; nOctets: how many IPv4 numbers have been started.
2346  sal_uInt32 nNumber = 0;
2347  int nDigits = 0;
2348  int nOctets = 0;
2349  State eState = STATE_INITIAL;
2350  sal_Unicode const * p = rBegin;
2351  for (; p != pEnd; ++p)
2352  switch (eState)
2353  {
2354  case STATE_INITIAL:
2355  if (*p == '[')
2356  {
2357  aTheCanonic.append('[');
2358  eState = STATE_IP6;
2359  }
2360  else if (rtl::isAsciiAlpha(*p) || *p == '_')
2361  eState = STATE_TOPLABEL;
2362  else if (rtl::isAsciiDigit(*p))
2363  {
2364  nNumber = INetMIME::getWeight(*p);
2365  nDigits = 1;
2366  nOctets = 1;
2367  eState = STATE_IP4;
2368  }
2369  else
2370  goto done;
2371  break;
2372 
2373  case STATE_LABEL:
2374  if (*p == '.')
2375  eState = STATE_LABEL_DOT;
2376  else if (*p == '-')
2377  eState = STATE_LABEL_HYPHEN;
2378  else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2379  goto done;
2380  break;
2381 
2382  case STATE_LABEL_HYPHEN:
2383  if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2384  eState = STATE_LABEL;
2385  else if (*p != '-')
2386  goto done;
2387  break;
2388 
2389  case STATE_LABEL_DOT:
2390  if (rtl::isAsciiAlpha(*p) || *p == '_')
2391  eState = STATE_TOPLABEL;
2392  else if (rtl::isAsciiDigit(*p))
2393  eState = STATE_LABEL;
2394  else
2395  goto done;
2396  break;
2397 
2398  case STATE_TOPLABEL:
2399  if (*p == '.')
2400  eState = STATE_TOPLABEL_DOT;
2401  else if (*p == '-')
2402  eState = STATE_TOPLABEL_HYPHEN;
2403  else if (!rtl::isAsciiAlphanumeric(*p) && *p != '_')
2404  goto done;
2405  break;
2406 
2407  case STATE_TOPLABEL_HYPHEN:
2408  if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
2409  eState = STATE_TOPLABEL;
2410  else if (*p != '-')
2411  goto done;
2412  break;
2413 
2414  case STATE_TOPLABEL_DOT:
2415  if (rtl::isAsciiAlpha(*p) || *p == '_')
2416  eState = STATE_TOPLABEL;
2417  else if (rtl::isAsciiDigit(*p))
2418  eState = STATE_LABEL;
2419  else
2420  goto done;
2421  break;
2422 
      // Reading a decimal number that may still turn out to be a label of
      // a name: a fifth number, a fourth digit, a letter, '_' or '-' all
      // switch over to the name states.
2423  case STATE_IP4:
2424  if (*p == '.')
2425  if (nOctets < 4)
2426  {
2427  aTheCanonic.append( OUString::number(nNumber) );
2428  aTheCanonic.append( '.' );
2429  ++nOctets;
2430  eState = STATE_IP4_DOT;
2431  }
2432  else
2433  eState = STATE_LABEL_DOT;
2434  else if (*p == '-')
2435  eState = STATE_LABEL_HYPHEN;
2436  else if (rtl::isAsciiAlpha(*p) || *p == '_')
2437  eState = STATE_LABEL;
2438  else if (rtl::isAsciiDigit(*p))
2439  if (nDigits < 3)
2440  {
2441  nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2442  ++nDigits;
2443  }
2444  else
2445  eState = STATE_LABEL;
2446  else
2447  goto done;
2448  break;
2449 
2450  case STATE_IP4_DOT:
2451  if (rtl::isAsciiAlpha(*p) || *p == '_')
2452  eState = STATE_TOPLABEL;
2453  else if (rtl::isAsciiDigit(*p))
2454  {
2455  nNumber = INetMIME::getWeight(*p);
2456  nDigits = 1;
2457  eState = STATE_IP4;
2458  }
2459  else
2460  goto done;
2461  break;
2462 
      // Directly behind the opening '[' of an IPv6 literal.
2463  case STATE_IP6:
2464  if (*p == ':')
2465  eState = STATE_IP6_COLON;
2466  else if (rtl::isAsciiHexDigit(*p))
2467  {
2468  nNumber = INetMIME::getHexWeight(*p);
2469  nDigits = 1;
2470  eState = STATE_IP6_HEXSEQ1;
2471  }
2472  else
2473  goto done;
2474  break;
2475 
2476  case STATE_IP6_COLON:
2477  if (*p == ':')
2478  {
2479  aTheCanonic.append("::");
2480  eState = STATE_IP6_2COLON;
2481  }
2482  else
2483  goto done;
2484  break;
2485 
      // Behind a "::"; the HEXSEQ2 states that follow do not accept another
      // "::".
2486  case STATE_IP6_2COLON:
2487  if (*p == ']')
2488  eState = STATE_IP6_DONE;
2489  else if (*p == ':')
2490  {
2491  aTheCanonic.append(':');
2492  eState = STATE_IP6_3COLON;
2493  }
2494  else if (rtl::isAsciiDigit(*p))
2495  {
2496  nNumber = INetMIME::getWeight(*p);
2497  nDigits = 1;
2498  eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2499  }
2500  else if (rtl::isAsciiHexDigit(*p))
2501  {
2502  nNumber = INetMIME::getHexWeight(*p);
2503  nDigits = 1;
2504  eState = STATE_IP6_HEXSEQ2;
2505  }
2506  else
2507  goto done;
2508  break;
2509 
      // Behind ":::" only an embedded IPv4 address may follow.
2510  case STATE_IP6_3COLON:
2511  if (rtl::isAsciiDigit(*p))
2512  {
2513  nNumber = INetMIME::getWeight(*p);
2514  nDigits = 1;
2515  nOctets = 1;
2516  eState = STATE_IP6_IP4;
2517  }
2518  else
2519  goto done;
2520  break;
2521 
      // Hex group (at most four digits) before any "::" has been seen.
2522  case STATE_IP6_HEXSEQ1:
2523  if (*p == ']')
2524  {
2525  aTheCanonic.append(
2526  OUString::number(nNumber, 16));
2527  eState = STATE_IP6_DONE;
2528  }
2529  else if (*p == ':')
2530  {
2531  aTheCanonic.append(
2532  OUString::number(nNumber, 16));
2533  aTheCanonic.append(':');
2534  eState = STATE_IP6_HEXSEQ1_COLON;
2535  }
2536  else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2537  {
2538  nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2539  ++nDigits;
2540  }
2541  else
2542  goto done;
2543  break;
2544 
2545  case STATE_IP6_HEXSEQ1_COLON:
2546  if (*p == ':')
2547  {
2548  aTheCanonic.append(':');
2549  eState = STATE_IP6_2COLON;
2550  }
2551  else if (rtl::isAsciiDigit(*p))
2552  {
2553  nNumber = INetMIME::getWeight(*p);
2554  nDigits = 1;
2555  eState = STATE_IP6_HEXSEQ1_MAYBE_IP4;
2556  }
2557  else if (rtl::isAsciiHexDigit(*p))
2558  {
2559  nNumber = INetMIME::getHexWeight(*p);
2560  nDigits = 1;
2561  eState = STATE_IP6_HEXSEQ1;
2562  }
2563  else
2564  goto done;
2565  break;
2566 
      // A group consisting of decimal digits only so far: it is either a
      // hex group or the first number of an embedded IPv4 address.  The
      // value is accumulated as hex and reinterpreted on '.'.
2567  case STATE_IP6_HEXSEQ1_MAYBE_IP4:
2568  if (*p == ']')
2569  {
2570  aTheCanonic.append(
2571  OUString::number(nNumber, 16));
2572  eState = STATE_IP6_DONE;
2573  }
2574  else if (*p == ':')
2575  {
2576  aTheCanonic.append(
2577  OUString::number(nNumber, 16));
2578  aTheCanonic.append(':');
2579  eState = STATE_IP6_HEXSEQ1_COLON;
2580  }
2581  else if (*p == '.')
2582  {
      // Re-read the hex nibbles as decimal digits.
2583  nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2584  + (nNumber & 15);
2585  aTheCanonic.append(
2586  OUString::number(nNumber));
2587  aTheCanonic.append('.');
2588  nOctets = 2;
2589  eState = STATE_IP6_IP4_DOT;
2590  }
2591  else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2592  {
2593  nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2594  ++nDigits;
2595  }
2596  else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2597  {
2598  nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2599  ++nDigits;
2600  eState = STATE_IP6_HEXSEQ1;
2601  }
2602  else
2603  goto done;
2604  break;
2605 
      // Hex group (at most four digits) after a "::" has been seen.
2606  case STATE_IP6_HEXSEQ2:
2607  if (*p == ']')
2608  {
2609  aTheCanonic.append(
2610  OUString::number(nNumber, 16));
2611  eState = STATE_IP6_DONE;
2612  }
2613  else if (*p == ':')
2614  {
2615  aTheCanonic.append(
2616  OUString::number(nNumber, 16));
2617  aTheCanonic.append(':');
2618  eState = STATE_IP6_HEXSEQ2_COLON;
2619  }
2620  else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2621  {
2622  nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2623  ++nDigits;
2624  }
2625  else
2626  goto done;
2627  break;
2628 
2629  case STATE_IP6_HEXSEQ2_COLON:
2630  if (rtl::isAsciiDigit(*p))
2631  {
2632  nNumber = INetMIME::getWeight(*p);
2633  nDigits = 1;
2634  eState = STATE_IP6_HEXSEQ2_MAYBE_IP4;
2635  }
2636  else if (rtl::isAsciiHexDigit(*p))
2637  {
2638  nNumber = INetMIME::getHexWeight(*p);
2639  nDigits = 1;
2640  eState = STATE_IP6_HEXSEQ2;
2641  }
2642  else
2643  goto done;
2644  break;
2645 
      // Same as STATE_IP6_HEXSEQ1_MAYBE_IP4, but after a "::".
2646  case STATE_IP6_HEXSEQ2_MAYBE_IP4:
2647  if (*p == ']')
2648  {
2649  aTheCanonic.append(
2650  OUString::number(nNumber, 16));
2651  eState = STATE_IP6_DONE;
2652  }
2653  else if (*p == ':')
2654  {
2655  aTheCanonic.append(
2656  OUString::number(nNumber, 16));
2657  aTheCanonic.append(':');
2658  eState = STATE_IP6_HEXSEQ2_COLON;
2659  }
2660  else if (*p == '.')
2661  {
2662  nNumber = 100 * (nNumber >> 8) + 10 * (nNumber >> 4 & 15)
2663  + (nNumber & 15);
2664  aTheCanonic.append(
2665  OUString::number(nNumber));
2666  aTheCanonic.append('.');
2667  nOctets = 2;
2668  eState = STATE_IP6_IP4_DOT;
2669  }
2670  else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2671  {
2672  nNumber = 16 * nNumber + INetMIME::getWeight(*p);
2673  ++nDigits;
2674  }
2675  else if (rtl::isAsciiHexDigit(*p) && nDigits < 4)
2676  {
2677  nNumber = 16 * nNumber + INetMIME::getHexWeight(*p);
2678  ++nDigits;
2679  eState = STATE_IP6_HEXSEQ2;
2680  }
2681  else
2682  goto done;
2683  break;
2684 
      // Inside the IPv4 address embedded in an IPv6 literal; ']' is only
      // accepted once four numbers have been read.
2685  case STATE_IP6_IP4:
2686  if (*p == ']')
2687  if (nOctets == 4)
2688  {
2689  aTheCanonic.append(
2690  OUString::number(nNumber));
2691  eState = STATE_IP6_DONE;
2692  }
2693  else
2694  goto done;
2695  else if (*p == '.')
2696  if (nOctets < 4)
2697  {
2698  aTheCanonic.append(
2699  OUString::number(nNumber));
2700  aTheCanonic.append('.');
2701  ++nOctets;
2702  eState = STATE_IP6_IP4_DOT;
2703  }
2704  else
2705  goto done;
2706  else if (rtl::isAsciiDigit(*p) && nDigits < 3)
2707  {
2708  nNumber = 10 * nNumber + INetMIME::getWeight(*p);
2709  ++nDigits;
2710  }
2711  else
2712  goto done;
2713  break;
2714 
2715  case STATE_IP6_IP4_DOT:
2716  if (rtl::isAsciiDigit(*p))
2717  {
2718  nNumber = INetMIME::getWeight(*p);
2719  nDigits = 1;
2720  eState = STATE_IP6_IP4;
2721  }
2722  else
2723  goto done;
2724  break;
2725 
      // The closing ']' has been consumed; anything further is left to the
      // caller.
2726  case STATE_IP6_DONE:
2727  goto done;
2728  }
      // Decide on the result from the state in which scanning stopped.
2729  done:
2730  switch (eState)
2731  {
      // A name: the canonic form is the consumed input text itself.
2732  case STATE_LABEL:
2733  case STATE_TOPLABEL:
2734  case STATE_TOPLABEL_DOT:
2735  aTheCanonic.setLength(0);
2736  aTheCanonic.append(rBegin, p - rBegin);
2737  rBegin = p;
2738  rCanonic = aTheCanonic.makeStringAndClear();
2739  return true;
2740 
      // An IPv4 address is only valid with exactly four numbers; the last
      // one has not been appended yet.
2741  case STATE_IP4:
2742  if (nOctets == 4)
2743  {
2744  aTheCanonic.append(
2745  OUString::number(nNumber));
2746  rBegin = p;
2747  rCanonic = aTheCanonic.makeStringAndClear();
2748  return true;
2749  }
2750  return false;
2751 
2752  case STATE_IP6_DONE:
2753  aTheCanonic.append(']');
2754  rBegin = p;
2755  rCanonic = aTheCanonic.makeStringAndClear();
2756  return true;
2757 
      // Every other state means the input ended in the middle of a host.
2758  default:
2759  return false;
2760  }
2761 }
2762 
// Checks whether [pBegin, pEnd) is acceptable as a host.
//
// An empty range is accepted and yields an empty canonic form.  Otherwise
// the text is accepted if parseHost() consumes it completely.  If that
// fails and bNetBiosName is set, the text is instead accepted when every
// character (as decoded by getUTF32) is visible and is not one of the
// punctuation characters listed in the switch below.  If pCanonic is
// non-null it receives the canonic form on success.
//
// Returns true if the text was accepted, false otherwise (in which case
// *pCanonic is left untouched).
//
// NOTE(review): the first line of the signature seems to be missing from
// this listing (the embedded numbering jumps from 2763 to 2765).
2763 // static
2765  sal_Unicode const * pBegin, sal_Unicode const * pEnd,
2766  EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName,
2767  OUStringBuffer* pCanonic)
2768 {
2769  OUString aTheCanonic;
2770  if (pBegin < pEnd)
2771  {
2772  sal_Unicode const * p = pBegin;
      // Fall back to the looser check unless parseHost() accepts the whole
      // range.
2773  if (!parseHost(p, pEnd, aTheCanonic) || p != pEnd)
2774  {
2775  if (bNetBiosName)
2776  {
2777  OUStringBuffer buf;
2778  while (pBegin < pEnd)
2779  {
2780  EscapeType eEscapeType;
2781  sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
2782  eMechanism, eCharset,
2783  eEscapeType);
2784  if (!INetMIME::isVisible(nUTF32))
2785  return false;
2786  if (!rtl::isAsciiAlphanumeric(nUTF32))
2787  switch (nUTF32)
2788  {
2789  case '"':
2790  case '*':
2791  case '+':
2792  case ',':
2793  case '/':
2794  case ':':
2795  case ';':
2796  case '<':
2797  case '=':
2798  case '>':
2799  case '?':
2800  case '[':
2801  case '\\':
2802  case ']':
2803  case '`':
2804  case '|':
2805  return false;
2806  }
      // The encoded form is only built when the caller asked for it.
2807  if (pCanonic != nullptr) {
2808  appendUCS4(
2809  buf, nUTF32, eEscapeType, PART_URIC,
2810  eCharset, true);
2811  }
2812  }
2813  aTheCanonic = buf.makeStringAndClear();
2814  }
2815  else
2816  return false;
2817  }
2818  }
2819  if (pCanonic != nullptr) {
2820  *pCanonic = aTheCanonic;
2821  }
2822  return true;
2823 }
2824 
2825 bool INetURLObject::setHost(OUString const & rTheHost,
2826  rtl_TextEncoding eCharset)
2827 {
2828  if (!getSchemeInfo().m_bHost)
2829  return false;
2830  OUStringBuffer aSynHost(rTheHost);
2831  bool bNetBiosName = false;
2832  switch (m_eScheme)
2833  {
2834  case INetProtocol::File:
2835  {
2836  OUString sTemp(aSynHost.toString());
2837  if (sTemp.equalsIgnoreAsciiCase("localhost"))
2838  {
2839  aSynHost.setLength(0);
2840  }
2841  bNetBiosName = true;
2842  }
2843  break;
2844  case INetProtocol::Ldap:
2845  if (aSynHost.isEmpty() && m_aPort.isPresent())
2846  return false;
2847  break;
2848 
2849  default:
2850  if (aSynHost.isEmpty())
2851  return false;
2852  break;
2853  }
2854  if (!parseHostOrNetBiosName(
2855  aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
2856  EncodeMechanism::WasEncoded, eCharset, bNetBiosName, &aSynHost))
2857  return false;
2858  sal_Int32 nDelta = m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear());
2859  m_aPort += nDelta;
2860  m_aPath += nDelta;
2861  m_aQuery += nDelta;
2862  m_aFragment += nDelta;
2863  return true;
2864 }
2865 
// Parses the path of a URL according to the rules of the given scheme.
//
// Reading starts at *pBegin and stops at pEnd or at a scheme-dependent
// delimiter (fragment delimiter, and for some schemes the query
// delimiter).  Characters are decoded with getUTF32 and re-encoded with
// appendUCS4 into a local buffer, using a scheme-dependent Part.  On
// success *pBegin is set to the position where reading stopped, rSynPath
// receives the buffer, and true is returned.  On failure false is returned
// and neither *pBegin nor rSynPath is modified.
//
// NOTE(review): the first line of the signature and several `case` labels
// seem to be missing from this listing (gaps in the embedded numbering,
// e.g. 2885 -> 2887, 2904 -> 2906, 2997 -> 2999).  The comments below only
// describe the code that is visible; which schemes the unlabeled branches
// belong to cannot be told from here.
2866 // static
2868  sal_Unicode const ** pBegin,
2869  sal_Unicode const * pEnd,
2870  EncodeMechanism eMechanism,
2871  rtl_TextEncoding eCharset,
2872  bool bSkippedInitialSlash,
2873  sal_uInt32 nSegmentDelimiter,
2874  sal_uInt32 nAltSegmentDelimiter,
2875  sal_uInt32 nQueryDelimiter,
2876  sal_uInt32 nFragmentDelimiter,
2877  OUStringBuffer &rSynPath)
2878 {
2879  DBG_ASSERT(pBegin, "INetURLObject::parsePath(): Null output param");
2880 
2881  sal_Unicode const * pPos = *pBegin;
2882  OUStringBuffer aTheSynPath(256);
2883 
2884  switch (eScheme)
2885  {
2887  return false;
2888 
      // A non-empty path must start with '/'; read up to the fragment
      // delimiter; an empty result becomes "/".
2889  case INetProtocol::Ftp:
2890  if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2891  return false;
2892  while (pPos < pEnd && *pPos != nFragmentDelimiter)
2893  {
2894  EscapeType eEscapeType;
2895  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2896  eCharset, eEscapeType);
2897  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2898  PART_HTTP_PATH, eCharset, true);
2899  }
2900  if (aTheSynPath.isEmpty())
2901  aTheSynPath.append('/');
2902  break;
2903 
      // Like Ftp, but reading also stops at the query delimiter.
2904  case INetProtocol::Http:
2906  case INetProtocol::Https:
2907  case INetProtocol::Smb:
2908  case INetProtocol::Cmis:
2909  if (pPos < pEnd && *pPos != '/' && *pPos != nFragmentDelimiter)
2910  return false;
2911  while (pPos < pEnd && *pPos != nQueryDelimiter
2912  && *pPos != nFragmentDelimiter)
2913  {
2914  EscapeType eEscapeType;
2915  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2916  eCharset, eEscapeType);
2917  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2918  PART_HTTP_PATH, eCharset, true);
2919  }
2920  if (aTheSynPath.isEmpty())
2921  aTheSynPath.append('/');
2922  break;
2923 
      // File paths: both segment delimiters are normalized to '/', and the
      // leading '/' is re-added if the caller already skipped it.
2924  case INetProtocol::File:
2925  {
2926  if (bSkippedInitialSlash)
2927  aTheSynPath.append('/');
2928  else if (pPos < pEnd
2929  && *pPos != nSegmentDelimiter
2930  && *pPos != nAltSegmentDelimiter)
2931  return false;
2932  while (pPos < pEnd && *pPos != nFragmentDelimiter)
2933  {
2934  EscapeType eEscapeType;
2935  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2936  eCharset, eEscapeType);
      // Only unescaped characters are treated as delimiters or as the
      // drive-letter '|'.
2937  if (eEscapeType == EscapeType::NONE)
2938  {
2939  if (nUTF32 == nSegmentDelimiter
2940  || nUTF32 == nAltSegmentDelimiter)
2941  {
2942  aTheSynPath.append('/');
2943  continue;
2944  }
2945  else if (nUTF32 == '|'
2946  && (pPos == pEnd
2947  || *pPos == nFragmentDelimiter
2948  || *pPos == nSegmentDelimiter
2949  || *pPos == nAltSegmentDelimiter)
2950  && aTheSynPath.getLength() == 2
2951  && rtl::isAsciiAlpha(aTheSynPath[1]))
2952  {
2953  // A first segment of <ALPHA "|"> is translated to
2954  // <ALPHA ":">:
2955  aTheSynPath.append(':');
2956  continue;
2957  }
2958  }
2959  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2960  PART_PCHAR, eCharset, true);
2961  }
2962  if (aTheSynPath.isEmpty())
2963  aTheSynPath.append('/');
2964  break;
2965  }
2966 
      // Read up to the query or fragment delimiter; an empty path is
      // allowed.
2967  case INetProtocol::Mailto:
2968  while (pPos < pEnd && *pPos != nQueryDelimiter
2969  && *pPos != nFragmentDelimiter)
2970  {
2971  EscapeType eEscapeType;
2972  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2973  eCharset, eEscapeType);
2974  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2975  PART_MAILTO, eCharset, true);
2976  }
2977  break;
2978 
2979 
      // Read up to the query or fragment delimiter with no constraint on
      // the first character; an empty path is allowed.
2981  case INetProtocol::Slot:
2982  case INetProtocol::Hid:
2983  case INetProtocol::Macro:
2984  case INetProtocol::Uno:
2986  case INetProtocol::Ldap:
2987  while (pPos < pEnd && *pPos != nQueryDelimiter
2988  && *pPos != nFragmentDelimiter)
2989  {
2990  EscapeType eEscapeType;
2991  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
2992  eCharset, eEscapeType);
2993  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
2994  PART_PATH_BEFORE_QUERY, eCharset, true);
2995  }
2996  break;
2997 
      // An empty path becomes "/"; a non-empty one must start with '/'.
2999  if (pPos == pEnd
3000  || *pPos == nQueryDelimiter
3001  || *pPos == nFragmentDelimiter)
3002  aTheSynPath.append('/');
3003  else
3004  {
3005  if (*pPos != '/')
3006  return false;
3007  while (pPos < pEnd && *pPos != nQueryDelimiter
3008  && *pPos != nFragmentDelimiter)
3009  {
3010  EscapeType eEscapeType;
3011  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd,
3012  eMechanism,
3013  eCharset, eEscapeType);
3014  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3015  PART_HTTP_PATH, eCharset, true);
3016  }
3017  }
3018  break;
3019 
      // Opaque paths: read up to the fragment delimiter, encoded as
      // PART_URIC; an empty path is allowed.
3021  case INetProtocol::Data:
3022  case INetProtocol::Cid:
3023  case INetProtocol::Db:
3024  while (pPos < pEnd && *pPos != nFragmentDelimiter)
3025  {
3026  EscapeType eEscapeType;
3027  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3028  eCharset, eEscapeType);
3029  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3030  PART_URIC, eCharset, true);
3031  }
3032  break;
3033 
      // A non-empty path must start with '/'; unescaped '/' is kept as a
      // segment separator, everything else is encoded as PART_PCHAR; an
      // empty result becomes "/".
3036  if (pPos < pEnd && *pPos != '/'
3037  && *pPos != nQueryDelimiter && *pPos != nFragmentDelimiter)
3038  return false;
3039  while (pPos < pEnd && *pPos != nQueryDelimiter
3040  && *pPos != nFragmentDelimiter)
3041  {
3042  EscapeType eEscapeType;
3043  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3044  eCharset, eEscapeType);
3045  if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3046  aTheSynPath.append('/');
3047  else
3048  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3049  PART_PCHAR, eCharset, false);
3050  }
3051  if (aTheSynPath.isEmpty())
3052  aTheSynPath.append('/');
3053  break;
3054 
      // The path must not be empty; its first character is encoded as
      // PART_URIC_NO_SLASH, all following ones as PART_URIC.
3057  {
3058  if (pPos == pEnd || *pPos == nFragmentDelimiter)
3059  return false;
3060  Part ePart = PART_URIC_NO_SLASH;
3061  while (pPos != pEnd && *pPos != nFragmentDelimiter)
3062  {
3063  EscapeType eEscapeType;
3064  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3065  eCharset, eEscapeType);
3066  appendUCS4(aTheSynPath, nUTF32, eEscapeType, ePart,
3067  eCharset, true);
3068  ePart = PART_URIC;
3069  }
3070  break;
3071  }
3072 
      // Only an empty path or exactly "/" is accepted; the result is
      // always "/".
3073  case INetProtocol::Telnet:
3074  if (pPos < pEnd)
3075  {
3076  if (*pPos != '/' || pEnd - pPos > 1)
3077  return false;
3078  ++pPos;
3079  }
3080  aTheSynPath.append('/');
3081  break;
3082 
      // The path is mandatory and must start with '/'; read up to the
      // fragment delimiter.
3084  if (pPos == pEnd || *pPos != '/')
3085  return false;
3086  while (pPos < pEnd && *pPos != nFragmentDelimiter)
3087  {
3088  EscapeType eEscapeType;
3089  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3090  eCharset, eEscapeType);
3091  if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
3092  aTheSynPath.append('/');
3093  else
3094  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3095  PART_PCHAR, eCharset, false);
3096  }
3097  break;
3098 
      // Opaque path up to the fragment delimiter; unlike Data/Cid/Db an
      // empty path is rejected.
3099  case INetProtocol::Generic:
3100  case INetProtocol::Sftp:
3101  while (pPos < pEnd && *pPos != nFragmentDelimiter)
3102  {
3103  EscapeType eEscapeType;
3104  sal_uInt32 nUTF32 = getUTF32(pPos, pEnd, eMechanism,
3105  eCharset, eEscapeType);
3106  appendUCS4(aTheSynPath, nUTF32, eEscapeType,
3107  PART_URIC, eCharset, true);
3108  }
3109  if (aTheSynPath.isEmpty())
3110  return false;
3111  break;
      // Any scheme not handled above trips the assertion; the function
      // still falls through and reports success with an empty path.
3112  default:
3113  OSL_ASSERT(false);
3114  break;
3115  }
3116 
3117  *pBegin = pPos;
3118  rSynPath = aTheSynPath;
3119  return true;
3120 }
3121 
3122 bool INetURLObject::setPath(OUString const & rThePath,
3123  EncodeMechanism eMechanism,
3124  rtl_TextEncoding eCharset)
3125 {
3126  OUStringBuffer aSynPath;
3127  sal_Unicode const * p = rThePath.getStr();
3128  sal_Unicode const * pEnd = p + rThePath.getLength();
3129  if (!parsePath(m_eScheme, &p, pEnd, eMechanism, eCharset, false,
3130  '/', 0x80000000, 0x80000000, 0x80000000, aSynPath)
3131  || p != pEnd)
3132  return false;
3133  sal_Int32 nDelta = m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear());
3134  m_aQuery += nDelta;
3135  m_aFragment += nDelta;
3136  return true;
3137 }
3138 
3140  if (m_eScheme == INetProtocol::VndSunStarExpand) {
3141  OSL_FAIL(
3142  "INetURLObject::checkHierarchical vnd.sun.star.expand");
3143  return true;
3144  } else {
3145  return getSchemeInfo().m_bHierarchical;
3146  }
3147 }
3148 
3149 bool INetURLObject::Append(OUString const & rTheSegment,
3150  EncodeMechanism eMechanism,
3151  rtl_TextEncoding eCharset)
3152 {
3153  return insertName(rTheSegment, false, LAST_SEGMENT, eMechanism, eCharset);
3154 }
3155 
3157  bool bIgnoreFinalSlash)
3158  const
3159 {
3160  DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3161  "INetURLObject::getSegment(): Bad index");
3162 
3163  if (!checkHierarchical())
3164  return SubString();
3165 
3166  sal_Unicode const * pPathBegin
3167  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3168  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3169  sal_Unicode const * pSegBegin;
3170  sal_Unicode const * pSegEnd;
3171  if (nIndex == LAST_SEGMENT)
3172  {
3173  pSegEnd = pPathEnd;
3174  if (bIgnoreFinalSlash && pSegEnd > pPathBegin && pSegEnd[-1] == '/')
3175  --pSegEnd;
3176  if (pSegEnd <= pPathBegin)
3177  return SubString();
3178  pSegBegin = pSegEnd - 1;
3179  while (pSegBegin > pPathBegin && *pSegBegin != '/')
3180  --pSegBegin;
3181  }
3182  else
3183  {
3184  pSegBegin = pPathBegin;
3185  while (nIndex-- > 0)
3186  do
3187  {
3188  ++pSegBegin;
3189  if (pSegBegin >= pPathEnd)
3190  return SubString();
3191  }
3192  while (*pSegBegin != '/');
3193  pSegEnd = pSegBegin + 1;
3194  while (pSegEnd < pPathEnd && *pSegEnd != '/')
3195  ++pSegEnd;
3196  }
3197 
3198  return SubString(pSegBegin - m_aAbsURIRef.getStr(),
3199  pSegEnd - pSegBegin);
3200 }
3201 
3202 bool INetURLObject::insertName(OUString const & rTheName,
3203  bool bAppendFinalSlash, sal_Int32 nIndex,
3204  EncodeMechanism eMechanism,
3205  rtl_TextEncoding eCharset)
3206 {
3207  DBG_ASSERT(nIndex >= 0 || nIndex == LAST_SEGMENT,
3208  "INetURLObject::insertName(): Bad index");
3209 
3210  if (!checkHierarchical())
3211  return false;
3212 
3213  sal_Unicode const * pPathBegin
3214  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3215  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
3216  sal_Unicode const * pPrefixEnd;
3217  bool bInsertSlash;
3218  sal_Unicode const * pSuffixBegin;
3219  if (nIndex == LAST_SEGMENT)
3220  {
3221  pPrefixEnd = pPathEnd;
3222  if (pPrefixEnd > pPathBegin &&
3223  pPrefixEnd[-1] == '/')
3224  {
3225  --pPrefixEnd;
3226  }
3227  bInsertSlash = bAppendFinalSlash;
3228  pSuffixBegin = pPathEnd;
3229  }
3230  else if (nIndex == 0)
3231  {
3232  pPrefixEnd = pPathBegin;
3233  bInsertSlash =
3234  (pPathBegin < pPathEnd && *pPathBegin != '/') ||
3235  (pPathBegin == pPathEnd && bAppendFinalSlash);
3236  pSuffixBegin =
3237  (pPathEnd - pPathBegin == 1 && *pPathBegin == '/' &&
3238  !bAppendFinalSlash)
3239  ? pPathEnd : pPathBegin;
3240  }
3241  else
3242  {
3243  pPrefixEnd = pPathBegin;
3244  sal_Unicode const * pEnd = pPathEnd;
3245  if (pEnd > pPathBegin && pEnd[-1] == '/')
3246  --pEnd;
3247  bool bSkip = pPrefixEnd < pEnd && *pPrefixEnd == '/';
3248  bInsertSlash = false;
3249  pSuffixBegin = pPathEnd;
3250  while (nIndex-- > 0)
3251  for (;;)
3252  {
3253  if (bSkip)
3254  ++pPrefixEnd;
3255  bSkip = true;
3256  if (pPrefixEnd >= pEnd)
3257  {
3258  if (nIndex == 0)
3259  {
3260  bInsertSlash = bAppendFinalSlash;
3261  break;
3262  }
3263  else
3264  return false;
3265  }
3266  if (*pPrefixEnd == '/')
3267  {
3268  pSuffixBegin = pPrefixEnd;
3269  break;
3270  }
3271  }
3272  }
3273 
3274  OUStringBuffer aNewPath(256);
3275  aNewPath.append(pPathBegin, pPrefixEnd - pPathBegin);
3276  aNewPath.append('/');
3277  aNewPath.append(encodeText(rTheName, PART_PCHAR,
3278  eMechanism, eCharset, true));
3279  if (bInsertSlash) {
3280  aNewPath.append('/');
3281  }
3282  aNewPath.append(pSuffixBegin, pPathEnd - pSuffixBegin);
3283 
3284  return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
3285  RTL_TEXTENCODING_UTF8);
3286 }
3287 
3289 {
3290  if (HasError())
3291  return;
3292  if (m_aQuery.isPresent())
3293  {
3294  lcl_Erase(m_aAbsURIRef, m_aQuery.getBegin() - 1,
3295  m_aQuery.getLength() + 1);
3296  m_aFragment += m_aQuery.clear() - 1;
3297  }
3298 }
3299 
3300 bool INetURLObject::setQuery(OUString const & rTheQuery,
3301  EncodeMechanism eMechanism,
3302  rtl_TextEncoding eCharset)
3303 {
3304  if (!getSchemeInfo().m_bQuery)
3305  return false;
3306  OUString aNewQuery(encodeText(rTheQuery, PART_URIC,
3307  eMechanism, eCharset, true));
3308  sal_Int32 nDelta;
3309  if (m_aQuery.isPresent())
3310  nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery);
3311  else
3312  {
3313  m_aAbsURIRef.insert(m_aPath.getEnd(), u'?');
3314  nDelta = m_aQuery.set(m_aAbsURIRef, aNewQuery, m_aPath.getEnd() + 1)
3315  + 1;
3316  }
3317  m_aFragment += nDelta;
3318  return true;
3319 }
3320 
3322 {
3323  if (HasError())
3324  return false;
3325  if (m_aFragment.isPresent())
3326  {
3327  m_aAbsURIRef.setLength(m_aFragment.getBegin() - 1);
3328  m_aFragment.clear();
3329  }
3330  return true;
3331 }
3332 
3333 bool INetURLObject::setFragment(OUString const & rTheFragment,
3334  EncodeMechanism eMechanism,
3335  rtl_TextEncoding eCharset)
3336 {
3337  if (HasError())
3338  return false;
3339  OUString aNewFragment(encodeText(rTheFragment, PART_URIC,
3340  eMechanism, eCharset, true));
3341  if (m_aFragment.isPresent())
3342  m_aFragment.set(m_aAbsURIRef, aNewFragment);
3343  else
3344  {
3345  m_aAbsURIRef.append('#');
3346  m_aFragment.set(m_aAbsURIRef, aNewFragment, m_aAbsURIRef.getLength());
3347  }
3348  return true;
3349 }
3350 
3352 {
3353  sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3354  return (eStyle & FSysStyle::Dos)
3355  && m_aPath.getLength() >= 3
3356  && p[0] == '/'
3357  && rtl::isAsciiAlpha(p[1])
3358  && p[2] == ':'
3359  && (m_aPath.getLength() == 3 || p[3] == '/');
3360 }
3361 
3362 // static
3363 OUString INetURLObject::encodeText(sal_Unicode const * pBegin,
3364  sal_Unicode const * pEnd,
3365  Part ePart, EncodeMechanism eMechanism,
3366  rtl_TextEncoding eCharset,
3367  bool bKeepVisibleEscapes)
3368 {
3369  OUStringBuffer aResult(256);
3370  while (pBegin < pEnd)
3371  {
3372  EscapeType eEscapeType;
3373  sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3374  eMechanism, eCharset, eEscapeType);
3375  appendUCS4(aResult, nUTF32, eEscapeType, ePart,
3376  eCharset, bKeepVisibleEscapes);
3377  }
3378  return aResult.makeStringAndClear();
3379 }
3380 
3381 // static
3382 OUString INetURLObject::decode(sal_Unicode const * pBegin,
3383  sal_Unicode const * pEnd,
3384  DecodeMechanism eMechanism,
3385  rtl_TextEncoding eCharset)
3386 {
3387  switch (eMechanism)
3388  {
3389  case DecodeMechanism::NONE:
3390  return OUString(pBegin, pEnd - pBegin);
3391 
3392  case DecodeMechanism::ToIUri:
3393  eCharset = RTL_TEXTENCODING_UTF8;
3394  break;
3395 
3396  default:
3397  break;
3398  }
3399  OUStringBuffer aResult(static_cast<int>(pEnd-pBegin));
3400  while (pBegin < pEnd)
3401  {
3402  EscapeType eEscapeType;
3403  sal_uInt32 nUTF32 = getUTF32(pBegin, pEnd,
3404  EncodeMechanism::WasEncoded, eCharset, eEscapeType);
3405  switch (eEscapeType)
3406  {
3407  case EscapeType::NONE:
3408  aResult.appendUtf32(nUTF32);
3409  break;
3410 
3411  case EscapeType::Octet:
3412  appendEscape(aResult, nUTF32);
3413  break;
3414 
3415  case EscapeType::Utf32:
3416  if (
3417  rtl::isAscii(nUTF32) &&
3418  (
3419  eMechanism == DecodeMechanism::ToIUri ||
3420  (
3421  eMechanism == DecodeMechanism::Unambiguous &&
3422  mustEncode(nUTF32, PART_UNAMBIGUOUS)
3423  )
3424  )
3425  )
3426  {
3427  appendEscape(aResult, nUTF32);
3428  }
3429  else
3430  aResult.appendUtf32(nUTF32);
3431  break;
3432  }
3433  }
3434  return aResult.makeStringAndClear();
3435 }
3436 
3438  rtl_TextEncoding eCharset) const
3439 {
3440  INetURLObject aTemp(*this);
3441  aTemp.clearPassword();
3442  return aTemp.GetMainURL(eMechanism, eCharset);
3443 }
3444 
3446  rtl_TextEncoding eCharset) const
3447 {
3448  INetURLObject aTemp(*this);
3449  aTemp.clearFragment();
3450  return aTemp.GetMainURL(eMechanism, eCharset);
3451 }
3452 
// Builds a display string for this URL: scheme, ':', the "core" (authority
// and/or path), then '?' + query and '#' + fragment, all decoded with
// eMechanism/eCharset.
//
// For hierarchical schemes, whole path segments are left out of the middle of
// the core and replaced by "..." so that the abbreviated core -- measured via
// rStringWidth->queryStringWidth() together with a "?..." / "#..."
// placeholder for a present query or fragment -- stays within nWidth.  If the
// complete result is still wider than nWidth, it is shortened at the end.
3453 OUString
3455  uno::Reference< util::XStringWidth > const & rStringWidth,
3456  sal_Int32 nWidth,
3457  DecodeMechanism eMechanism,
3458  rtl_TextEncoding eCharset)
3459  const
3460 {
3461  OSL_ENSURE(rStringWidth.is(), "specification violation");
3462  OUStringBuffer aBuffer;
3463  // make sure that the scheme is copied for generic schemes: getSchemeInfo().m_pScheme
3464  // is empty ("") in that case, so take the scheme from m_aAbsURIRef
3465  if (m_eScheme != INetProtocol::Generic)
3466  {
3467  aBuffer.appendAscii(getSchemeInfo().m_pScheme);
3468  }
3469  else
3470  {
3471  if (!m_aAbsURIRef.isEmpty())
3472  {
3473  sal_Unicode const * pSchemeBegin
3474  = m_aAbsURIRef.getStr();
3475  sal_Unicode const * pSchemeEnd = pSchemeBegin;
3476 
// Scan to the first ':'.  NOTE(review): there is no end-of-buffer check
// here; this relies on a generic-scheme URL always containing ':' --
// confirm.
3477  while (pSchemeEnd[0] != ':')
3478  {
3479  ++pSchemeEnd;
3480  }
3481  aBuffer.append(pSchemeBegin, pSchemeEnd - pSchemeBegin);
3482  }
3483  }
3484  aBuffer.append(':');
// The core starts at the authority when the scheme has one, otherwise at the
// path, and always ends where the path ends.
3485  bool bAuthority = getSchemeInfo().m_bAuthority;
3486  sal_Unicode const * pCoreBegin
3487  = m_aAbsURIRef.getStr() + (bAuthority ? getAuthorityBegin() :
3488  m_aPath.getBegin());
3489  sal_Unicode const * pCoreEnd
3490  = m_aAbsURIRef.getStr() + m_aPath.getBegin() + m_aPath.getLength();
// Set as soon as at least one segment candidate fitted into nWidth.
3491  bool bSegment = false;
3492  if (getSchemeInfo().m_bHierarchical)
3493  {
// Placeholder that stands in for the query/fragment while measuring
// candidates; the real query and fragment are appended further below.
3494  OUString aRest;
3495  if (m_aQuery.isPresent())
3496  aRest = "?...";
3497  else if (m_aFragment.isPresent())
3498  aRest = "#...";
// aBuffer collects the accepted leading segments, aTrailer the accepted
// trailing ones; [pBegin, pEnd) is the part of the core not yet accepted on
// either side.
3499  OUStringBuffer aTrailer;
3500  sal_Unicode const * pBegin = pCoreBegin;
3501  sal_Unicode const * pEnd = pCoreEnd;
3502  sal_Unicode const * pPrefixBegin = pBegin;
3503  sal_Unicode const * pSuffixEnd = pEnd;
3504  bool bPrefix = true;
3505  bool bSuffix = true;
// Alternately try to take one more segment from the tail (bSuffix) and from
// the head (bPrefix).  A side is given up for good the first time its
// candidate does not fit; the loop ends when both sides have given up or the
// two sides meet.
3506  do
3507  {
3508  if (bSuffix)
3509  {
// Find the slash that starts the next trailing segment, skipping a slash
// that is the very last character of the core.
3510  sal_Unicode const * p = pSuffixEnd - 1;
3511  if (pSuffixEnd == pCoreEnd && *p == '/')
3512  --p;
3513  while (*p != '/')
3514  --p;
3515  if (bAuthority && p == pCoreBegin + 1)
3516  --p;
3517  OUString
3518  aSegment(decode(p + (p == pBegin && pBegin != pCoreBegin ?
3519  1 : 0),
3520  pSuffixEnd,
3521  eMechanism,
3522  eCharset));
3523  pSuffixEnd = p;
// Candidate: accepted prefix + "..." (if something is still left out) +
// new segment + accepted trailer + placeholder.
3524  OUStringBuffer aResult(aBuffer);
3525  if (pSuffixEnd != pBegin)
3526  aResult.append("...");
3527  aResult.append(aSegment);
3528  aResult.append(aTrailer.toString());
3529  aResult.append(aRest);
3530  if (rStringWidth->
3531  queryStringWidth(aResult.makeStringAndClear())
3532  <= nWidth)
3533  {
3534  aTrailer.insert(0, aSegment);
3535  bSegment = true;
3536  pEnd = pSuffixEnd;
3537  }
3538  else
3539  bSuffix = false;
3540  if (pPrefixBegin > pSuffixEnd)
3541  pPrefixBegin = pSuffixEnd;
3542  if (pBegin == pEnd)
3543  break;
3544  }
3545  if (bPrefix)
3546  {
// Find the end of the next leading segment; at the very start of an
// authority the two-character "//" is stepped over first.
3547  sal_Unicode const * p
3548  = pPrefixBegin
3549  + (bAuthority && pPrefixBegin == pCoreBegin ? 2 :
3550  1);
3551  OSL_ASSERT(p <= pEnd);
3552  while (p < pEnd && *p != '/')
3553  ++p;
3554  if (p == pCoreEnd - 1 && *p == '/')
3555  ++p;
3556  OUString
3557  aSegment(decode(pPrefixBegin
3558  + (pPrefixBegin == pCoreBegin ? 0 :
3559  1),
3560  p == pEnd ? p : p + 1,
3561  eMechanism,
3562  eCharset));
3563  pPrefixBegin = p;
// Candidate: accepted prefix + new segment + "..." (if something is still
// left out) + accepted trailer + placeholder.
3564  OUStringBuffer aResult(aBuffer);
3565  aResult.append(aSegment);
3566  if (pPrefixBegin != pEnd)
3567  aResult.append("...");
3568  aResult.append(aTrailer.toString());
3569  aResult.append(aRest);
3570  if (rStringWidth->
3571  queryStringWidth(aResult.makeStringAndClear())
3572  <= nWidth)
3573  {
3574  aBuffer.append(aSegment);
3575  bSegment = true;
3576  pBegin = pPrefixBegin;
3577  }
3578  else
3579  bPrefix = false;
3580  if (pPrefixBegin > pSuffixEnd)
3581  pSuffixEnd = pPrefixBegin;
3582  if (pBegin == pEnd)
3583  break;
3584  }
3585  }
3586  while (bPrefix || bSuffix);
// Join the two accepted sides, marking the omitted middle with "...".
3587  if (bSegment)
3588  {
3589  if (pPrefixBegin != pBegin || pSuffixEnd != pEnd)
3590  aBuffer.append("...");
3591  aBuffer.append(aTrailer.toString());
3592  }
3593  }
// No segment-wise abbreviation happened (non-hierarchical scheme, or no
// candidate fitted): emit the whole decoded core.
3594  if (!bSegment)
3595  aBuffer.append(decode(pCoreBegin,
3596  pCoreEnd,
3597  eMechanism,
3598  eCharset));
// Query and fragment are appended in full; only the measurements above used
// the short placeholder.
3599  if (m_aQuery.isPresent())
3600  {
3601  aBuffer.append('?');
3602  aBuffer.append(decode(m_aQuery, eMechanism, eCharset));
3603  }
3604  if (m_aFragment.isPresent())
3605  {
3606  aBuffer.append('#');
3607  aBuffer.append(decode(m_aFragment, eMechanism, eCharset));
3608  }
// Last resort if the result is still too wide: keep the first i characters
// followed by "...", decreasing i step by step; once i has reached 0, drop
// one character at a time from what is left until it fits or is empty.
3609  if (!aBuffer.isEmpty())
3610  {
3611  OUStringBuffer aResult(aBuffer);
3612  if (rStringWidth->queryStringWidth(aResult.makeStringAndClear())
3613  > nWidth)
3614  for (sal_Int32 i = aBuffer.getLength();;)
3615  {
3616  if (i == 0)
3617  {
3618  aBuffer.setLength(aBuffer.getLength() - 1);
3619  if (aBuffer.isEmpty())
3620  break;
3621  }
3622  else
3623  {
3624  aBuffer.setLength(--i);
3625  aBuffer.append("...");
3626  }
3627  aResult = aBuffer;
3628  if (rStringWidth->
3629  queryStringWidth(aResult.makeStringAndClear())
3630  <= nWidth)
3631  break;
3632  }
3633  }
3634  return aBuffer.makeStringAndClear();
3635 }
3636 
3637 bool INetURLObject::operator ==(INetURLObject const & rObject) const
3638 {
3639  if (m_eScheme != rObject.m_eScheme)
3640  return false;
3641  if (m_eScheme == INetProtocol::NotValid)
3642  return m_aAbsURIRef.toString() == rObject.m_aAbsURIRef.toString();
3643  if ((m_aScheme.compare(
3644  rObject.m_aScheme, m_aAbsURIRef, rObject.m_aAbsURIRef)
3645  != 0)
3646  || GetUser(DecodeMechanism::NONE) != rObject.GetUser(DecodeMechanism::NONE)
3647  || GetPass(DecodeMechanism::NONE) != rObject.GetPass(DecodeMechanism::NONE)
3648  || !GetHost(DecodeMechanism::NONE).equalsIgnoreAsciiCase(
3649  rObject.GetHost(DecodeMechanism::NONE))
3650  || GetPort() != rObject.GetPort()
3651  || HasParam() != rObject.HasParam()
3652  || GetParam() != rObject.GetParam())
3653  return false;
3654  OUString aPath1(GetURLPath(DecodeMechanism::NONE));
3655  OUString aPath2(rObject.GetURLPath(DecodeMechanism::NONE));
3656  switch (m_eScheme)
3657  {
3658  case INetProtocol::File:
3659  {
3660  // If the URL paths of two file URLs only differ in that one has a
3661  // final '/' and the other has not, take the two paths as
3662  // equivalent (this could be useful for other schemes, too):
3663  sal_Int32 nLength = aPath1.getLength();
3664  switch (nLength - aPath2.getLength())
3665  {
3666  case -1:
3667  if (aPath2[nLength] != '/')
3668  return false;
3669  break;
3670 
3671  case 0:
3672  break;
3673 
3674  case 1:
3675  if (aPath1[--nLength] != '/')
3676  return false;
3677  break;
3678 
3679  default:
3680  return false;
3681  }
3682  return aPath1.compareTo(aPath2, nLength) == 0;
3683  }
3684 
3685  default:
3686  return aPath1 == aPath2;
3687  }
3688 }
3689 
3691  OUString const & rTheUser,
3692  OUString const & rThePassword,
3693  OUString const & rTheHost,
3694  sal_uInt32 nThePort,
3695  OUString const & rThePath)
3696 {
3697  setInvalid();
3698  m_eScheme = eTheScheme;
3699  if (HasError() || m_eScheme == INetProtocol::Generic)
3700  return false;
3701  m_aAbsURIRef.setLength(0);
3702  m_aAbsURIRef.appendAscii(getSchemeInfo().m_pScheme);
3703  m_aAbsURIRef.append(':');
3704  if (getSchemeInfo().m_bAuthority)
3705  {
3706  m_aAbsURIRef.append("//");
3707  bool bUserInfo = false;
3708  if (getSchemeInfo().m_bUser)
3709  {
3710  if (!rTheUser.isEmpty())
3711  {
3712  m_aUser.set(m_aAbsURIRef,
3713  encodeText(rTheUser, PART_USER_PASSWORD,
3714  EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false),
3715  m_aAbsURIRef.getLength());
3716  bUserInfo = true;
3717  }
3718  }
3719  else if (!rTheUser.isEmpty())
3720  {
3721  setInvalid();
3722  return false;
3723  }
3724  if (!rThePassword.isEmpty())
3725  {
3726  if (getSchemeInfo().m_bPassword)
3727  {
3728  m_aAbsURIRef.append(':');
3729  m_aAuth.set(m_aAbsURIRef,
3730  encodeText(rThePassword, PART_USER_PASSWORD,
3731  EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false),
3732  m_aAbsURIRef.getLength());
3733  bUserInfo = true;
3734  }
3735  else
3736  {
3737  setInvalid();
3738  return false;
3739  }
3740  }
3741  if (bUserInfo && getSchemeInfo().m_bHost)
3742  m_aAbsURIRef.append('@');
3743  if (getSchemeInfo().m_bHost)
3744  {
3745  OUStringBuffer aSynHost(rTheHost);
3746  bool bNetBiosName = false;
3747  switch (m_eScheme)
3748  {
3749  case INetProtocol::File:
3750  {
3751  OUString sTemp(aSynHost.toString());
3752  if (sTemp.equalsIgnoreAsciiCase( "localhost" ))
3753  {
3754  aSynHost.setLength(0);
3755  }
3756  bNetBiosName = true;
3757  }
3758  break;
3759 
3760  case INetProtocol::Ldap:
3761  if (aSynHost.isEmpty() && nThePort != 0)
3762  {
3763  setInvalid();
3764  return false;
3765  }
3766  break;
3767 
3768  default:
3769  if (aSynHost.isEmpty())
3770  {
3771  setInvalid();
3772  return false;
3773  }
3774  break;
3775  }
3776  if (!parseHostOrNetBiosName(
3777  aSynHost.getStr(), aSynHost.getStr() + aSynHost.getLength(),
3778  EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, bNetBiosName, &aSynHost))
3779  {
3780  setInvalid();
3781  return false;
3782  }
3783  m_aHost.set(m_aAbsURIRef, aSynHost.makeStringAndClear(),
3784  m_aAbsURIRef.getLength());
3785  if (nThePort != 0)
3786  {
3787  if (getSchemeInfo().m_bPort)
3788  {
3789  m_aAbsURIRef.append(':');
3790  m_aPort.set(m_aAbsURIRef,
3791  OUString::number(nThePort),
3792  m_aAbsURIRef.getLength());
3793  }
3794  else
3795  {
3796  setInvalid();
3797  return false;
3798  }
3799  }
3800  }
3801  else if (!rTheHost.isEmpty() || nThePort != 0)
3802  {
3803  setInvalid();
3804  return false;
3805  }
3806  }
3807  OUStringBuffer aSynPath;
3808  sal_Unicode const * p = rThePath.getStr();
3809  sal_Unicode const * pEnd = p + rThePath.getLength();
3810  if (!parsePath(m_eScheme, &p, pEnd, EncodeMechanism::WasEncoded, RTL_TEXTENCODING_UTF8, false, '/',
3811  0x80000000, 0x80000000, 0x80000000, aSynPath)
3812  || p != pEnd)
3813  {
3814  setInvalid();
3815  return false;
3816  }
3817  m_aPath.set(m_aAbsURIRef, aSynPath.makeStringAndClear(),
3818  m_aAbsURIRef.getLength());
3819  return true;
3820 }
3821 
3822 // static
3823 OUString INetURLObject::GetAbsURL(OUString const & rTheBaseURIRef,
3824  OUString const & rTheRelURIRef,
3825  EncodeMechanism eEncodeMechanism,
3826  DecodeMechanism eDecodeMechanism,
3827  rtl_TextEncoding eCharset)
3828 {
3829  // Backwards compatibility:
3830  if (rTheRelURIRef.isEmpty() || rTheRelURIRef[0] == '#')
3831  return rTheRelURIRef;
3832 
3833  INetURLObject aTheAbsURIRef;
3834  bool bWasAbsolute;
3835  return INetURLObject(rTheBaseURIRef, eEncodeMechanism, eCharset).
3836  convertRelToAbs(rTheRelURIRef, aTheAbsURIRef,
3837  bWasAbsolute, eEncodeMechanism,
3838  eCharset, false, false,
3839  false, FSysStyle::Detect)
3840  || eEncodeMechanism != EncodeMechanism::WasEncoded
3841  || eDecodeMechanism != DecodeMechanism::ToIUri
3842  || eCharset != RTL_TEXTENCODING_UTF8 ?
3843  aTheAbsURIRef.GetMainURL(eDecodeMechanism, eCharset) :
3844  rTheRelURIRef;
3845 }
3846 
3848 {
3849  OUString aTheExtURIRef;
3850  translateToExternal(
3851  m_aAbsURIRef.toString(), aTheExtURIRef);
3852  return aTheExtURIRef;
3853 }
3854 
3855 bool INetURLObject::isSchemeEqualTo(std::u16string_view scheme) const {
3856  return m_aScheme.isPresent()
3857  && (rtl_ustr_compareIgnoreAsciiCase_WithLength(
3858  scheme.data(), scheme.size(),
3859  m_aAbsURIRef.getStr() + m_aScheme.getBegin(),
3860  m_aScheme.getLength())
3861  == 0);
3862 }
3863 
3865  return ( isSchemeEqualTo( INetProtocol::Http ) ||
3866  isSchemeEqualTo( INetProtocol::Https ) ||
3867  isSchemeEqualTo( INetProtocol::VndSunStarWebdav ) ||
3868  isSchemeEqualTo( u"vnd.sun.star.webdavs" ) ||
3869  isSchemeEqualTo( u"webdav" ) ||
3870  isSchemeEqualTo( u"webdavs" ));
3871 }
3872 
3873 // static
3875 {
3876  return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pPrefix);
3877 }
3878 
3879 // static
3881 {
3882  return OUString::createFromAscii(getSchemeInfo(eTheScheme).m_pScheme);
3883 }
3884 
3885 // static
3887  rTheAbsURIRef)
3888 {
3889  sal_Unicode const * p = rTheAbsURIRef.getStr();
3890  PrefixInfo const * pPrefix = getPrefix(p, p + rTheAbsURIRef.getLength());
3891  return pPrefix ? pPrefix->m_eScheme : INetProtocol::NotValid;
3892 }
3893 
3895  rtl_TextEncoding eCharset) const
3896 {
3897  // Check because PROT_VND_SUN_STAR_HELP, PROT_VND_SUN_STAR_HIER, and
3898  // PROT_VND_SUN_STAR_PKG misuse m_aHost:
3899  if (!getSchemeInfo().m_bHost)
3900  return OUString();
3901  OUStringBuffer aHostPort(decode(m_aHost, eMechanism, eCharset));
3902  if (m_aPort.isPresent())
3903  {
3904  aHostPort.append(':');
3905  aHostPort.append(decode(m_aPort, eMechanism, eCharset));
3906  }
3907  return aHostPort.makeStringAndClear();
3908 }
3909 
3910 sal_uInt32 INetURLObject::GetPort() const
3911 {
3912  if (m_aPort.isPresent())
3913  {
3914  sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPort.getBegin();
3915  sal_Unicode const * pEnd = p + m_aPort.getLength();
3916  sal_uInt32 nThePort;
3917  if (INetMIME::scanUnsigned(p, pEnd, true, nThePort) && p == pEnd)
3918  return nThePort;
3919  }
3920  return 0;
3921 }
3922 
3923 bool INetURLObject::SetPort(sal_uInt32 nThePort)
3924 {
3925  if (getSchemeInfo().m_bPort && m_aHost.isPresent())
3926  {
3927  OUString aNewPort(OUString::number(nThePort));
3928  sal_Int32 nDelta;
3929  if (m_aPort.isPresent())
3930  nDelta = m_aPort.set(m_aAbsURIRef, aNewPort);
3931  else
3932  {
3933  m_aAbsURIRef.insert(m_aHost.getEnd(), u':');
3934  nDelta = m_aPort.set(m_aAbsURIRef, aNewPort, m_aHost.getEnd() + 1)
3935  + 1;
3936  }
3937  m_aPath += nDelta;
3938  m_aQuery += nDelta;
3939  m_aFragment += nDelta;
3940  return true;
3941  }
3942  return false;
3943 }
3944 
3945 sal_Int32 INetURLObject::getSegmentCount(bool bIgnoreFinalSlash) const
3946 {
3947  if (!checkHierarchical())
3948  return 0;
3949 
3950  sal_Unicode const * p = m_aAbsURIRef.getStr() + m_aPath.getBegin();
3951  sal_Unicode const * pEnd = p + m_aPath.getLength();
3952  if (bIgnoreFinalSlash && pEnd > p && pEnd[-1] == '/')
3953  --pEnd;
3954  sal_Int32 n = p == pEnd || *p == '/' ? 0 : 1;
3955  while (p != pEnd)
3956  if (*p++ == '/')
3957  ++n;
3958  return n;
3959 }
3960 
3961 bool INetURLObject::removeSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash)
3962 {
3963  SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
3964  if (!aSegment.isPresent())
3965  return false;
3966 
3967  OUStringBuffer aNewPath(m_aPath.getLength());
3968  aNewPath.append(m_aAbsURIRef.getStr() + m_aPath.getBegin(),
3969  aSegment.getBegin() - m_aPath.getBegin());
3970  if (bIgnoreFinalSlash && aSegment.getEnd() == m_aPath.getEnd())
3971  aNewPath.append('/');
3972  else
3973  aNewPath.append(m_aAbsURIRef.getStr() + aSegment.getEnd(),
3974  m_aPath.getEnd() - aSegment.getEnd());
3975  if (aNewPath.isEmpty() && !aSegment.isEmpty() &&
3976  m_aAbsURIRef[aSegment.getBegin()] == '/')
3977  {
3978  aNewPath.append('/');
3979  }
3980 
3981  return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
3982  RTL_TEXTENCODING_UTF8);
3983 }
3984 
3985 OUString INetURLObject::getName(sal_Int32 nIndex, bool bIgnoreFinalSlash,
3986  DecodeMechanism eMechanism,
3987  rtl_TextEncoding eCharset) const
3988 {
3989  SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
3990  if (!aSegment.isPresent())
3991  return OUString();
3992 
3993  sal_Unicode const * pSegBegin
3994  = m_aAbsURIRef.getStr() + aSegment.getBegin();
3995  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
3996 
3997  if (pSegBegin < pSegEnd && *pSegBegin == '/')
3998  ++pSegBegin;
3999  sal_Unicode const * p = pSegBegin;
4000  while (p != pSegEnd && *p != ';')
4001  ++p;
4002 
4003  return decode(pSegBegin, p, eMechanism, eCharset);
4004 }
4005 
4006 bool INetURLObject::setName(OUString const& rTheName, EncodeMechanism eMechanism,
4007  rtl_TextEncoding eCharset)
4008 {
4009  SubString aSegment(getSegment(LAST_SEGMENT, true));
4010  if (!aSegment.isPresent())
4011  return false;
4012 
4013  sal_Unicode const * pPathBegin
4014  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4015  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4016  sal_Unicode const * pSegBegin
4017  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4018  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4019 
4020  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4021  ++pSegBegin;
4022  sal_Unicode const * p = pSegBegin;
4023  while (p != pSegEnd && *p != ';')
4024  ++p;
4025 
4026  OUStringBuffer aNewPath(256);
4027  aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4028  aNewPath.append(encodeText(rTheName, PART_PCHAR, eMechanism, eCharset, true));
4029  aNewPath.append(p, pPathEnd - p);
4030 
4031  return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4032  RTL_TEXTENCODING_UTF8);
4033 }
4034 
4036  const
4037 {
4038  SubString aSegment(getSegment(LAST_SEGMENT, true/*bIgnoreFinalSlash*/));
4039  if (!aSegment.isPresent())
4040  return false;
4041 
4042  sal_Unicode const * pSegBegin
4043  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4044  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4045 
4046  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4047  ++pSegBegin;
4048  for (sal_Unicode const * p = pSegBegin; p != pSegEnd && *p != ';'; ++p)
4049  if (*p == '.' && p != pSegBegin)
4050  return true;
4051  return false;
4052 }
4053 
4054 OUString INetURLObject::getBase(sal_Int32 nIndex, bool bIgnoreFinalSlash,
4055  DecodeMechanism eMechanism,
4056  rtl_TextEncoding eCharset) const
4057 {
4058  SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4059  if (!aSegment.isPresent())
4060  return OUString();
4061 
4062  sal_Unicode const * pSegBegin
4063  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4064  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4065 
4066  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4067  ++pSegBegin;
4068  sal_Unicode const * pExtension = nullptr;
4069  sal_Unicode const * p = pSegBegin;
4070  for (; p != pSegEnd && *p != ';'; ++p)
4071  if (*p == '.' && p != pSegBegin)
4072  pExtension = p;
4073  if (!pExtension)
4074  pExtension = p;
4075 
4076  return decode(pSegBegin, pExtension, eMechanism, eCharset);
4077 }
4078 
4079 bool INetURLObject::setBase(OUString const & rTheBase, sal_Int32 nIndex,
4080  EncodeMechanism eMechanism,
4081  rtl_TextEncoding eCharset)
4082 {
4083  SubString aSegment(getSegment(nIndex, true/*bIgnoreFinalSlash*/));
4084  if (!aSegment.isPresent())
4085  return false;
4086 
4087  sal_Unicode const * pPathBegin
4088  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4089  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4090  sal_Unicode const * pSegBegin
4091  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4092  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4093 
4094  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4095  ++pSegBegin;
4096  sal_Unicode const * pExtension = nullptr;
4097  sal_Unicode const * p = pSegBegin;
4098  for (; p != pSegEnd && *p != ';'; ++p)
4099  if (*p == '.' && p != pSegBegin)
4100  pExtension = p;
4101  if (!pExtension)
4102  pExtension = p;
4103 
4104  OUStringBuffer aNewPath;
4105  aNewPath.append(pPathBegin, pSegBegin - pPathBegin);
4106  aNewPath.append(encodeText(rTheBase, PART_PCHAR,
4107  eMechanism, eCharset, true));
4108  aNewPath.append(pExtension, pPathEnd - pExtension);
4109 
4110  return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4111  RTL_TEXTENCODING_UTF8);
4112 }
4113 
4114 OUString INetURLObject::getExtension(sal_Int32 nIndex,
4115  bool bIgnoreFinalSlash,
4116  DecodeMechanism eMechanism,
4117  rtl_TextEncoding eCharset) const
4118 {
4119  SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4120  if (!aSegment.isPresent())
4121  return OUString();
4122 
4123  sal_Unicode const * pSegBegin
4124  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4125  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4126 
4127  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4128  ++pSegBegin;
4129  sal_Unicode const * pExtension = nullptr;
4130  sal_Unicode const * p = pSegBegin;
4131  for (; p != pSegEnd && *p != ';'; ++p)
4132  if (*p == '.' && p != pSegBegin)
4133  pExtension = p;
4134 
4135  if (!pExtension)
4136  return OUString();
4137 
4138  return decode(pExtension + 1, p, eMechanism, eCharset);
4139 }
4140 
4141 bool INetURLObject::setExtension(OUString const & rTheExtension,
4142  sal_Int32 nIndex, bool bIgnoreFinalSlash,
4143  rtl_TextEncoding eCharset)
4144 {
4145  SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4146  if (!aSegment.isPresent())
4147  return false;
4148 
4149  sal_Unicode const * pPathBegin
4150  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4151  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4152  sal_Unicode const * pSegBegin
4153  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4154  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4155 
4156  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4157  ++pSegBegin;
4158  sal_Unicode const * pExtension = nullptr;
4159  sal_Unicode const * p = pSegBegin;
4160  for (; p != pSegEnd && *p != ';'; ++p)
4161  if (*p == '.' && p != pSegBegin)
4162  pExtension = p;
4163  if (!pExtension)
4164  pExtension = p;
4165 
4166  OUStringBuffer aNewPath(128);
4167  aNewPath.append(pPathBegin, pExtension - pPathBegin);
4168  aNewPath.append('.');
4169  aNewPath.append(encodeText(rTheExtension, PART_PCHAR,
4170  EncodeMechanism::WasEncoded, eCharset, true));
4171  aNewPath.append(p, pPathEnd - p);
4172 
4173  return setPath(aNewPath.makeStringAndClear(), EncodeMechanism::NotCanonical,
4174  RTL_TEXTENCODING_UTF8);
4175 }
4176 
4177 bool INetURLObject::removeExtension(sal_Int32 nIndex, bool bIgnoreFinalSlash)
4178 {
4179  SubString aSegment(getSegment(nIndex, bIgnoreFinalSlash));
4180  if (!aSegment.isPresent())
4181  return false;
4182 
4183  sal_Unicode const * pPathBegin
4184  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4185  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4186  sal_Unicode const * pSegBegin
4187  = m_aAbsURIRef.getStr() + aSegment.getBegin();
4188  sal_Unicode const * pSegEnd = pSegBegin + aSegment.getLength();
4189 
4190  if (pSegBegin < pSegEnd && *pSegBegin == '/')
4191  ++pSegBegin;
4192  sal_Unicode const * pExtension = nullptr;
4193  sal_Unicode const * p = pSegBegin;
4194  for (; p != pSegEnd && *p != ';'; ++p)
4195  if (*p == '.' && p != pSegBegin)
4196  pExtension = p;
4197  if (!pExtension)
4198  return true;
4199 
4200  OUString aNewPath =
4201  rtl::OUStringView(pPathBegin, pExtension - pPathBegin) +
4202  rtl::OUStringView(p, pPathEnd - p);
4203 
4204  return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4205 }
4206 
4208 {
4209  if (!checkHierarchical())
4210  return false;
4211 
4212  sal_Unicode const * pPathBegin
4213  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4214  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4215  return pPathEnd > pPathBegin && pPathEnd[-1] == '/';
4216 }
4217 
4219 {
4220  if (!checkHierarchical())
4221  return false;
4222 
4223  sal_Unicode const * pPathBegin
4224  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4225  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4226  if (pPathEnd > pPathBegin && pPathEnd[-1] == '/')
4227  return true;
4228 
4229  OUString aNewPath = rtl::OUStringView(pPathBegin, pPathEnd - pPathBegin) + "/";
4230 
4231  return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4232 }
4233 
4235 {
4236  if (!checkHierarchical())
4237  return false;
4238 
4239  sal_Unicode const * pPathBegin
4240  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4241  sal_Unicode const * pPathEnd = pPathBegin + m_aPath.getLength();
4242  if (pPathEnd <= pPathBegin || pPathEnd[-1] != '/')
4243  return true;
4244 
4245  --pPathEnd;
4246  if (pPathEnd == pPathBegin && *pPathBegin == '/')
4247  return false;
4248  OUString aNewPath(pPathBegin, pPathEnd - pPathBegin);
4249 
4250  return setPath(aNewPath, EncodeMechanism::NotCanonical, RTL_TEXTENCODING_UTF8);
4251 }
4252 
4254  sal_Unicode * pDelimiter) const
4255 {
4256  if (m_eScheme != INetProtocol::File)
4257  return OUString();
4258 
4259  if (((eStyle & FSysStyle::Vos) ? 1 : 0)
4260  + ((eStyle & FSysStyle::Unix) ? 1 : 0)
4261  + ((eStyle & FSysStyle::Dos) ? 1 : 0)
4262  > 1)
4263  {
4264  if(eStyle & FSysStyle::Vos && m_aHost.isPresent() && m_aHost.getLength() > 0)
4265  {
4266  eStyle= FSysStyle::Vos;
4267  }
4268  else
4269  {
4270  if(hasDosVolume(eStyle) || ((eStyle & FSysStyle::Dos) && m_aHost.isPresent() && m_aHost.getLength() > 0))
4271  {
4272  eStyle = FSysStyle::Dos;
4273  }
4274  else
4275  {
4276  if(eStyle & FSysStyle::Unix && (!m_aHost.isPresent() || m_aHost.getLength() == 0))
4277  {
4278  eStyle = FSysStyle::Unix;
4279  }
4280  else
4281  {
4282  eStyle= FSysStyle(0);
4283  }
4284  }
4285  }
4286  }
4287 
4288  switch (eStyle)
4289  {
4290  case FSysStyle::Vos:
4291  {
4292  if (pDelimiter)
4293  *pDelimiter = '/';
4294 
4295  OUStringBuffer aSynFSysPath;
4296  aSynFSysPath.append("//");
4297  if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4298  aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4299  RTL_TEXTENCODING_UTF8));
4300  else
4301  aSynFSysPath.append('.');
4302  aSynFSysPath.append(decode(m_aPath, DecodeMechanism::WithCharset,
4303  RTL_TEXTENCODING_UTF8));
4304  return aSynFSysPath.makeStringAndClear();
4305  }
4306 
4307  case FSysStyle::Unix:
4308  {
4309  if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4310  return OUString();
4311 
4312  if (pDelimiter)
4313  *pDelimiter = '/';
4314 
4315  return decode(m_aPath, DecodeMechanism::WithCharset, RTL_TEXTENCODING_UTF8);
4316  }
4317 
4318  case FSysStyle::Dos:
4319  {
4320  if (pDelimiter)
4321  *pDelimiter = '\\';
4322 
4323  OUStringBuffer aSynFSysPath(64);
4324  if (m_aHost.isPresent() && m_aHost.getLength() > 0)
4325  {
4326  aSynFSysPath.append("\\\\");
4327  aSynFSysPath.append(decode(m_aHost, DecodeMechanism::WithCharset,
4328  RTL_TEXTENCODING_UTF8));
4329  aSynFSysPath.append('\\');
4330  }
4331  sal_Unicode const * p
4332  = m_aAbsURIRef.getStr() + m_aPath.getBegin();
4333  sal_Unicode const * pEnd = p + m_aPath.getLength();
4334  DBG_ASSERT(p < pEnd && *p == '/',
4335  "INetURLObject::getFSysPath(): Bad path");
4336  ++p;
4337  while (p < pEnd)
4338  {
4339  EscapeType eEscapeType;
4340  sal_uInt32 nUTF32 = getUTF32(p, pEnd, EncodeMechanism::WasEncoded,
4341  RTL_TEXTENCODING_UTF8,
4342  eEscapeType);
4343  if (eEscapeType == EscapeType::NONE && nUTF32 == '/')
4344  aSynFSysPath.append('\\');
4345  else
4346  aSynFSysPath.appendUtf32(nUTF32);
4347  }
4348  return aSynFSysPath.makeStringAndClear();
4349  }
4350 
4351  default:
4352  return OUString();
4353  }
4354 }
4355 
4356 // static
4357 void INetURLObject::appendUCS4Escape(OUStringBuffer & rTheText,
4358  sal_uInt32 nUCS4)
4359 {
4360  DBG_ASSERT(nUCS4 < 0x80000000,
4361  "INetURLObject::appendUCS4Escape(): Bad char");
4362  if (nUCS4 < 0x80)
4363  appendEscape(rTheText, nUCS4);
4364  else if (nUCS4 < 0x800)
4365  {
4366  appendEscape(rTheText, nUCS4 >> 6 | 0xC0);
4367  appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4368  }
4369  else if (nUCS4 < 0x10000)
4370  {
4371  appendEscape(rTheText, nUCS4 >> 12 | 0xE0);
4372  appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4373  appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4374  }
4375  else if (nUCS4 < 0x200000)
4376  {
4377  appendEscape(rTheText, nUCS4 >> 18 | 0xF0);
4378  appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4379  appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4380  appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4381  }
4382  else if (nUCS4 < 0x4000000)
4383  {
4384  appendEscape(rTheText, nUCS4 >> 24 | 0xF8);
4385  appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4386  appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4387  appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4388  appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4389  }
4390  else
4391  {
4392  appendEscape(rTheText, nUCS4 >> 30 | 0xFC);
4393  appendEscape(rTheText, (nUCS4 >> 24 & 0x3F) | 0x80);
4394  appendEscape(rTheText, (nUCS4 >> 18 & 0x3F) | 0x80);
4395  appendEscape(rTheText, (nUCS4 >> 12 & 0x3F) | 0x80);
4396  appendEscape(rTheText, (nUCS4 >> 6 & 0x3F) | 0x80);
4397  appendEscape(rTheText, (nUCS4 & 0x3F) | 0x80);
4398  }
4399 }
4400 
4401 // static
4402 void INetURLObject::appendUCS4(OUStringBuffer& rTheText, sal_uInt32 nUCS4,
4403  EscapeType eEscapeType,
4404  Part ePart, rtl_TextEncoding eCharset,
4405  bool bKeepVisibleEscapes)
4406 {
4407  bool bEscape;
4408  rtl_TextEncoding eTargetCharset = RTL_TEXTENCODING_DONTKNOW;
4409  switch (eEscapeType)
4410  {
4411  case EscapeType::NONE:
4412  if (mustEncode(nUCS4, ePart))
4413  {
4414  bEscape = true;
4415  eTargetCharset = RTL_TEXTENCODING_UTF8;
4416  }
4417  else
4418  bEscape = false;
4419  break;
4420 
4421  case EscapeType::Octet:
4422  bEscape = true;
4423  eTargetCharset = RTL_TEXTENCODING_ISO_8859_1;
4424  break;
4425 
4426  case EscapeType::Utf32:
4427  if (mustEncode(nUCS4, ePart))
4428  {
4429  bEscape = true;
4430  eTargetCharset = eCharset;
4431  }
4432  else if (bKeepVisibleEscapes && INetMIME::isVisible(nUCS4))
4433  {
4434  bEscape = true;
4435  eTargetCharset = RTL_TEXTENCODING_ASCII_US;
4436  }
4437  else
4438  bEscape = false;
4439  break;
4440  default:
4441  bEscape = false;
4442  }
4443 
4444  if (bEscape)
4445  {
4446  switch (eTargetCharset)
4447  {
4448  default:
4449  OSL_FAIL("INetURLObject::appendUCS4(): Unsupported charset");
4450  [[fallthrough]];
4451  case RTL_TEXTENCODING_ASCII_US:
4452  case RTL_TEXTENCODING_ISO_8859_1:
4453  appendEscape(rTheText, nUCS4);
4454  break;
4455  case RTL_TEXTENCODING_UTF8:
4456  appendUCS4Escape(rTheText, nUCS4);
4457  break;
4458  }
4459  }
4460  else
4461  rTheText.append(sal_Unicode(nUCS4));
4462 }
4463 
4464 // static
4465 sal_uInt32 INetURLObject::getUTF32(sal_Unicode const *& rBegin,
4466  sal_Unicode const * pEnd,
4467  EncodeMechanism eMechanism,
4468  rtl_TextEncoding eCharset,
4469  EscapeType & rEscapeType)
4470 {
4471  DBG_ASSERT(rBegin < pEnd, "INetURLObject::getUTF32(): Bad sequence");
4472  sal_uInt32 nUTF32 = INetMIME::getUTF32Character(rBegin, pEnd);
4473  switch (eMechanism)
4474  {
4475  case EncodeMechanism::All:
4476  rEscapeType = EscapeType::NONE;
4477  break;
4478 
4479  case EncodeMechanism::WasEncoded:
4480  {
4481  int nWeight1;
4482  int nWeight2;
4483  if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4484  && (nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0
4485  && (nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0)
4486  {
4487  rBegin += 2;
4488  nUTF32 = nWeight1 << 4 | nWeight2;
4489  switch (eCharset)
4490  {
4491  default:
4492  OSL_FAIL(
4493  "INetURLObject::getUTF32(): Unsupported charset");
4494  [[fallthrough]];
4495  case RTL_TEXTENCODING_ASCII_US:
4496  rEscapeType = rtl::isAscii(nUTF32) ?
4497  EscapeType::Utf32 : EscapeType::Octet;
4498  break;
4499 
4500  case RTL_TEXTENCODING_ISO_8859_1:
4501  rEscapeType = EscapeType::Utf32;
4502  break;
4503 
4504  case RTL_TEXTENCODING_UTF8:
4505  if (rtl::isAscii(nUTF32))
4506  rEscapeType = EscapeType::Utf32;
4507  else
4508  {
4509  if (nUTF32 >= 0xC0 && nUTF32 <= 0xF4)
4510  {
4511  sal_uInt32 nEncoded;
4512  int nShift;
4513  sal_uInt32 nMin;
4514  if (nUTF32 <= 0xDF)
4515  {
4516  nEncoded = (nUTF32 & 0x1F) << 6;
4517  nShift = 0;
4518  nMin = 0x80;
4519  }
4520  else if (nUTF32 <= 0xEF)
4521  {
4522  nEncoded = (nUTF32 & 0x0F) << 12;
4523  nShift = 6;
4524  nMin = 0x800;
4525  }
4526  else
4527  {
4528  nEncoded = (nUTF32 & 0x07) << 18;
4529  nShift = 12;
4530  nMin = 0x10000;
4531  }
4532  sal_Unicode const * p = rBegin;
4533  bool bUTF8 = true;
4534  for (;;)
4535  {
4536  if (pEnd - p < 3
4537  || p[0] != '%'
4538  || (nWeight1
4539  = INetMIME::getHexWeight(p[1]))
4540  < 8
4541  || nWeight1 > 11
4542  || (nWeight2
4543  = INetMIME::getHexWeight(p[2]))
4544  < 0)
4545  {
4546  bUTF8 = false;
4547  break;
4548  }
4549  p += 3;
4550  nEncoded
4551  |= ((nWeight1 & 3) << 4 | nWeight2)
4552  << nShift;
4553  if (nShift == 0)
4554  break;
4555  nShift -= 6;
4556  }
4557  if (bUTF8 && rtl::isUnicodeScalarValue(nEncoded)
4558  && nEncoded >= nMin)
4559  {
4560  rBegin = p;
4561  nUTF32 = nEncoded;
4562  rEscapeType = EscapeType::Utf32;
4563  break;
4564  }
4565  }
4566  rEscapeType = EscapeType::Octet;
4567  }
4568  break;
4569  }
4570  }
4571  else
4572  rEscapeType = EscapeType::NONE;
4573  break;
4574  }
4575 
4576  case EncodeMechanism::NotCanonical:
4577  {
4578  int nWeight1;
4579  int nWeight2;
4580  if (nUTF32 == static_cast<unsigned char>('%') && rBegin + 1 < pEnd
4581  && ((nWeight1 = INetMIME::getHexWeight(rBegin[0])) >= 0)
4582  && ((nWeight2 = INetMIME::getHexWeight(rBegin[1])) >= 0))
4583  {
4584  rBegin += 2;
4585  nUTF32 = nWeight1 << 4 | nWeight2;
4586  rEscapeType = EscapeType::Octet;
4587  }
4588  else
4589  rEscapeType = EscapeType::NONE;
4590  break;
4591  }
4592  }
4593  return nUTF32;
4594 }
4595 
4596 // static
4597 sal_uInt32 INetURLObject::scanDomain(sal_Unicode const *& rBegin,
4598  sal_Unicode const * pEnd,
4599  bool bEager)
4600 {
4601  enum State { STATE_DOT, STATE_LABEL, STATE_HYPHEN };
4602  State eState = STATE_DOT;
4603  sal_Int32 nLabels = 0;
4604  sal_Unicode const * pLastAlphanumeric = nullptr;
4605  for (sal_Unicode const * p = rBegin;; ++p)
4606  switch (eState)
4607  {
4608  case STATE_DOT:
4609  if (p != pEnd && (rtl::isAsciiAlphanumeric(*p) || *p == '_'))
4610  {
4611  ++nLabels;
4612  eState = STATE_LABEL;
4613  break;
4614  }
4615  if (bEager || nLabels == 0)
4616  return 0;
4617  rBegin = p - 1;
4618  return nLabels;
4619 
4620  case STATE_LABEL:
4621  if (p != pEnd)
4622  {
4623  if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4624  break;
4625  else if (*p == '.')
4626  {
4627  eState = STATE_DOT;
4628  break;
4629  }
4630  else if (*p == '-')
4631  {
4632  pLastAlphanumeric = p;
4633  eState = STATE_HYPHEN;
4634  break;
4635  }
4636  }
4637  rBegin = p;
4638  return nLabels;
4639 
4640  case STATE_HYPHEN:
4641  if (p != pEnd)
4642  {
4643  if (rtl::isAsciiAlphanumeric(*p) || *p == '_')
4644  {
4645  eState = STATE_LABEL;
4646  break;
4647  }
4648  else if (*p == '-')
4649  break;
4650  }
4651  if (bEager)
4652  return 0;
4653  rBegin = pLastAlphanumeric;
4654  return nLabels;
4655  }
4656 }
4657 
4658 // static
4660  sal_Unicode const * pEnd)
4661 {
4662  if (rBegin != pEnd && *rBegin == '[') {
4663  sal_Unicode const * p = rBegin + 1;
4664  //TODO: check for valid IPv6address (RFC 2373):
4665  while (p != pEnd && (rtl::isAsciiHexDigit(*p) || *p == ':' || *p == '.'))
4666  {
4667  ++p;
4668  }
4669  if (p != pEnd && *p == ']') {
4670  rBegin = p + 1;
4671  return true;
4672  }
4673  }
4674  return false;
4675 }
4676 
4678  const
4679 {
4680  if (!checkHierarchical())
4681  return OUString();
4682  INetURLObject aTemp(*this);
4683  aTemp.clearFragment();
4684  aTemp.clearQuery();
4685  aTemp.removeSegment(LAST_SEGMENT, false);
4686  aTemp.setFinalSlash();
4687  return aTemp.GetMainURL(DecodeMechanism::ToIUri);
4688 }
4689 
4691  rtl_TextEncoding eCharset) const
4692 {
4693  return getName(LAST_SEGMENT, true, eMechanism, eCharset);
4694 }
4695 
4697 {
4698  return getExtension(LAST_SEGMENT, false);
4699 }
4700 
4702 {
4703  INetURLObject aTemp(*this);
4704  aTemp.clearFragment();
4705  aTemp.clearQuery();
4706  if (!aTemp.removeSegment(LAST_SEGMENT, false))
4707  return;
4708  *this = aTemp;
4709 }
4710 
4712 {
4713  if (m_eScheme != INetProtocol::File)
4714  return OUString();
4715  OUString aSystemPath;
4716  if (osl::FileBase::getSystemPathFromFileURL(
4717  decode(m_aAbsURIRef.getStr(),
4718  m_aAbsURIRef.getStr() + m_aPath.getEnd(),
4719  DecodeMechanism::NONE, RTL_TEXTENCODING_UTF8),
4720  aSystemPath)
4721  != osl::FileBase::E_None)
4722  return OUString();
4723  return aSystemPath;
4724 }
4725 
4726 OUString INetURLObject::GetFull() const
4727 {
4728  INetURLObject aTemp(*this);
4729  aTemp.removeFinalSlash();
4730  return aTemp.PathToFileName();
4731 }
4732 
4733 OUString INetURLObject::GetPath() const
4734 {
4735  INetURLObject aTemp(*this);
4736  aTemp.removeSegment();
4737  aTemp.removeFinalSlash();
4738  return aTemp.PathToFileName();
4739 }
4740 
4741 void INetURLObject::SetBase(OUString const & rTheBase)
4742 {
4743  setBase(rTheBase, LAST_SEGMENT, EncodeMechanism::All);
4744 }
4745 
4746 OUString INetURLObject::GetBase() const
4747 {
4748  return getBase(LAST_SEGMENT, true, DecodeMechanism::WithCharset);
4749 }
4750 
4751 void INetURLObject::SetExtension(OUString const & rTheExtension)
4752 {
4753  setExtension(rTheExtension, LAST_SEGMENT, false);
4754 }
4755 
4757 {
4758  OUString aTheExtension(getExtension(LAST_SEGMENT, false));
4759  return removeExtension(LAST_SEGMENT, false)
4760  ? aTheExtension : OUString();
4761 }
4762 
4763 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
bool setHost(OUString const &rTheHost, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2825
void SetBase(OUString const &rTheBase)
Definition: urlobj.cxx:4741
const sal_Int32 m_nLength
DecodeMechanism
The way strings that represent (parts of) URIs are returned from get- methods.
Definition: urlobj.hxx:223
bool convertAbsToRel(OUString const &rTheAbsURIRef, OUString &rTheRelURIRef, EncodeMechanism eEncodeMechanism, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset, FSysStyle eStyle) const
Definition: urlobj.cxx:1871
SubString m_aAuth
Definition: urlobj.hxx:968
EncodeMechanism
The way input strings that represent (parts of) URIs are interpreted in set-methods.
Definition: urlobj.hxx:194
#define PI
void operator+=(sal_Int32 nDelta)
Definition: urlobj.cxx:262
TOOLS_DLLPRIVATE SubString getAuthority() const
Definition: urlobj.cxx:2218
bool isPresent() const
Definition: urlobj.hxx:936
bool setFragment(OUString const &rTheMark, EncodeMechanism eMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:3333
TOOLS_DLLPRIVATE sal_Int32 getAuthorityBegin() const
Definition: urlobj.cxx:2201
void CutLastName()
Definition: urlobj.cxx:4701
bool convertRelToAbs(OUString const &rTheRelURIRef, INetURLObject &rTheAbsURIRef, bool &rWasAbsolute, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bIgnoreFragment, bool bSmart, bool bRelativeNonURIs, FSysStyle eStyle) const
Definition: urlobj.cxx:1473
char const * m_pScheme
Definition: urlobj.cxx:292
OUString GetURLPath(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:457
OUString getBase(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the base of the name of a segment.
Definition: urlobj.cxx:4054
SubString m_aUser
Definition: urlobj.hxx:967
static bool isVisible(sal_uInt32 nChar)
Check for US-ASCII visible character.
Definition: inetmime.hxx:207
sal_Int64 n
SubString m_aPort
Definition: urlobj.hxx:970
bool removeExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true)
Remove the extension of the name of a segment.
Definition: urlobj.cxx:4177
bool operator==(INetURLObject const &rObject) const
Definition: urlobj.cxx:3637
bool ConcatData(INetProtocol eTheScheme, OUString const &rTheUser, OUString const &rThePassword, OUString const &rTheHost, sal_uInt32 nThePort, OUString const &rThePath)
Definition: urlobj.cxx:3690
bool hasFinalSlash() const
Determine whether the hierarchical path ends in a final slash.
Definition: urlobj.cxx:4207
static bool convertIntToExt(OUString const &rTheIntURIRef, OUString &rTheExtURIRef, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2031
bool isAnyKnownWebDAVScheme() const
Check if the scheme is one of the WebDAV scheme we know about.
Definition: urlobj.cxx:3864
TOOLS_DLLPRIVATE bool checkHierarchical() const
Definition: urlobj.cxx:3139
OUString GetLastName(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the last segment in the path.
Definition: urlobj.cxx:4690
static bool convertExtToInt(OUString const &rTheExtURIRef, OUString &rTheIntURIRef, DecodeMechanism eDecodeMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2054
static TOOLS_DLLPRIVATE bool parseHost(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, OUString &rCanonic)
Definition: urlobj.cxx:2330
static int getWeight(sal_uInt32 nChar)
Get the digit weight of a US-ASCII character.
Definition: inetmime.hxx:213
bool HasError() const
Definition: urlobj.hxx:260
char const * m_pTranslatedPrefix
Definition: urlobj.cxx:309
INetProtocol m_eScheme
Definition: urlobj.cxx:310
OUString CutExtension()
Definition: urlobj.cxx:4756
bool HasParam() const
Definition: urlobj.hxx:733
OUString PathToFileName() const
Definition: urlobj.cxx:4711
sal_Int32 getSegmentCount(bool bIgnoreFinalSlash=true) const
The number of segments in the hierarchical path.
Definition: urlobj.cxx:3945
OUString GetBase() const
Definition: urlobj.cxx:4746
int compare(SubString const &rOther, OUStringBuffer const &rThisString, OUStringBuffer const &rOtherString) const
Definition: urlobj.cxx:268
HashMap_OWString_Interface aMap
sal_uInt16 sal_Unicode
bool setName(OUString const &rTheName, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Set the name of the last segment (preserving any parameters and any query or fragment part)...
Definition: urlobj.cxx:4006
bool setFinalSlash()
Make the hierarchical path end in a final slash (if it does not already do so).
Definition: urlobj.cxx:4218
TOOLS_DLLPRIVATE SchemeInfo const & getSchemeInfo() const
Definition: urlobj.cxx:407
SubString m_aQuery
Definition: urlobj.hxx:972
bool setExtension(OUString const &rTheExtension, sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Set the extension of the name of a segment (replacing an already existing extension).
Definition: urlobj.cxx:4141
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
OUString getAbbreviated(css::uno::Reference< css::util::XStringWidth > const &rStringWidth, sal_Int32 nWidth, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3454
static OUString GetAbsURL(OUString const &rTheBaseURIRef, OUString const &rTheRelURIRef, EncodeMechanism eEncodeMechanism=EncodeMechanism::WasEncoded, DecodeMechanism eDecodeMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
If rTheRelURIRef cannot be converted to an absolute URL (because of syntactic reasons), either rTheRelURIRef or an empty string is returned: If all of the parameters eEncodeMechanism, eDecodeMechanism and eCharset have their respective default values, then rTheRelURIRef is returned unmodified; otherwise, an empty string is returned.
Definition: urlobj.cxx:3823
OUString GetUser(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:416
VOS notation (e.g., "//server/dir/file").
sal_uInt32 GetPort() const
Definition: urlobj.cxx:3910
void SetExtension(OUString const &rTheExtension)
Definition: urlobj.cxx:4751
bool setUser(OUString const &rTheUser, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2229
bool insertName(OUString const &rTheName, bool bAppendFinalSlash=false, sal_Int32 nIndex=LAST_SEGMENT, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Insert a new segment into the hierarchical path.
Definition: urlobj.cxx:3202
INetProtocol
Definition: urlobj.hxx:50
static sal_uInt32 getUTF32(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, EscapeType &rEscapeType)
Definition: urlobj.cxx:4465
static TOOLS_DLLPRIVATE bool parseHostOrNetBiosName(sal_Unicode const *pBegin, sal_Unicode const *pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bNetBiosName, OUStringBuffer *pCanonic)
Definition: urlobj.cxx:2764
static bool isIMAPAtomChar(sal_uInt32 nChar)
Check whether some character is valid within an RFC 2060 .
Definition: inetmime.cxx:965
TOOLS_DLLPRIVATE SubString getSegment(sal_Int32 nIndex, bool bIgnoreFinalSlash) const
Definition: urlobj.cxx:3156
OUString getFSysPath(FSysStyle eStyle, sal_Unicode *pDelimiter=nullptr) const
Return the file system path represented by a file URL (ignoring any fragment part).
Definition: urlobj.cxx:4253
char const * m_pPrefix
Definition: urlobj.cxx:293
#define DBG_ASSERT(sCon, aError)
Definition: debug.hxx:57
int i
bool Append(OUString const &rTheSegment, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Definition: urlobj.cxx:3149
OUString GetURLNoMark(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3445
DOS notation (e.g., "a:\dir\file" and "\\server\dir\file").
bool isEmpty() const
Definition: urlobj.hxx:938
Detect the used notation.
OUStringBuffer m_aAbsURIRef
Definition: urlobj.hxx:965
OUString getName(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the name of a segment of the hierarchical path.
Definition: urlobj.cxx:3985
sal_Int32 getBegin() const
Definition: urlobj.hxx:940
static OUString encodeText(sal_Unicode const *pBegin, sal_Unicode const *pEnd, Part ePart, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes)
Definition: urlobj.cxx:3363
static TOOLS_DLLPRIVATE void appendEscape(OUStringBuffer &rTheText, sal_uInt32 nOctet)
Definition: urlobj.cxx:426
float u
TOOLS_DLLPRIVATE bool hasDosVolume(FSysStyle eStyle) const
Definition: urlobj.cxx:3351
bool hasExtension() const
Determine whether the name of the last segment has an extension.
Definition: urlobj.cxx:4035
static OUString GetScheme(INetProtocol eTheScheme)
Return the URL 'prefix' for a given scheme.
Definition: urlobj.cxx:3874
bool clearFragment()
Definition: urlobj.cxx:3321
TOOLS_DLLPRIVATE void setInvalid()
Definition: urlobj.cxx:563
sal_Int32 getLength() const
Definition: urlobj.hxx:942
bool removeFinalSlash()
Remove a final slash from the hierarchical path.
Definition: urlobj.cxx:4234
SubString m_aFragment
Definition: urlobj.hxx:973
SubString m_aHost
Definition: urlobj.hxx:969
sal_Int32 getEnd() const
Definition: urlobj.hxx:944
enumrange< T >::Iterator end(enumrange< T >)
allow read accesses
bool setPassword(OUString const &rThePassword, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:2289
bool setBase(OUString const &rTheBase, sal_Int32 nIndex=LAST_SEGMENT, EncodeMechanism eMechanism=EncodeMechanism::WasEncoded, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Set the base of the name of a segment (preserving the extension).
Definition: urlobj.cxx:4079
Unix notation (e.g., "/dir/file").
OUString GetParam(rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:735
OUString GetMainURL(DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:262
static bool equalIgnoreCase(const sal_Unicode *pBegin1, const sal_Unicode *pEnd1, const char *pString2)
Check two US-ASCII strings for equality, ignoring case.
Definition: inetmime.cxx:989
static sal_uInt32 getUTF32Character(const sal_Unicode *&rBegin, const sal_Unicode *pEnd)
Get the UTF-32 character at the head of a UTF-16 encoded string.
Definition: inetmime.hxx:227
SubString m_aPath
Definition: urlobj.hxx:971
static sal_Unicode const * scanContentType(OUString const &rStr, OUString *pType=nullptr, OUString *pSubType=nullptr, INetContentTypeParameterList *pParameters=nullptr)
Parse the body of an RFC 2045 Content-Type header field.
Definition: inetmime.cxx:1029
std::unique_ptr< char[]> aBuffer
FSysStyle
The supported notations for file system paths.
Definition: urlobj.hxx:86
bool SetPort(sal_uInt32 nThePort)
Definition: urlobj.cxx:3923
OUString GetPath() const
Definition: urlobj.cxx:4733
#define SAL_WARN_IF(condition, area, stream)
OUString GetPartBeforeLastName() const
Definition: urlobj.cxx:4677
static TOOLS_DLLPRIVATE bool parsePath(INetProtocol eScheme, sal_Unicode const **pBegin, sal_Unicode const *pEnd, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSkippedInitialSlash, sal_uInt32 nSegmentDelimiter, sal_uInt32 nAltSegmentDelimiter, sal_uInt32 nQueryDelimiter, sal_uInt32 nFragmentDelimiter, OUStringBuffer &rSynPath)
Definition: urlobj.cxx:2867
State
void changeScheme(INetProtocol eTargetScheme)
Definition: urlobj.cxx:1451
std::map< OUString, rtl::Reference< Entity > > map
OUString GetHost(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:438
OUString GetFileExtension() const
Get the 'extension' of the last segment in the path.
Definition: urlobj.cxx:4696
OUString GetHostPort(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3894
void * p
static void appendUCS4Escape(OUStringBuffer &rTheText, sal_uInt32 nUCS4)
Definition: urlobj.cxx:4357
OUString getExternalURL() const
Definition: urlobj.cxx:3847
OUString GetURLNoPass(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.cxx:3437
static sal_uInt32 scanDomain(sal_Unicode const *&rBegin, sal_Unicode const *pEnd, bool bEager=true)
Definition: urlobj.cxx:4597
INetProtocol m_eScheme
Definition: urlobj.hxx:974
SubString m_aScheme
Definition: urlobj.hxx:966
void clearQuery()
Definition: urlobj.cxx:3288
bool clearPassword()
Definition: urlobj.cxx:2271
bool setAbsURIRef(OUString const &rTheAbsURIRef, EncodeMechanism eMechanism, rtl_TextEncoding eCharset, bool bSmart, FSysStyle eStyle)
Definition: urlobj.cxx:691
sal_Int32 nLength
static void appendUCS4(OUStringBuffer &rTheText, sal_uInt32 nUCS4, EscapeType eEscapeType, Part ePart, rtl_TextEncoding eCharset, bool bKeepVisibleEscapes)
Definition: urlobj.cxx:4402
static TOOLS_DLLPRIVATE bool scanIPv6reference(sal_Unicode const *&rBegin, sal_Unicode const *pEnd)
Definition: urlobj.cxx:4659
sal_Int32 set(OUStringBuffer &rString, OUString const &rSubString, sal_Int32 nTheBegin)
Definition: urlobj.cxx:254
static sal_Int32 decodeSomeChars(css::uno::Sequence< sal_Int8 > &aPass, const OUString &sBuffer)
OUString GetPass(DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Definition: urlobj.hxx:421
static TOOLS_DLLPRIVATE PrefixInfo const * getPrefix(sal_Unicode const *&rBegin, sal_Unicode const *pEnd)
Definition: urlobj.cxx:2077
static int getHexWeight(sal_uInt32 nChar)
Get the hexadecimal digit weight of a US-ASCII character.
Definition: inetmime.hxx:219
std::unique_ptr< SvMemoryStream > getData() const
Definition: urlobj.cxx:594
static OUString GetSchemeName(INetProtocol eTheScheme)
Return the human-readable name for a given scheme.
Definition: urlobj.cxx:3880
OUString GetFull() const
Definition: urlobj.cxx:4726
static bool scanUnsigned(const sal_Unicode *&rBegin, const sal_Unicode *pEnd, bool bLeadingZeroes, sal_uInt32 &rValue)
Definition: inetmime.cxx:1006
bool setQuery(OUString const &rTheQuery, EncodeMechanism eMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:3300
static INetProtocol CompareProtocolScheme(OUString const &rTheAbsURIRef)
Definition: urlobj.cxx:3886
void set(css::uno::UnoInterfaceReference const &value)
OUString getExtension(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true, DecodeMechanism eMechanism=DecodeMechanism::ToIUri, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8) const
Get the extension of the name of a segment.
Definition: urlobj.cxx:4114
bool removeSegment(sal_Int32 nIndex=LAST_SEGMENT, bool bIgnoreFinalSlash=true)
Remove a segment from the hierarchical path.
Definition: urlobj.cxx:3961
bool isSchemeEqualTo(INetProtocol scheme) const
Definition: urlobj.hxx:382
bool setPath(OUString const &rThePath, EncodeMechanism eMechanism, rtl_TextEncoding eCharset)
Definition: urlobj.cxx:3122
char const * m_pPrefix
Definition: urlobj.cxx:308
static OUString decode(OUString const &rText, DecodeMechanism eMechanism, rtl_TextEncoding eCharset=RTL_TEXTENCODING_UTF8)
Decode some text.
Definition: urlobj.hxx:1301