LibreOffice Module o3tl (master) 1
string_view.hxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#pragma once
11
12#include <sal/config.h>
13
14#include <cassert>
15#include <cstddef>
16#include <string>
17#include <string_view>
18
19#include <o3tl/intcmp.hxx>
20#include <rtl/character.hxx>
21#include <rtl/ustring.h>
22#include <rtl/math.h>
23#include <sal/types.h>
24
25namespace o3tl
26{
27// Like OUString::equalsAscii/OUString::equalsAsciiL, but for std::u16string_view:
28inline bool equalsAscii(std::u16string_view s1, std::string_view s2)
29{
30 return s1.size() == s2.size()
31 && rtl_ustr_ascii_shortenedCompare_WithLength(s1.data(), s1.size(), s2.data(), s2.size())
32 == 0;
33}
34
35// Like OUString::compareToAscii, but for std::u16string_view and std::string_view:
36inline int compareToAscii(std::u16string_view s1, std::string_view s2)
37{
38 return rtl_ustr_asciil_reverseCompare_WithLength(s1.data(), s1.size(), s2.data(), s2.size());
39};
40
41// Like OUString::equalsIgnoreAsciiCase, but for two std::u16string_view:
42inline bool equalsIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
43{
44 if (s1.size() != s2.size())
45 return false;
46 if (s1.data() == s2.data())
47 return true;
48 return rtl_ustr_compareIgnoreAsciiCase_WithLength(s1.data(), s1.size(), s2.data(), s2.size())
49 == 0;
50};
51
52inline bool equalsIgnoreAsciiCase(std::u16string_view s1, std::string_view s2)
53{
54 return s1.size() == s2.size()
55 && (rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength(s1.data(), s1.size(),
56 s2.data(), s2.size())
57 == 0);
58}
59
60inline bool equalsIgnoreAsciiCase(std::string_view s1, std::string_view s2)
61{
62 if (s1.size() != s2.size())
63 return false;
64 if (s1.data() == s2.data())
65 return true;
66 return rtl_str_compareIgnoreAsciiCase_WithLength(s1.data(), s1.size(), s2.data(), s2.size())
67 == 0;
68};
69
70// Like OUString::compareToIgnoreAsciiCase, but for two std::u16string_view:
71inline int compareToIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
72{
73 return rtl_ustr_compareIgnoreAsciiCase_WithLength(s1.data(), s1.size(), s2.data(), s2.size());
74};
75
76// Like OUString::matchIgnoreAsciiCase, but for two std::u16string_view:
77inline bool matchIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2,
78 sal_Int32 fromIndex = 0)
79{
80 return rtl_ustr_shortenedCompareIgnoreAsciiCase_WithLength(
81 s1.data() + fromIndex, s1.size() - fromIndex, s2.data(), s2.size(), s2.size())
82 == 0;
83}
84
85// Like OUString::matchIgnoreAsciiCase, but for std::u16string_view and std::string_view:
86inline bool matchIgnoreAsciiCase(std::u16string_view s1, std::string_view s2,
87 sal_Int32 fromIndex = 0)
88{
89 return rtl_ustr_ascii_shortenedCompareIgnoreAsciiCase_WithLength(
90 s1.data() + fromIndex, s1.size() - fromIndex, s2.data(), s2.size())
91 == 0;
92}
93
94// Like OUString::endsWithIgnoreAsciiCase, but for std::u16string_view
95inline bool endsWithIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2,
96 std::u16string_view* rest = nullptr)
97{
98 auto const b = s2.size() <= s1.size() && matchIgnoreAsciiCase(s1, s2, s1.size() - s2.size());
99 if (b && rest != nullptr)
100 {
101 *rest = s1.substr(0, s1.size() - s2.size());
102 }
103 return b;
104}
105
106inline bool endsWithIgnoreAsciiCase(std::u16string_view s1, std::string_view s2,
107 std::u16string_view* rest = nullptr)
108{
109 auto const b = s2.size() <= s1.size()
110 && rtl_ustr_ascii_compareIgnoreAsciiCase_WithLengths(
111 s1.data() + s1.size() - s2.size(), s2.size(), s2.data(), s2.size())
112 == 0;
113 if (b && rest != nullptr)
114 {
115 *rest = s1.substr(0, s1.size() - s2.size());
116 }
117 return b;
118}
119
// Similar to O[U]String::getToken, returning the first token of a std::[u16]string_view starting
// at a given position.
//
// Attention: There are two sets of o3tl::getToken overloads here. This first set has an interface
// based on std::size_t length parameters, and its semantics don't match those of
// O[U]String::getToken exactly (but if needed, it can be extended to return the n'th token instead
// of just the first, and/or support an initial position of npos, to make the semantics match).
//
// On entry, position must be <= sv.size() (asserted). The returned token runs from position up to,
// but not including, the next delimiter. On return, position is the index just past that
// delimiter, or npos when no further delimiter was found (the token then extends to the end of
// sv).
template <typename charT, typename traits = std::char_traits<charT>>
inline std::basic_string_view<charT, traits> getToken(std::basic_string_view<charT, traits> sv,
                                                      charT delimiter, std::size_t& position)
{
    assert(position <= sv.size());

    using view_type = std::basic_string_view<charT, traits>;
    std::size_t const nStart = position;
    std::size_t const nDelim = sv.find(delimiter, nStart);

    if (nDelim == view_type::npos)
    {
        // Last token: everything that is left, and signal exhaustion to the caller.
        position = view_type::npos;
        return sv.substr(nStart);
    }

    // Continue after the delimiter on the next call.
    position = nDelim + 1;
    return sv.substr(nStart, nDelim - nStart);
}
146// The following two overloads prevent overload resolution mistakes that would occur with their
147// template counterpart, when sv is of a type that is implicitly convertible to basic_string_view
148// (like OString or OUString), in which case overload resolution would erroneously choose the
149// three-argument overloads (taking sv, nToken, cTok) from the second set of
150// o3tl::getToken overloads below:
151inline std::string_view getToken(std::string_view sv, char delimiter, std::size_t& position)
152{
153 return getToken<char>(sv, delimiter, position);
154}
155inline std::u16string_view getToken(std::u16string_view sv, char16_t delimiter,
156 std::size_t& position)
157{
158 return getToken<char16_t>(sv, delimiter, position);
159}
160
161// Similar to O[U]String::getToken.
162//
163// Attention: There are two sets of o3tl::getToken overloads here. This second set has an
164// interface based on sal_Int32 length parameters, and is meant to be a drop-in replacement for
165// O[U]String::getToken.
166template <typename charT, typename traits = std::char_traits<charT>>
167inline std::basic_string_view<charT, traits> getToken(std::basic_string_view<charT, traits> pStr,
168 sal_Int32 nToken, charT cTok,
169 sal_Int32& rnIndex)
170{
171 assert(o3tl::IntCmp(rnIndex) <= o3tl::IntCmp(pStr.size()));
172
173 // Return an empty string and set rnIndex to -1 if either nToken or rnIndex is
174 // negative:
175 if (rnIndex >= 0 && nToken >= 0)
176 {
177 const charT* pOrgCharStr = pStr.data();
178 const charT* pCharStr = pOrgCharStr + rnIndex;
179 sal_Int32 nLen = pStr.size() - rnIndex;
180 sal_Int32 nTokCount = 0;
181 const charT* pCharStrStart = pCharStr;
182 while (nLen > 0)
183 {
184 if (*pCharStr == cTok)
185 {
186 nTokCount++;
187
188 if (nTokCount > nToken)
189 break;
190 if (nTokCount == nToken)
191 pCharStrStart = pCharStr + 1;
192 }
193
194 pCharStr++;
195 nLen--;
196 }
197 if (nTokCount >= nToken)
198 {
199 if (nLen > 0)
200 rnIndex = pCharStr - pOrgCharStr + 1;
201 else
202 rnIndex = -1;
203 return std::basic_string_view<charT, traits>(pCharStrStart, pCharStr - pCharStrStart);
204 }
205 }
206
207 rnIndex = -1;
208 return std::basic_string_view<charT, traits>();
209}
210// The following two overloads prevent deduction failures that would occur with their template
211// counterpart, when sv is of a type that is implicitly convertible to basic_string_view (like
212// OString or OUString):
213inline std::string_view getToken(std::string_view sv, sal_Int32 nToken, char cTok,
214 sal_Int32& rnIndex)
215{
216 return getToken<char>(sv, nToken, cTok, rnIndex);
217}
218inline std::u16string_view getToken(std::u16string_view sv, sal_Int32 nToken, char16_t cTok,
219 sal_Int32& rnIndex)
220{
221 return getToken<char16_t>(sv, nToken, cTok, rnIndex);
222}
223inline std::string_view getToken(std::string_view sv, sal_Int32 nToken, char cTok)
224{
225 sal_Int32 nIndex = 0;
226 return getToken<char>(sv, nToken, cTok, nIndex);
227}
228inline std::u16string_view getToken(std::u16string_view sv, sal_Int32 nToken, char16_t cTok)
229{
230 sal_Int32 nIndex = 0;
231 return getToken<char16_t>(sv, nToken, cTok, nIndex);
232}
233
// Implementations of C++20 std::basic_string_view::starts_with and
// std::basic_string_view::ends_with, until we can use those directly on all platforms.
//
// Returns true when sv begins with the character sequence x (an empty x always matches).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool starts_with(std::basic_string_view<charT, traits> sv,
                           std::basic_string_view<charT, traits> x) noexcept
{
#if defined __cpp_lib_starts_ends_with
    return sv.starts_with(x);
#else
    // Compare at most x.size() leading characters of sv against all of x; the position 0 is
    // always valid, so this cannot throw.
    return sv.compare(0, x.size(), x) == 0;
#endif
}
// Returns true when sv is non-empty and its first character equals x (per traits::eq).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool starts_with(std::basic_string_view<charT, traits> sv, charT x) noexcept
{
#if defined __cpp_lib_starts_ends_with
    return sv.starts_with(x);
#else
    if (sv.empty())
        return false;
    return traits::eq(sv[0], x);
#endif
}
// Returns true when sv begins with the null-terminated character sequence x.
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool starts_with(std::basic_string_view<charT, traits> sv, charT const* x)
{
#if defined __cpp_lib_starts_ends_with
    return sv.starts_with(x);
#else
    // Wrap the C string in a view and defer to the view/view overload.
    std::basic_string_view<charT, traits> const prefix(x);
    return starts_with(sv, prefix);
#endif
}
// Implementation of C++20 std::basic_string_view::ends_with, until we can use that directly on
// all platforms.
//
// Returns true when sv ends with the character sequence x (an empty x always matches).
//
// The guard uses __cpp_lib_starts_ends_with, the standard feature-test macro that covers both
// starts_with and ends_with, so that the native member function is used where available.
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool ends_with(std::basic_string_view<charT, traits> sv,
                         std::basic_string_view<charT, traits> x) noexcept
{
#if defined __cpp_lib_starts_ends_with
    return sv.ends_with(x);
#else
    // The size check guarantees that the start position is valid, so compare cannot throw.
    return sv.size() >= x.size()
           && sv.compare(sv.size() - x.size(), std::basic_string_view<charT, traits>::npos, x) == 0;
#endif
}
// Returns true when sv is non-empty and its last character equals x (per traits::eq).
//
// The guard uses __cpp_lib_starts_ends_with, the standard feature-test macro that covers both
// starts_with and ends_with, so that the native member function is used where available.
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool ends_with(std::basic_string_view<charT, traits> sv, charT x) noexcept
{
#if defined __cpp_lib_starts_ends_with
    return sv.ends_with(x);
#else
    return !sv.empty() && traits::eq(sv.back(), x);
#endif
}
// Returns true when sv ends with the null-terminated character sequence x.
//
// The guard uses __cpp_lib_starts_ends_with, the standard feature-test macro that covers both
// starts_with and ends_with, so that the native member function is used where available.
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool ends_with(std::basic_string_view<charT, traits> sv, charT const* x)
{
#if defined __cpp_lib_starts_ends_with
    return sv.ends_with(x);
#else
    // Same comparison as the view/view overload, applied to a view over the C string. The size
    // check guarantees that the start position passed to compare is valid.
    std::basic_string_view<charT, traits> const suffix(x);
    return sv.size() >= suffix.size()
           && sv.compare(sv.size() - suffix.size(), std::basic_string_view<charT, traits>::npos,
                         suffix)
                  == 0;
#endif
}
293// The following overloads prevent deduction failures that would occur with their template
294// counterparts, when x is of a type that is implicitly convertible to basic_string_view (like
295// OString or OUString, and we only bother to provide overloads for the char and char16_t cases, not
296// also for char32_t and wchar_t, nor for C++20 char8_t):
297constexpr bool starts_with(std::string_view sv, std::string_view x) noexcept
298{
299 return starts_with<char>(sv, x);
300}
301constexpr bool starts_with(std::u16string_view sv, std::u16string_view x) noexcept
302{
303 return starts_with<char16_t>(sv, x);
304}
305constexpr bool ends_with(std::string_view sv, std::string_view x) noexcept
306{
307 return ends_with<char>(sv, x);
308}
309constexpr bool ends_with(std::u16string_view sv, std::u16string_view x) noexcept
310{
311 return ends_with<char16_t>(sv, x);
312}
313
// Variants of C++20 std::basic_string_view::starts_with and
// std::basic_string_view::ends_with that have a rest out parameter, similar to our OString and
// OUString startsWith and endsWith member functions.
//
// Returns whether sv begins with x. On a match, *rest receives sv without the leading x; on a
// mismatch, *rest is left untouched. rest must not be null (asserted).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool starts_with(std::basic_string_view<charT, traits> sv,
                           std::basic_string_view<charT, traits> x,
                           std::basic_string_view<charT, traits>* rest) noexcept
{
    assert(rest != nullptr);
    if (!starts_with(sv, x))
        return false;

    // sv is a by-value copy, so it can be trimmed in place; x.length() <= sv.size() here.
    sv.remove_prefix(x.length());
    *rest = sv;
    return true;
}
// Returns whether sv begins with the character x. On a match, *rest receives sv without its
// first character; on a mismatch, *rest is left untouched. rest must not be null (asserted).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool starts_with(std::basic_string_view<charT, traits> sv, charT x,
                           std::basic_string_view<charT, traits>* rest) noexcept
{
    assert(rest != nullptr);
    if (!starts_with(sv, x))
        return false;

    // A match implies sv is non-empty, so dropping one character is valid.
    sv.remove_prefix(1);
    *rest = sv;
    return true;
}
// Returns whether sv begins with the null-terminated sequence x. On a match, *rest receives sv
// without the leading traits::length(x) characters; on a mismatch, *rest is left untouched.
// rest must not be null (asserted).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool starts_with(std::basic_string_view<charT, traits> sv, charT const* x,
                           std::basic_string_view<charT, traits>* rest)
{
    assert(rest != nullptr);
    if (!starts_with(sv, x))
        return false;

    // A match implies traits::length(x) <= sv.size().
    sv.remove_prefix(traits::length(x));
    *rest = sv;
    return true;
}
// Returns whether sv ends with x. On a match, *rest receives sv without the trailing x; on a
// mismatch, *rest is left untouched. rest must not be null (asserted).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool ends_with(std::basic_string_view<charT, traits> sv,
                         std::basic_string_view<charT, traits> x,
                         std::basic_string_view<charT, traits>* rest) noexcept
{
    assert(rest != nullptr);
    if (!ends_with(sv, x))
        return false;

    // sv is a by-value copy, so it can be trimmed in place; x.length() <= sv.size() here.
    sv.remove_suffix(x.length());
    *rest = sv;
    return true;
}
// Returns whether sv ends with the character x. On a match, *rest receives sv without its last
// character; on a mismatch, *rest is left untouched. rest must not be null (asserted).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool ends_with(std::basic_string_view<charT, traits> sv, charT x,
                         std::basic_string_view<charT, traits>* rest) noexcept
{
    assert(rest != nullptr);
    if (!ends_with(sv, x))
        return false;

    // A match implies sv is non-empty, so dropping one character is valid.
    sv.remove_suffix(1);
    *rest = sv;
    return true;
}
// Returns whether sv ends with the null-terminated sequence x. On a match, *rest receives sv
// without the trailing traits::length(x) characters; on a mismatch, *rest is left untouched.
// rest must not be null (asserted).
template <typename charT, typename traits = std::char_traits<charT>>
constexpr bool ends_with(std::basic_string_view<charT, traits> sv, charT const* x,
                         std::basic_string_view<charT, traits>* rest)
{
    assert(rest != nullptr);
    if (!ends_with(sv, x))
        return false;

    // A match implies traits::length(x) <= sv.size().
    sv.remove_suffix(traits::length(x));
    *rest = sv;
    return true;
}
391// The following overloads prevent deduction failures that would occur with their template
392// counterparts, when x is of a type that is implicitly convertible to basic_string_view (like
393// OString or OUString, and we only bother to provide overloads for the char and char16_t cases, not
394// also for char32_t and wchar_t, nor for C++20 char8_t):
395constexpr bool starts_with(std::string_view sv, std::string_view x, std::string_view* rest) noexcept
396{
397 return starts_with<char>(sv, x, rest);
398}
399constexpr bool starts_with(std::u16string_view sv, std::u16string_view x,
400 std::u16string_view* rest) noexcept
401{
402 return starts_with<char16_t>(sv, x, rest);
403}
404constexpr bool ends_with(std::string_view sv, std::string_view x, std::string_view* rest) noexcept
405{
406 return ends_with<char>(sv, x, rest);
407}
408constexpr bool ends_with(std::u16string_view sv, std::u16string_view x,
409 std::u16string_view* rest) noexcept
410{
411 return ends_with<char16_t>(sv, x, rest);
412}
413
414namespace internal
415{
417{
418 /* Space or Control character? */
419 if ((c <= 32) && c)
420 return true;
421
422 /* Only in the General Punctuation area Space or Control characters are included? */
423 if ((c < 0x2000) || (c > 0x2029))
424 return false;
425
426 if ((c <= 0x200B) || /* U+2000 - U+200B All Spaces */
427 (c >= 0x2028)) /* U+2028 LINE SEPARATOR, U+2029 PARAGRAPH SEPARATOR */
428 return true;
429
430 return false;
431}
432} // namespace internal
433
434// Like OUString::trim, but for std::[u16]string_view:
435template <typename charT, typename traits = std::char_traits<charT>>
436std::basic_string_view<charT, traits> trim(std::basic_string_view<charT, traits> str)
437{
438 auto pFirst = str.data();
439 auto pLast = pFirst + str.size();
440
441 while ((pFirst < pLast) && internal::implIsWhitespace(*pFirst))
442 ++pFirst;
443
444 if (pFirst == pLast)
445 return {};
446
447 do
448 --pLast;
449 while (internal::implIsWhitespace(*pLast));
450
451 return std::basic_string_view<charT, traits>(pFirst, pLast - pFirst + 1);
452}
453
454// "deduction guides"
455
456inline auto trim(std::string_view str) { return trim<>(str); }
457inline auto trim(std::u16string_view str) { return trim<>(str); }
458
459// Like OString::toInt32, but for std::string_view:
460inline sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix = 10)
461{
462 sal_Int64 n = rtl_ustr_toInt64_WithLength(str.data(), radix, str.size());
463 if (n < SAL_MIN_INT32 || n > SAL_MAX_INT32)
464 n = 0;
465 return n;
466}
467inline sal_Int32 toInt32(std::string_view str, sal_Int16 radix = 10)
468{
469 sal_Int64 n = rtl_str_toInt64_WithLength(str.data(), radix, str.size());
470 if (n < SAL_MIN_INT32 || n > SAL_MAX_INT32)
471 n = 0;
472 return n;
473}
474
475// Like OString::toUInt32, but for std::string_view:
476inline sal_uInt32 toUInt32(std::u16string_view str, sal_Int16 radix = 10)
477{
478 sal_Int64 n = rtl_ustr_toInt64_WithLength(str.data(), radix, str.size());
479 if (n < 0 || n > SAL_MAX_UINT32)
480 n = 0;
481 return n;
482}
483inline sal_uInt32 toUInt32(std::string_view str, sal_Int16 radix = 10)
484{
485 sal_Int64 n = rtl_str_toInt64_WithLength(str.data(), radix, str.size());
486 if (n < 0 || n > SAL_MAX_UINT32)
487 n = 0;
488 return n;
489}
490
491// Like OString::toInt64, but for std::string_view:
492inline sal_Int64 toInt64(std::u16string_view str, sal_Int16 radix = 10)
493{
494 return rtl_ustr_toInt64_WithLength(str.data(), radix, str.size());
495}
496inline sal_Int64 toInt64(std::string_view str, sal_Int16 radix = 10)
497{
498 return rtl_str_toInt64_WithLength(str.data(), radix, str.size());
499}
500
501// Like OString::toDouble, but for std::string_view:
502inline double toDouble(std::u16string_view str)
503{
504 return rtl_math_uStringToDouble(str.data(), str.data() + str.size(), '.', 0, nullptr, nullptr);
505}
506inline double toDouble(std::string_view str)
507{
508 return rtl_math_stringToDouble(str.data(), str.data() + str.size(), '.', 0, nullptr, nullptr);
509}
510
511// Like OUString::iterateCodePoints, but for std::string_view:
512inline sal_uInt32 iterateCodePoints(std::u16string_view string, sal_Int32* indexUtf16,
513 sal_Int32 incrementCodePoints = 1)
514{
515 std::size_t n;
516 char16_t cu;
517 sal_uInt32 cp;
518 assert(indexUtf16 != nullptr);
519 n = *indexUtf16;
520 assert(n <= string.length());
521 while (incrementCodePoints < 0)
522 {
523 assert(n > 0);
524 cu = string[--n];
525 if (rtl::isLowSurrogate(cu) && n != 0 && rtl::isHighSurrogate(string[n - 1]))
526 {
527 --n;
528 }
529 ++incrementCodePoints;
530 }
531 assert(n < string.length());
532 cu = string[n];
533 if (rtl::isHighSurrogate(cu) && string.length() - n >= 2 && rtl::isLowSurrogate(string[n + 1]))
534 {
535 cp = rtl::combineSurrogates(cu, string[n + 1]);
536 }
537 else
538 {
539 cp = cu;
540 }
541 while (incrementCodePoints > 0)
542 {
543 assert(n < string.length());
544 cu = string[n++];
545 if (rtl::isHighSurrogate(cu) && n != string.length() && rtl::isLowSurrogate(string[n]))
546 {
547 ++n;
548 }
549 --incrementCodePoints;
550 }
551 assert(n <= string.length());
552 *indexUtf16 = n;
553 return cp;
554}
555
556} // namespace
557
558/* vim:set shiftwidth=4 softtabstop=4 expandtab cinoptions=b1,g0,N-s cinkeys+=0=break: */
XPropertyListType t
float x
sal_Int32 nIndex
sal_Int64 n
def position(n=-1)
bool implIsWhitespace(sal_Unicode c)
std::basic_string_view< charT, traits > trim(std::basic_string_view< charT, traits > str)
bool equalsIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
Definition: string_view.hxx:42
bool endsWithIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2, std::u16string_view *rest=nullptr)
Definition: string_view.hxx:95
constexpr bool ends_with(std::basic_string_view< charT, traits > sv, std::basic_string_view< charT, traits > x) noexcept
sal_Int32 toInt32(std::u16string_view str, sal_Int16 radix=10)
sal_Int64 toInt64(std::u16string_view str, sal_Int16 radix=10)
constexpr bool starts_with(std::basic_string_view< charT, traits > sv, std::basic_string_view< charT, traits > x) noexcept
std::basic_string_view< charT, traits > getToken(std::basic_string_view< charT, traits > sv, charT delimiter, std::size_t &position)
double toDouble(std::u16string_view str)
sal_uInt32 toUInt32(std::u16string_view str, sal_Int16 radix=10)
bool matchIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2, sal_Int32 fromIndex=0)
Definition: string_view.hxx:77
int compareToIgnoreAsciiCase(std::u16string_view s1, std::u16string_view s2)
Definition: string_view.hxx:71
sal_uInt32 iterateCodePoints(std::u16string_view string, sal_Int32 *indexUtf16, sal_Int32 incrementCodePoints=1)
int compareToAscii(std::u16string_view s1, std::string_view s2)
Definition: string_view.hxx:36
bool equalsAscii(std::u16string_view s1, std::string_view s2)
Definition: string_view.hxx:28
DefTokenId nToken
#define SAL_MAX_INT32
sal_uInt16 sal_Unicode
#define SAL_MAX_UINT32