LibreOffice Module svl (master) 1
sharedstringpool.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
11#include <svl/sharedstring.hxx>
13
14#include <mutex>
15#include <unordered_map>
16#include <unordered_set>
17
19namespace
20{
21struct StringWithHash
22{
23 OUString str;
24 sal_Int32 hashCode;
25 StringWithHash(OUString s)
26 : str(s)
27 , hashCode(s.hashCode())
28 {
29 }
30
31 bool operator==(StringWithHash const& rhs) const
32 {
33 if (hashCode != rhs.hashCode)
34 return false;
35 return str == rhs.str;
36 }
37};
38}
39
40namespace std
41{
42template <> struct hash<StringWithHash>
43{
44 std::size_t operator()(const StringWithHash& k) const { return k.hashCode; }
45};
46}
47
48namespace svl
49{
50namespace
51{
52sal_Int32 getRefCount(const rtl_uString* p) { return (p->refCount & 0x3FFFFFFF); }
53}
54
56{
57 mutable std::mutex maMutex;
58 // We use this map for two purposes - to store lower->upper case mappings
59 // and to retrieve a shared uppercase object, so the management logic
60 // is quite complex.
61 std::unordered_map<StringWithHash, OUString> maStrMap;
63
64 explicit Impl(const CharClass& rCharClass)
65 : mrCharClass(rCharClass)
66 {
67 }
68};
69
71 : mpImpl(new Impl(rCharClass))
72{
73 // make sure the one empty string instance is shared in this pool as well
76}
77
79
81{
// Interns rStr. Afterwards the map holds an entry keyed by rStr whose value
// is the upper-case variant, and the returned SharedString is built from the
// string data (pData) of the original-case string and of that upper-case
// string.
// The hash of rStr is computed before the lock is taken, so that work stays
// outside the critical section.
82 StringWithHash aStrWithHash(rStr);
83 std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
84
// Tentatively insert rStr mapped to itself. If the key is already present
// nothing is modified and mapIt refers to the existing entry.
85 auto[mapIt, bInserted] = mpImpl->maStrMap.emplace(aStrWithHash, rStr);
86 if (!bInserted)
87 // there is already a mapping
88 return SharedString(mapIt->first.str.pData, mapIt->second.pData);
89
90 // This is a new string insertion. Establish mapping to upper-case variant.
91 OUString aUpper = mpImpl->mrCharClass.uppercase(rStr);
92 if (aUpper == rStr)
93 // no need to do anything more, because we inserted an upper->upper mapping
94 return SharedString(mapIt->first.str.pData, mapIt->second.pData);
95
96 // We need to insert a lower->upper mapping, so also insert
97 // an upper->upper mapping, which we can use both for when an upper string
98 // is interned, and to look up a shared upper string.
99 StringWithHash aUpperWithHash(aUpper);
100 auto mapIt2 = mpImpl->maStrMap.find(aUpperWithHash);
101 if (mapIt2 != mpImpl->maStrMap.end())
102 {
103 // there is an already existing upper string
// Point the new entry at the string object already stored as the key of
// the upper-case entry, so all variants share that one object.
104 mapIt->second = mapIt2->first.str;
105 return SharedString(mapIt->first.str.pData, mapIt->second.pData);
106 }
107
108 // There is no already existing upper string.
109 // First, update using the iterator, can't do this later because
110 // the iterator will be invalid.
111 mapIt->second = aUpper;
// mapIt2 equals end() here (the lookup above failed), so it only serves as
// the position hint for the insertion of the upper->upper entry.
112 mpImpl->maStrMap.emplace_hint(mapIt2, aUpperWithHash, aUpper);
// Build the result from rStr and aUpper directly instead of going through
// mapIt, in line with the note above about the iterator.
113 return SharedString(rStr.pData, aUpper.pData);
114}
115
117{
// Drops every map entry whose string is no longer referenced from anywhere
// but the map itself, judged by the reference counts of the stored strings.
// The whole operation runs under the pool mutex.
118 std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
119
120 // Because we can have an uppercase entry mapped to itself,
121 // and then a bunch of lowercase entries mapped to that same
122 // upper-case entry, we need to scan the map twice - the first
123 // time to remove lowercase entries, and then only can we
124 // check for unused uppercase entries.
125
// Pass 1: entries whose key and value are different string objects.
126 auto it = mpImpl->maStrMap.begin();
127 auto itEnd = mpImpl->maStrMap.end();
128 while (it != itEnd)
129 {
130 rtl_uString* p1 = it->first.str.pData;
131 rtl_uString* p2 = it->second.pData;
132 if (p1 != p2)
133 {
134 // normal case - lowercase mapped to uppercase, which
135 // means that the lowercase entry has one ref-counted
136 // entry as the key in the map
137 if (getRefCount(p1) == 1)
138 {
// erase() returns the iterator following the removed entry, so skip the
// increment at the bottom of the loop.
139 it = mpImpl->maStrMap.erase(it);
140 continue;
141 }
142 }
143 ++it;
144 }
145
// Pass 2: entries mapped to themselves. Pass 1 has already released the
// references that removed lowercase entries held on their uppercase strings.
146 it = mpImpl->maStrMap.begin();
147 itEnd = mpImpl->maStrMap.end();
148 while (it != itEnd)
149 {
150 rtl_uString* p1 = it->first.str.pData;
151 rtl_uString* p2 = it->second.pData;
152 if (p1 == p2)
153 {
154 // uppercase which is mapped to itself, which means
155 // one ref-counted entry as the key in the map, and
156 // one ref-counted entry in the value in the map
157 if (getRefCount(p1) == 2)
158 {
159 it = mpImpl->maStrMap.erase(it);
160 continue;
161 }
162 }
163 ++it;
164 }
165}
166
168{
// Returns the number of entries currently stored in the map, read while
// holding the pool mutex. Self-mapped uppercase entries and entries mapped
// to a different string are all counted individually.
169 std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
170 return mpImpl->maStrMap.size();
171}
172
174{
// Returns the number of distinct mapped values in the map. The values are
// the upper-case variants, so keys that share one upper-case form are
// counted once.
175 std::scoped_lock<std::mutex> aGuard(mpImpl->maMutex);
176 // this is only called from unit tests, so no need to be efficient
// Collect the values into a set to de-duplicate them, then report its size.
177 std::unordered_set<OUString> aUpperSet;
178 for (auto const& pair : mpImpl->maStrMap)
179 aUpperSet.insert(pair.second);
180 return aUpperSet.size();
181}
182}
183
184/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
void purge()
Go through all string objects in the pool, and clear those that are no longer used outside of the pool.
size_t getCountIgnoreCase() const
SharedString intern(const OUString &rStr)
Intern a string object into the shared string pool.
SharedStringPool(const SharedStringPool &)=delete
std::unique_ptr< Impl > mpImpl
static const OUString EMPTY_STRING
static const SharedString & getEmptyString()
void * p
std::size_t operator()(const StringWithHash &k) const
std::unordered_map< StringWithHash, OUString > maStrMap
Impl(const CharClass &rCharClass)
constexpr bool operator==(TypedWhichId< T > const &lhs, TypedWhichId< T > rhs)
Definition: typedwhich.hxx:43