ThreadedDeflater.cxx
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <ThreadedDeflater.hxx>
#include <zlib.h>
#include <com/sun/star/packages/zip/ZipConstants.hpp>
#include <sal/log.hxx>

using namespace com::sun::star::packages::zip::ZipConstants;
using namespace com::sun::star;

namespace ZipUtils
{
const sal_Int64 MaxBlockSize = 128 * 1024;

// Parallel ZLIB compression using threads. The class internally splits the data into
// blocks and spawns ThreadPool tasks to process them independently. This is achieved
// in a similar way to how pigz works; see the comments from Mark Adler at
// https://stackoverflow.com/questions/30294766/how-to-use-multiple-threads-for-zlib-compression
// and
// https://stackoverflow.com/questions/30794053/how-to-use-multiple-threads-for-zlib-compression-same-input-source

// Everything here should be either read-only, or writing to distinct data, or atomic.

class ThreadedDeflater::Task : public comphelper::ThreadTask
{
    z_stream stream;
    ThreadedDeflater* deflater;
    int sequence;
    int blockSize;
    bool firstTask : 1;
    bool lastTask : 1;

public:
    Task(ThreadedDeflater* deflater_, int sequence_, int blockSize_, bool firstTask_,
         bool lastTask_)
        : comphelper::ThreadTask(deflater_->threadTaskTag)
        , stream()
        , deflater(deflater_)
        , sequence(sequence_)
        , blockSize(blockSize_)
        , firstTask(firstTask_)
        , lastTask(lastTask_)
    {
    }

private:
    virtual void doWork() override;
};
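
// Illustrative sketch, not part of the original file: the per-block deflate step that the
// pigz-style comments above describe, reduced to plain zlib calls. Every block gets its own
// z_stream in raw mode (-MAX_WBITS); flushing with Z_SYNC_FLUSH (Z_FINISH for the last block)
// ends the output on a byte boundary, so the per-block outputs can simply be concatenated.
// The function name and parameters are hypothetical and exist only for illustration.
[[maybe_unused]] static uInt deflateOneBlockSketch(const unsigned char* pIn, uInt nInSize,
                                                   unsigned char* pOut, uInt nOutCapacity,
                                                   bool bLastBlock)
{
    z_stream aStream = {};
    if (deflateInit2(&aStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8,
                     Z_DEFAULT_STRATEGY)
        != Z_OK)
        return 0;
    aStream.next_in = const_cast<unsigned char*>(pIn);
    aStream.avail_in = nInSize;
    aStream.next_out = pOut;
    aStream.avail_out = nOutCapacity;
    // Byte-align this block so the next block's output can follow it directly.
    deflate(&aStream, bLastBlock ? Z_FINISH : Z_SYNC_FLUSH);
    uInt nWritten = nOutCapacity - aStream.avail_out;
    deflateEnd(&aStream);
    return nWritten;
}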

ThreadedDeflater::ThreadedDeflater(sal_Int32 nSetLevel)
    : threadTaskTag(comphelper::ThreadPool::createThreadTaskTag())
    , totalIn(0)
    , totalOut(0)
    , zlibLevel(nSetLevel)
{
}

ThreadedDeflater::~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE { clear(); }

void ThreadedDeflater::deflateWrite(
    const css::uno::Reference<css::io::XInputStream>& xInStream,
    std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> aProcessInputFunc,
    std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> aProcessOutputFunc)
{
    sal_Int32 nThreadCount = comphelper::ThreadPool::getSharedOptimalPool().getWorkerCount();
    sal_Int64 batchSize = MaxBlockSize * nThreadCount;
    inBuffer.realloc(batchSize);
    prevDataBlock.realloc(MaxBlockSize);
    outBuffers.resize(nThreadCount);
    maProcessOutputFunc = aProcessOutputFunc;
    bool firstTask = true;

    while (xInStream->available() > 0)
    {
        sal_Int64 inputBytes = xInStream->readBytes(inBuffer, batchSize);
        aProcessInputFunc(inBuffer, inputBytes);
        totalIn += inputBytes;
        int sequence = 0;
        bool lastBatch = xInStream->available() <= 0;
        sal_Int64 bytesPending = inputBytes;
        while (bytesPending > 0)
        {
            sal_Int64 taskSize = std::min(MaxBlockSize, bytesPending);
            bytesPending -= taskSize;
            bool lastTask = lastBatch && !bytesPending;
            comphelper::ThreadPool::getSharedOptimalPool().pushTask(
                std::make_unique<Task>(this, sequence++, taskSize, firstTask, lastTask));

            if (firstTask)
                firstTask = false;
        }

        assert(bytesPending == 0);

        comphelper::ThreadPool::getSharedOptimalPool().waitUntilDone(threadTaskTag);

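        // Remember the tail of this batch: the first task of the next batch primes its
        // deflate dictionary from the previous block's last 32k (see Task::doWork()).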
        if (!lastBatch)
        {
            assert(inputBytes == batchSize);
            std::copy_n(std::cbegin(inBuffer) + (batchSize - MaxBlockSize), MaxBlockSize,
                        prevDataBlock.getArray());
        }

        processDeflatedBuffers();
    }
}
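
// Illustrative sketch, not part of the original file: one way a caller might drive
// deflateWrite(). The input callback sees every uncompressed batch (used here to update a
// CRC32 checksum) and the output callback sees every compressed batch (collected here into
// a plain vector). The function name, rCrc and rOut are assumptions made for illustration.
[[maybe_unused]] static void deflateWholeStreamSketch(
    const css::uno::Reference<css::io::XInputStream>& xInStream, sal_uInt32& rCrc,
    std::vector<sal_Int8>& rOut)
{
    ThreadedDeflater aDeflater(Z_DEFAULT_COMPRESSION);
    aDeflater.deflateWrite(
        xInStream,
        [&rCrc](const css::uno::Sequence<sal_Int8>& rData, sal_Int32 nLen) {
            // checksum the raw input while the batch is still in memory
            rCrc = static_cast<sal_uInt32>(
                crc32(rCrc, reinterpret_cast<const Bytef*>(rData.getConstArray()),
                      static_cast<uInt>(nLen)));
        },
        [&rOut](const css::uno::Sequence<sal_Int8>& rData, sal_Int32 nLen) {
            // append the compressed bytes; they already form one contiguous raw stream
            rOut.insert(rOut.end(), rData.getConstArray(), rData.getConstArray() + nLen);
        });
}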

void ThreadedDeflater::processDeflatedBuffers()
{
    sal_Int64 batchOutputSize = 0;
    for (const auto& buffer : outBuffers)
        batchOutputSize += buffer.size();

    css::uno::Sequence<sal_Int8> outBuffer(batchOutputSize);

    auto pos = outBuffer.getArray();
    for (auto& buffer : outBuffers)
    {
        pos = std::copy(buffer.begin(), buffer.end(), pos);
        buffer.clear();
    }

    maProcessOutputFunc(outBuffer, batchOutputSize);
    totalOut += batchOutputSize;
}

void ThreadedDeflater::clear()
{
    inBuffer = uno::Sequence<sal_Int8>();
    outBuffers.clear();
}

#if defined Z_PREFIX
#define deflateInit2 z_deflateInit2
#define deflateBound z_deflateBound
#define deflateSetDictionary z_deflateSetDictionary
#define deflate z_deflate
#define deflateEnd z_deflateEnd
#endif

void ThreadedDeflater::Task::doWork()
{
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    // -MAX_WBITS means 32k window size and raw stream
    if (deflateInit2(&stream, deflater->zlibLevel, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
                     Z_DEFAULT_STRATEGY)
        != Z_OK)
    {
        SAL_WARN("package.threadeddeflate", "deflateInit2() failed");
        abort();
    }
    // Find a size for our output buffer that is large enough for deflate() to need to be
    // called just once.
    sal_Int64 outputMaxSize = deflateBound(&stream, blockSize);
    // add extra size for Z_SYNC_FLUSH
    outputMaxSize += 20;
    deflater->outBuffers[sequence].resize(outputMaxSize);
    sal_Int64 myInBufferStart = sequence * MaxBlockSize;
    // zlib doesn't handle const properly
    unsigned char* inBufferPtr = reinterpret_cast<unsigned char*>(
        const_cast<signed char*>(deflater->inBuffer.getConstArray()));
    if (!firstTask)
    {
        // the window size is 32k, so set the last 32k of the previous data as the dictionary
        assert(MAX_WBITS == 15);
        assert(MaxBlockSize >= 32768);
        if (sequence > 0)
        {
            deflateSetDictionary(&stream, inBufferPtr + myInBufferStart - 32768, 32768);
        }
        else
        {
            unsigned char* prevBufferPtr = reinterpret_cast<unsigned char*>(
                const_cast<signed char*>(deflater->prevDataBlock.getConstArray()));
            deflateSetDictionary(&stream, prevBufferPtr + MaxBlockSize - 32768, 32768);
        }
    }
    stream.next_in = inBufferPtr + myInBufferStart;
    stream.avail_in = blockSize;
    stream.next_out = reinterpret_cast<unsigned char*>(deflater->outBuffers[sequence].data());
    stream.avail_out = outputMaxSize;

    // The trick is in using Z_SYNC_FLUSH instead of Z_NO_FLUSH. It will align the data at a byte boundary,
    // and since we use a raw stream, the data blocks then can be simply concatenated.
    int res = deflate(&stream, lastTask ? Z_FINISH : Z_SYNC_FLUSH);
    assert(stream.avail_in == 0); // Check that everything has been deflated.
    if (lastTask ? res == Z_STREAM_END : res == Z_OK)
    { // ok
        sal_Int64 outSize = outputMaxSize - stream.avail_out;
        deflater->outBuffers[sequence].resize(outSize);
    }
    else
    {
        SAL_WARN("package.threadeddeflate", "deflate() failed");
        abort();
    }
    deflateEnd(&stream);
}
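
// Illustrative sketch, not part of the original file: a hypothetical round-trip check for the
// output produced above. Because each task emits a byte-aligned raw deflate fragment, the
// concatenation built by processDeflatedBuffers() inflates as one ordinary raw stream opened
// with inflateInit2(-MAX_WBITS). The name and parameters are assumptions for illustration.
[[maybe_unused]] static bool inflateConcatenatedSketch(const unsigned char* pComp, uInt nCompSize,
                                                       unsigned char* pPlain, uInt nPlainCapacity)
{
    z_stream aStream = {};
    if (inflateInit2(&aStream, -MAX_WBITS) != Z_OK) // raw stream, 32k window
        return false;
    aStream.next_in = const_cast<unsigned char*>(pComp);
    aStream.avail_in = nCompSize;
    aStream.next_out = pPlain;
    aStream.avail_out = nPlainCapacity;
    // Z_FINISH expects the whole input and enough output space in a single call.
    bool bOk = inflate(&aStream, Z_FINISH) == Z_STREAM_END;
    inflateEnd(&aStream);
    return bOk;
}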

} // namespace

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */