ThreadedDeflater.cxx
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <ThreadedDeflater.hxx>
#include <zlib.h>
#include <com/sun/star/packages/zip/ZipConstants.hpp>
#include <sal/log.hxx>

using namespace com::sun::star::packages::zip::ZipConstants;
using namespace com::sun::star;

namespace ZipUtils
{
const sal_Int64 MaxBlockSize = 128 * 1024;

// Parallel ZLIB compression using threads. The class internally splits the data into
// blocks and spawns ThreadPool tasks to process them independently. This is achieved
// in a similar way to how pigz works; see the comments from Mark Adler at
// https://stackoverflow.com/questions/30294766/how-to-use-multiple-threads-for-zlib-compression
// and
// https://stackoverflow.com/questions/30794053/how-to-use-multiple-threads-for-zlib-compression-same-input-source

// Everything here should be either read-only, or writing to distinct data, or atomic.

class ThreadedDeflater::Task : public comphelper::ThreadTask
{
    z_stream stream;
    ThreadedDeflater* deflater;
    int sequence;
    int blockSize;
    bool firstTask : 1;
    bool lastTask : 1;

public:
    Task(ThreadedDeflater* deflater_, int sequence_, int blockSize_, bool firstTask_,
         bool lastTask_)
        : comphelper::ThreadTask(deflater_->threadTaskTag)
        , stream()
        , deflater(deflater_)
        , sequence(sequence_)
        , blockSize(blockSize_)
        , firstTask(firstTask_)
        , lastTask(lastTask_)
    {
    }

private:
    virtual void doWork() override;
};
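
// Illustrative sketch, not part of the original file: the per-block deflate step that the
// pigz-style comments above describe, reduced to plain zlib calls. Every block gets its own
// z_stream in raw mode (-MAX_WBITS); flushing with Z_SYNC_FLUSH (Z_FINISH for the last block)
// ends the output on a byte boundary, so the per-block outputs can simply be concatenated.
// The function name and parameters are hypothetical and exist only for illustration.
[[maybe_unused]] static uInt deflateOneBlockSketch(const unsigned char* pIn, uInt nInSize,
                                                   unsigned char* pOut, uInt nOutCapacity,
                                                   bool bLastBlock)
{
    z_stream aStream = {};
    if (deflateInit2(&aStream, Z_DEFAULT_COMPRESSION, Z_DEFLATED, -MAX_WBITS, 8,
                     Z_DEFAULT_STRATEGY)
        != Z_OK)
        return 0;
    aStream.next_in = const_cast<unsigned char*>(pIn);
    aStream.avail_in = nInSize;
    aStream.next_out = pOut;
    aStream.avail_out = nOutCapacity;
    // Byte-align this block so the next block's output can follow it directly.
    deflate(&aStream, bLastBlock ? Z_FINISH : Z_SYNC_FLUSH);
    uInt nWritten = nOutCapacity - aStream.avail_out;
    deflateEnd(&aStream);
    return nWritten;
}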

ThreadedDeflater::ThreadedDeflater(sal_Int32 nSetLevel)
    : threadTaskTag(comphelper::ThreadPool::createThreadTaskTag())
    , totalIn(0)
    , totalOut(0)
    , zlibLevel(nSetLevel)
{
}

ThreadedDeflater::~ThreadedDeflater() COVERITY_NOEXCEPT_FALSE { clear(); }

void ThreadedDeflater::deflateWrite(
    const css::uno::Reference<css::io::XInputStream>& xInStream,
    std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> aProcessInputFunc,
    std::function<void(const css::uno::Sequence<sal_Int8>&, sal_Int32)> aProcessOutputFunc)
{
    sal_Int32 nThreadCount = comphelper::ThreadPool::getSharedOptimalPool().getWorkerCount();
    sal_Int64 batchSize = MaxBlockSize * nThreadCount;
    inBuffer.realloc(batchSize);
    prevDataBlock.realloc(MaxBlockSize);
    outBuffers.resize(nThreadCount);
    maProcessOutputFunc = aProcessOutputFunc;
    bool firstTask = true;

    while (xInStream->available() > 0)
    {
        sal_Int64 inputBytes = xInStream->readBytes(inBuffer, batchSize);
        aProcessInputFunc(inBuffer, inputBytes);
        totalIn += inputBytes;
        int sequence = 0;
        bool lastBatch = xInStream->available() <= 0;
        sal_Int64 bytesPending = inputBytes;
        while (bytesPending > 0)
        {
            sal_Int64 taskSize = std::min(MaxBlockSize, bytesPending);
            bytesPending -= taskSize;
            bool lastTask = lastBatch && !bytesPending;
            comphelper::ThreadPool::getSharedOptimalPool().pushTask(
                std::make_unique<Task>(this, sequence++, taskSize, firstTask, lastTask));

            if (firstTask)
                firstTask = false;
        }

        assert(bytesPending == 0);

        comphelper::ThreadPool::getSharedOptimalPool().waitUntilDone(threadTaskTag);

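        // Remember the tail of this batch: the first task of the next batch primes its
        // deflate dictionary from the previous block's last 32k (see Task::doWork()).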
        if (!lastBatch)
        {
            assert(inputBytes == batchSize);
            std::copy_n(std::cbegin(inBuffer) + (batchSize - MaxBlockSize), MaxBlockSize,
                        prevDataBlock.getArray());
        }

        processDeflatedBuffers();
    }
}
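
// Illustrative sketch, not part of the original file: one way a caller might drive
// deflateWrite(). The input callback sees every uncompressed batch (used here to update a
// CRC32 checksum) and the output callback sees every compressed batch (collected here into
// a plain vector). The function name, rCrc and rOut are assumptions made for illustration.
[[maybe_unused]] static void deflateWholeStreamSketch(
    const css::uno::Reference<css::io::XInputStream>& xInStream, sal_uInt32& rCrc,
    std::vector<sal_Int8>& rOut)
{
    ThreadedDeflater aDeflater(Z_DEFAULT_COMPRESSION);
    aDeflater.deflateWrite(
        xInStream,
        [&rCrc](const css::uno::Sequence<sal_Int8>& rData, sal_Int32 nLen) {
            // checksum the raw input while the batch is still in memory
            rCrc = static_cast<sal_uInt32>(
                crc32(rCrc, reinterpret_cast<const Bytef*>(rData.getConstArray()),
                      static_cast<uInt>(nLen)));
        },
        [&rOut](const css::uno::Sequence<sal_Int8>& rData, sal_Int32 nLen) {
            // append the compressed bytes; they already form one contiguous raw stream
            rOut.insert(rOut.end(), rData.getConstArray(), rData.getConstArray() + nLen);
        });
}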

void ThreadedDeflater::processDeflatedBuffers()
{
    sal_Int64 batchOutputSize = 0;
    for (const auto& buffer : outBuffers)
        batchOutputSize += buffer.size();

    css::uno::Sequence<sal_Int8> outBuffer(batchOutputSize);

    auto pos = outBuffer.getArray();
    for (auto& buffer : outBuffers)
    {
        pos = std::copy(buffer.begin(), buffer.end(), pos);
        buffer.clear();
    }

    maProcessOutputFunc(outBuffer, batchOutputSize);
    totalOut += batchOutputSize;
}

void ThreadedDeflater::clear()
{
    inBuffer = uno::Sequence<sal_Int8>();
    outBuffers.clear();
}

#if defined Z_PREFIX
#define deflateInit2 z_deflateInit2
#define deflateBound z_deflateBound
#define deflateSetDictionary z_deflateSetDictionary
#define deflate z_deflate
#define deflateEnd z_deflateEnd
#endif

void ThreadedDeflater::Task::doWork()
{
    stream.zalloc = nullptr;
    stream.zfree = nullptr;
    stream.opaque = nullptr;
    // -MAX_WBITS means 32k window size and raw stream
    if (deflateInit2(&stream, deflater->zlibLevel, Z_DEFLATED, -MAX_WBITS, DEF_MEM_LEVEL,
                     Z_DEFAULT_STRATEGY)
        != Z_OK)
    {
        SAL_WARN("package.threadeddeflate", "deflateInit2() failed");
        abort();
    }
    // Find a size for our output buffer that is large enough for deflate() to need to be
    // called just once.
    sal_Int64 outputMaxSize = deflateBound(&stream, blockSize);
    // add extra size for Z_SYNC_FLUSH
    outputMaxSize += 20;
    deflater->outBuffers[sequence].resize(outputMaxSize);
    sal_Int64 myInBufferStart = sequence * MaxBlockSize;
    // zlib doesn't handle const properly
    unsigned char* inBufferPtr = reinterpret_cast<unsigned char*>(
        const_cast<signed char*>(deflater->inBuffer.getConstArray()));
    if (!firstTask)
    {
        // the window size is 32k, so set the last 32k of the previous data as the dictionary
        assert(MAX_WBITS == 15);
        assert(MaxBlockSize >= 32768);
        if (sequence > 0)
        {
            deflateSetDictionary(&stream, inBufferPtr + myInBufferStart - 32768, 32768);
        }
        else
        {
            unsigned char* prevBufferPtr = reinterpret_cast<unsigned char*>(
                const_cast<signed char*>(deflater->prevDataBlock.getConstArray()));
            deflateSetDictionary(&stream, prevBufferPtr + MaxBlockSize - 32768, 32768);
        }
    }
    stream.next_in = inBufferPtr + myInBufferStart;
    stream.avail_in = blockSize;
    stream.next_out = reinterpret_cast<unsigned char*>(deflater->outBuffers[sequence].data());
    stream.avail_out = outputMaxSize;

    // The trick is in using Z_SYNC_FLUSH instead of Z_NO_FLUSH. It will align the data at a byte boundary,
    // and since we use a raw stream, the data blocks then can be simply concatenated.
    int res = deflate(&stream, lastTask ? Z_FINISH : Z_SYNC_FLUSH);
    assert(stream.avail_in == 0); // Check that everything has been deflated.
    if (lastTask ? res == Z_STREAM_END : res == Z_OK)
    { // ok
        sal_Int64 outSize = outputMaxSize - stream.avail_out;
        deflater->outBuffers[sequence].resize(outSize);
    }
    else
    {
        SAL_WARN("package.threadeddeflate", "deflate() failed");
        abort();
    }
    deflateEnd(&stream);
}
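
// Illustrative sketch, not part of the original file: a hypothetical round-trip check for the
// output produced above. Because each task emits a byte-aligned raw deflate fragment, the
// concatenation built by processDeflatedBuffers() inflates as one ordinary raw stream opened
// with inflateInit2(-MAX_WBITS). The name and parameters are assumptions for illustration.
[[maybe_unused]] static bool inflateConcatenatedSketch(const unsigned char* pComp, uInt nCompSize,
                                                       unsigned char* pPlain, uInt nPlainCapacity)
{
    z_stream aStream = {};
    if (inflateInit2(&aStream, -MAX_WBITS) != Z_OK) // raw stream, 32k window
        return false;
    aStream.next_in = const_cast<unsigned char*>(pComp);
    aStream.avail_in = nCompSize;
    aStream.next_out = pPlain;
    aStream.avail_out = nPlainCapacity;
    // Z_FINISH expects the whole input and enough output space in a single call.
    bool bOk = inflate(&aStream, Z_FINISH) == Z_STREAM_END;
    inflateEnd(&aStream);
    return bOk;
}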

} // namespace

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */