12#include <document.hxx>
14#include <tokenarray.hxx>
15#include <compiler.hxx>
33#include <com/sun/star/sheet/FormulaLanguage.hpp>
37 "#define IllegalArgument 502\n"
38 "#define IllegalFPOperation 503 // #NUM!\n"
39 "#define NoValue 519 // #VALUE!\n"
40 "#define NoConvergence 523\n"
41 "#define DivisionByZero 532 // #DIV/0!\n"
42 "#define NOTAVAILABLE 0x7fff // #N/A\n"
44 "double CreateDoubleError(ulong nErr)\n"
50 " return as_double(0x7FF8000000000000+nErr);\n"
54 "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
55 "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
64#include <rtl/digest.h>
// Produces a line-numbered rendering of `s`: each line is written to `ss`
// preceded by a C block comment holding a running counter (width 4).
// NOTE(review): the declarations of `ss`, `linenumber`, `start` and `newline`,
// the advance of `start` inside the loop, and the return statement are not
// visible in this excerpt - confirm against the full file.
74std::string linenumberify(
const std::string& s)
// Look for the next line break at or after `start`; stop when none is left.
80 while ((newline = s.find(
'\n',
start)) != std::string::npos)
// Write the counter prefix, then the line itself. The length is
// newline-start+1, so the terminating line break is copied as well.
82 ss <<
"/*" << std::setw(4) << linenumber++ <<
"*/ " << s.substr(
start, newline-
start+1);
// Same prefix for the text running from `start` to the end of `s`, i.e. what
// follows the last line break found by the loop.
86 ss <<
"/*" << std::setw(4) << linenumber++ <<
"*/ " << s.substr(
start, std::string::npos);
// Inspects an array of `nLength` rtl_uString pointers for non-null entries.
// NOTE(review): the loop header and every return statement are missing from
// this excerpt; presumably the result is true when the array pointer itself is
// null or when no entry is non-null - confirm against the full file.
90bool AllStringsAreNull(
const rtl_uString*
const* pStringArray,
size_t nLength)
// A missing array is handled up front, before any element is dereferenced.
92 if (pStringArray ==
nullptr)
// Per-element test: entry `i` holds a string when its pointer is non-null.
96 if (pStringArray[
i] !=
nullptr)
// Formats `str` for debug output by wrapping it in double quotes.
// Strings shorter than 20 UTF-16 code units are quoted in full; anything
// longer is cut to its first 20 code units and "..." is appended after the
// closing quote to show that text was dropped.
102OUString LimitedString( std::u16string_view str )
// Short enough: quote the whole string.
104 if( str.size() < 20 )
105 return OUString::Concat(
"\"") + str +
"\"";
// Otherwise keep only the first 20 code units and mark the truncation.
107 return OUString::Concat(
"\"") + str.substr( 0, 20 ) +
"\"...";
// Upper bound on how many array elements DebugPeekData and DebugPeekDoubles
// iterate over when dumping buffer contents.
110const int MAX_PEEK_ELEMENTS = 5;
// Builds a short description of the data behind a formula token, streaming at
// most MAX_PEEK_ELEMENTS entries into `buf`.
// `ref` is the token to describe; `doubleRefIndex` selects which entry of
// GetArrays() is read in the "DoubleRef" case (defaults to 0).
// NOTE(review): the token-type checks, the casts that produce pSVR / pDVR, the
// body of the SingleRef loop and the return are not visible in this excerpt.
// NOTE(review): `doubleRefIndex` is used as an index without a bounds check in
// the visible lines - confirm callers always pass a valid index.
112std::string DebugPeekData(
const FormulaToken* ref,
int doubleRefIndex = 0)
// Single vector reference: opening tag, then up to MAX_PEEK_ELEMENTS entries.
119 buf <<
"SingleRef {";
120 for(
size_t i = 0; i < std::min< size_t >( MAX_PEEK_ELEMENTS, pSVR->
GetArrayLength()); ++
i )
// Double vector reference: same element cap, reading the selected array.
142 buf <<
"DoubleRef {";
143 for(
size_t i = 0; i < std::min< size_t >( MAX_PEEK_ELEMENTS, pDVR->
GetArrayLength()); ++
i )
// Prefer the string value when a string array exists and entry i is non-null.
147 if( pDVR->
GetArrays()[doubleRefIndex].mpStringArray !=
nullptr
148 && pDVR->
GetArrays()[doubleRefIndex].mpStringArray[
i ] != nullptr )
// Strings are printed quoted and length-limited via LimitedString.
150 buf << LimitedString( OUString( pDVR->
GetArrays()[doubleRefIndex].mpStringArray[
i ] ));
// No string for this entry: print the numeric value if a numeric array exists.
152 else if( pDVR->
GetArrays()[doubleRefIndex].mpNumericArray !=
nullptr )
153 buf << pDVR->
GetArrays()[doubleRefIndex].mpNumericArray[
i ];
// Builds a short description of a buffer of doubles for logging.
// `data` is the buffer and `size` its element count.
// NOTE(review): the loop body, the action taken when the buffer is longer than
// the cap, and the return are not visible in this excerpt.
177std::string DebugPeekDoubles(
const double* data,
int size)
// Visit at most MAX_PEEK_ELEMENTS entries, or fewer when the buffer is shorter.
181 for(
int i = 0;
i < std::min( MAX_PEEK_ELEMENTS,
size ); ++
i )
// Taken when entries beyond the cap exist and were therefore not visited.
187 if(
size > MAX_PEEK_ELEMENTS )
200 double* pHostBuffer =
nullptr;
201 size_t szHostBuffer = 0;
218 SAL_INFO(
"sc.opencl",
"Forced strings to zero : " << DebugPeekDoubles( pHostBuffer, pSVR->
GetArrayLength()));
242 SAL_INFO(
"sc.opencl",
"Forced strings to zero : " << DebugPeekDoubles( pHostBuffer, pDVR->
GetArrayLength()));
246 pHostBuffer =
const_cast<double*
>(pDVR->
GetArrays()[
mnIndex].mpNumericArray);
261 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
264 if (CL_SUCCESS !=
err)
266 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem <<
" size " << szHostBuffer <<
" using host buffer " << pHostBuffer);
270 if (szHostBuffer == 0)
271 szHostBuffer =
sizeof(double);
274 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
275 szHostBuffer,
nullptr, &
err);
276 if (CL_SUCCESS !=
err)
278 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem <<
" size " << szHostBuffer);
280 double* pNanBuffer =
static_cast<double*
>(clEnqueueMapBuffer(
282 szHostBuffer, 0,
nullptr,
nullptr, &
err));
283 if (CL_SUCCESS !=
err)
286 for (
size_t i = 0;
i < szHostBuffer /
sizeof(double);
i++)
287 pNanBuffer[
i] = std::numeric_limits<double>::quiet_NaN();
289 pNanBuffer, 0,
nullptr,
nullptr);
291 if (CL_SUCCESS !=
err)
295 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " <<
mpClmem <<
" (" << DebugPeekData(ref,
mnIndex) <<
")");
296 err = clSetKernelArg(k, argno,
sizeof(cl_mem),
static_cast<void*
>(&
mpClmem));
297 if (CL_SUCCESS !=
err)
307 DynamicKernelPiArgument(
const ScCalcConfig& config,
const std::string& s,
311 virtual void GenDecl( outputstream& ss )
const override
313 ss <<
"double " << mSymName;
315 virtual void GenDeclRef( outputstream& ss )
const override
319 virtual void GenSlidingWindowDecl( outputstream& ss )
const override
323 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
327 virtual size_t GetWindowSize()
const override
332 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
337 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": double: " <<
preciseFloat( tmp ) <<
" (PI)");
338 cl_int
err = clSetKernelArg(k, argno,
sizeof(
double),
static_cast<void*
>(&tmp));
339 if (CL_SUCCESS != err)
340 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
345class DynamicKernelRandomArgument :
public DynamicKernelArgument
348 DynamicKernelRandomArgument(
const ScCalcConfig& config,
const std::string& s,
350 DynamicKernelArgument(
config, s,
ft) { }
352 virtual void GenDecl( outputstream& ss )
const override
354 ss <<
"double " << mSymName;
356 virtual void GenDeclRef( outputstream& ss )
const override
360 virtual void GenSlidingWindowDecl( outputstream& ss )
const override
362 ss <<
"int " << mSymName;
364 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
366 return mSymName +
"_Random(" + mSymName +
")";
368 virtual void GenSlidingWindowFunction( outputstream& ss )
override
378#ifndef DEFINED_RANDOM123_STUFF\n\
379#define DEFINED_RANDOM123_STUFF\n\
382Copyright 2010-2011, D. E. Shaw Research.\n\
383All rights reserved.\n\
385Redistribution and use in source and binary forms, with or without\n\
386modification, are permitted provided that the following conditions are\n\
389* Redistributions of source code must retain the above copyright\n\
390 notice, this list of conditions, and the following disclaimer.\n\
392* Redistributions in binary form must reproduce the above copyright\n\
393 notice, this list of conditions, and the following disclaimer in the\n\
394 documentation and/or other materials provided with the distribution.\n\
396* Neither the name of D. E. Shaw Research nor the names of its\n\
397 contributors may be used to endorse or promote products derived from\n\
398 this software without specific prior written permission.\n\
400THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
401\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
402LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
403A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
404OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
405SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
406LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
407DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
408THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
409(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
410OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
413typedef uint uint32_t;\n\
414struct r123array2x32\n\
418enum r123_enum_threefry32x2\n\
429inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
430 __attribute__ ((always_inline));\n\
432RotL_32 (uint32_t x, unsigned int N)\n\
434 return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
437typedef struct r123array2x32 threefry2x32_ctr_t;\n\
438typedef struct r123array2x32 threefry2x32_key_t;\n\
439typedef struct r123array2x32 threefry2x32_ukey_t;\n\
440inline threefry2x32_key_t\n\
441threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
446inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
447 threefry2x32_ctr_t in,\n\
448 threefry2x32_key_t k)\n\
449 __attribute__ ((always_inline));\n\
450inline threefry2x32_ctr_t\n\
451threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
452 threefry2x32_key_t k)\n\
454 threefry2x32_ctr_t X;\n\
455 uint32_t ks[2 + 1];\n\
457 ks[2] = 0x1BD11BDA;\n\
458 for (i = 0; i < 2; i++) {\n\
465 if (Nrounds > 0) {\n\
467 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
470 if (Nrounds > 1) {\n\
472 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
475 if (Nrounds > 2) {\n\
477 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
480 if (Nrounds > 3) {\n\
482 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
485 if (Nrounds > 3) {\n\
490 if (Nrounds > 4) {\n\
492 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
495 if (Nrounds > 5) {\n\
497 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
500 if (Nrounds > 6) {\n\
502 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
505 if (Nrounds > 7) {\n\
507 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
510 if (Nrounds > 7) {\n\
515 if (Nrounds > 8) {\n\
517 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
520 if (Nrounds > 9) {\n\
522 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
525 if (Nrounds > 10) {\n\
527 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
530 if (Nrounds > 11) {\n\
532 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
535 if (Nrounds > 11) {\n\
540 if (Nrounds > 12) {\n\
542 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
545 if (Nrounds > 13) {\n\
547 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
550 if (Nrounds > 14) {\n\
552 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
555 if (Nrounds > 15) {\n\
557 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
560 if (Nrounds > 15) {\n\
565 if (Nrounds > 16) {\n\
567 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
570 if (Nrounds > 17) {\n\
572 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
575 if (Nrounds > 18) {\n\
577 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
580 if (Nrounds > 19) {\n\
582 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
585 if (Nrounds > 19) {\n\
590 if (Nrounds > 20) {\n\
592 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
595 if (Nrounds > 21) {\n\
597 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
600 if (Nrounds > 22) {\n\
602 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
605 if (Nrounds > 23) {\n\
607 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
610 if (Nrounds > 23) {\n\
615 if (Nrounds > 24) {\n\
617 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
620 if (Nrounds > 25) {\n\
622 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
625 if (Nrounds > 26) {\n\
627 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
630 if (Nrounds > 27) {\n\
632 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
635 if (Nrounds > 27) {\n\
640 if (Nrounds > 28) {\n\
642 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
645 if (Nrounds > 29) {\n\
647 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
650 if (Nrounds > 30) {\n\
652 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
655 if (Nrounds > 31) {\n\
657 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
660 if (Nrounds > 31) {\n\
668enum r123_enum_threefry2x32\n\
669{ threefry2x32_rounds = 20 };\n\
670inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
671 threefry2x32_key_t k)\n\
672 __attribute__ ((always_inline));\n\
673inline threefry2x32_ctr_t\n\
674threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
676 return threefry2x32_R (threefry2x32_rounds, in, k);\n\
681 ss <<
"double " << mSymName <<
"_Random (int seed)\n\
683 unsigned tid = get_global_id(0);\n\
684 threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
685 threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
686 c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
688 const double halffactor = 0.5*factor;\n\
689 return c.v[0] * factor + halffactor;\n\
693 virtual size_t GetWindowSize()
const override
698 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
703 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_int: " << seed <<
"(RANDOM)");
704 cl_int
err = clSetKernelArg(k, argno,
sizeof(cl_int),
static_cast<void*
>(&seed));
705 if (CL_SUCCESS != err)
706 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
712class ConstStringArgument :
public DynamicKernelArgument
715 ConstStringArgument(
const ScCalcConfig& config,
const std::string& s,
717 DynamicKernelArgument(
config, s,
ft) { }
719 virtual void GenDecl( outputstream& ss )
const override
721 ss <<
"double " << mSymName;
723 virtual void GenDeclRef( outputstream& ss )
const override
725 ss << GenSlidingWindowDeclRef();
727 virtual void GenSlidingWindowDecl( outputstream& ss )
const override
731 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
735 throw Unhandled(__FILE__, __LINE__);
740 virtual std::string GenIsString(
bool =
false )
const override
744 virtual size_t GetWindowSize()
const override
748 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
753 throw Unhandled(__FILE__, __LINE__);
758 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno
759 <<
": stringId: " << stringId <<
" (" << DebugPeekData(ref) <<
")" );
760 cl_int
err = clSetKernelArg(k, argno,
sizeof(cl_double),
static_cast<void*
>(&stringId));
761 if (CL_SUCCESS != err)
762 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
794 size_t szHostBuffer = nStrings *
sizeof(cl_double);
795 cl_double* pStringIdsBuffer =
nullptr;
801 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
802 szHostBuffer,
nullptr, &
err);
803 if (CL_SUCCESS !=
err)
805 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem <<
" size " << szHostBuffer);
807 pStringIdsBuffer =
static_cast<cl_double*
>(clEnqueueMapBuffer(
809 szHostBuffer, 0,
nullptr,
nullptr, &
err));
810 if (CL_SUCCESS !=
err)
813 for (
size_t i = 0;
i < nStrings;
i++)
818 rtl::math::setNan(&pStringIdsBuffer[
i]);
824 szHostBuffer =
sizeof(cl_double);
827 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
828 szHostBuffer,
nullptr, &
err);
829 if (CL_SUCCESS !=
err)
831 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem <<
" size " << szHostBuffer);
833 pStringIdsBuffer =
static_cast<cl_double*
>(clEnqueueMapBuffer(
835 szHostBuffer, 0,
nullptr,
nullptr, &
err));
836 if (CL_SUCCESS !=
err)
839 for (
size_t i = 0;
i < szHostBuffer /
sizeof(cl_double);
i++)
840 rtl::math::setNan(&pStringIdsBuffer[
i]);
843 pStringIdsBuffer, 0,
nullptr,
nullptr);
844 if (CL_SUCCESS !=
err)
845 throw OpenCLError(
"clEnqueueUnmapMemObject",
err, __FILE__, __LINE__);
847 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " <<
mpClmem
848 <<
" (stringIds: " << DebugPeekDoubles(pStringIdsBuffer, nStrings) <<
" "
849 << DebugPeekData(ref,
mnIndex) <<
")");
850 err = clSetKernelArg(k, argno,
sizeof(cl_mem),
static_cast<void*
>(&
mpClmem));
851 if (CL_SUCCESS !=
err)
859 return "!isnan(" +
mSymName +
"[gid0])";
875 return "!isnan(" +
mSymName +
"[gid0])";
877 ss <<
"(gid0 < " << nStrings <<
"? !isnan(" <<
mSymName <<
"[gid0]):NAN)";
884class DynamicKernelMixedArgument :
public VectorRef
887 DynamicKernelMixedArgument(
const ScCalcConfig& config,
const std::string& s,
890 virtual void GenSlidingWindowDecl( outputstream& ss )
const override
896 virtual void GenSlidingWindowFunction( outputstream& )
override { }
898 virtual void GenDecl( outputstream& ss )
const override
904 virtual void GenDeclRef( outputstream& ss )
const override
910 virtual std::string GenSlidingWindowDeclRef(
bool nested )
const override
919 virtual std::string GenDoubleSlidingWindowDeclRef(
bool nested =
false )
const override
925 virtual std::string GenStringSlidingWindowDeclRef(
bool nested =
false )
const override
931 virtual std::string GenIsString(
bool nested =
false )
const override
935 virtual size_t Marshal( cl_kernel k,
int argno,
int vw, cl_program p )
override
951 std::shared_ptr<SlidingFunctionBase> CodeGen,
int index)
967 return GetWindowSize() > 100 &&
968 ((GetStartFixed() && GetEndFixed()) ||
969 (!GetStartFixed() && !GetEndFixed()));
975 size_t nArrayLength = mpDVR->GetArrayLength();
977 if (!bIsStartFixed && !bIsEndFixed)
980 ss <<
"((i+gid0) <" << nArrayLength <<
"?";
981 ss << Base::GetName() <<
"[i + gid0]";
988 ss <<
"(i <" << nArrayLength <<
"?";
989 ss << Base::GetName() <<
"[i]";
1000 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1002 if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1004 ss <<
"for (int i = ";
1005 ss <<
"gid0; i < " << mpDVR->GetArrayLength();
1006 ss <<
" && i < " << nCurWindowSize <<
"; i++){\n\t\t";
1008 return nCurWindowSize;
1010 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1012 ss <<
"for (int i = ";
1013 ss <<
"0; i < " << mpDVR->GetArrayLength();
1014 ss <<
" && i < gid0+" << nCurWindowSize <<
"; i++){\n\t\t";
1016 return nCurWindowSize;
1018 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1020 ss <<
"tmpBottom = " <<
mpCodeGen->GetBottom() <<
";\n\t";
1021 ss <<
"{int i;\n\t";
1024 if (nCurWindowSize / outLoopSize != 0)
1026 ss <<
"for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1029 ss <<
"i = outLoop*" << outLoopSize <<
"+" <<
count <<
";\n\t";
1032 temp1 <<
"if(i + gid0 < " << mpDVR->GetArrayLength();
1033 temp1 <<
"){\n\t\t";
1034 temp1 <<
"tmp = legalize(";
1035 temp1 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1036 temp1 <<
", tmp);\n\t\t\t";
1044 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize;
count < nCurWindowSize;
count++)
1046 ss <<
"i = " <<
count <<
";\n\t";
1047 if (
count == nCurWindowSize / outLoopSize * outLoopSize)
1049 temp2 <<
"if(i + gid0 < " << mpDVR->GetArrayLength();
1050 temp2 <<
"){\n\t\t";
1051 temp2 <<
"tmp = legalize(";
1052 temp2 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1053 temp2 <<
", tmp);\n\t\t\t";
1060 return nCurWindowSize;
1066 ss <<
"tmpBottom = " <<
mpCodeGen->GetBottom() <<
";\n\t";
1067 ss <<
"{int i;\n\t";
1070 if (nCurWindowSize / outLoopSize != 0)
1072 ss <<
"for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1075 ss <<
"i = outLoop*" << outLoopSize <<
"+" <<
count <<
";\n\t";
1078 temp1 <<
"if(i < " << mpDVR->GetArrayLength();
1079 temp1 <<
"){\n\t\t";
1080 temp1 <<
"tmp = legalize(";
1081 temp1 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1082 temp1 <<
", tmp);\n\t\t\t";
1090 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize;
count < nCurWindowSize;
count++)
1092 ss <<
"i = " <<
count <<
";\n\t";
1093 if (
count == nCurWindowSize / outLoopSize * outLoopSize)
1095 temp2 <<
"if(i < " << mpDVR->GetArrayLength();
1096 temp2 <<
"){\n\t\t";
1097 temp2 <<
"tmp = legalize(";
1098 temp2 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1099 temp2 <<
", tmp);\n\t\t\t";
1106 return nCurWindowSize;
1117class DynamicKernelMixedSlidingArgument :
public VectorRef
1120 DynamicKernelMixedSlidingArgument(
const ScCalcConfig& config,
const std::string& s,
1126 virtual void GenSlidingWindowDecl( outputstream& ss )
const override
1132 virtual void GenSlidingWindowFunction( outputstream& )
override { }
1134 virtual void GenDecl( outputstream& ss )
const override
1140 virtual void GenDeclRef( outputstream& ss )
const override
1146 virtual std::string GenSlidingWindowDeclRef(
bool nested )
const override
1155 virtual std::string GenDoubleSlidingWindowDeclRef(
bool =
false )
const override
1161 virtual std::string GenStringSlidingWindowDeclRef(
bool =
false )
const override
1167 virtual size_t Marshal( cl_kernel k,
int argno,
int vw, cl_program p )
override
1176 DynamicKernelSlidingArgument<DynamicKernelStringArgument>
mStringArgument;
1183 typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1185 SymbolTable() :
mCurId(0) { }
1188 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize);
1190 void DumpSlidingWindowFunctions( outputstream& ss )
1192 for (
auto const& argument :
mParams)
1194 argument->GenSlidingWindowFunction(ss);
1200 void Marshal( cl_kernel,
int, cl_program );
// Sets the kernel arguments of `k` by delegating to each argument's Marshal.
// `nVectorWidth` and `pProgram` are forwarded unchanged to every argument.
// NOTE(review): the declaration of `i` and the loop producing `argument` are
// not visible in this excerpt.
1208void SymbolTable::Marshal( cl_kernel k,
int nVectorWidth, cl_program pProgram )
// `i` is passed as the argument index to start at and is then advanced by the
// value Marshal returns.
1213 i +=
argument->Marshal(k,
i, nVectorWidth, pProgram);
1222 std::shared_ptr<SlidingFunctionBase> CodeGen,
int index)
1240 std::string
name = Base::GetName();
1241 ss <<
"__kernel void " <<
name;
1242 ss <<
"_reduction(__global double* A, "
1243 "__global double *result,int arrayLength,int windowSize){\n";
1244 ss <<
" double tmp, current_result =" <<
1247 ss <<
" int writePos = get_group_id(1);\n";
1248 ss <<
" int lidx = get_local_id(0);\n";
1249 ss <<
" __local double shm_buf[256];\n";
1250 if (mpDVR->IsStartFixed())
1251 ss <<
" int offset = 0;\n";
1253 ss <<
" int offset = get_group_id(1);\n";
1254 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1255 ss <<
" int end = windowSize;\n";
1256 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1257 ss <<
" int end = offset + windowSize;\n";
1258 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1259 ss <<
" int end = windowSize + get_group_id(1);\n";
1260 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1261 ss <<
" int end = windowSize;\n";
1262 ss <<
" end = min(end, arrayLength);\n";
1264 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1265 ss <<
" int loop = arrayLength/512 + 1;\n";
1266 ss <<
" for (int l=0; l<loop; l++){\n";
1267 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
1268 ss <<
" int loopOffset = l*512;\n";
1269 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
1270 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
1271 "A[loopOffset + lidx + offset]",
"tmp") <<
", tmp);\n";
1272 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
1273 "A[loopOffset + lidx + offset + 256]",
"tmp") <<
", tmp);\n";
1274 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
1275 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
1276 "A[loopOffset + lidx + offset]",
"tmp") <<
", tmp);\n";
1277 ss <<
" shm_buf[lidx] = tmp;\n";
1278 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1279 ss <<
" for (int i = 128; i >0; i/=2) {\n";
1280 ss <<
" if (lidx < i)\n";
1281 ss <<
" shm_buf[lidx] = ";
1284 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
1286 ss <<
mpCodeGen->Gen2(
"shm_buf[lidx]",
"shm_buf[lidx + i]") <<
";\n";
1287 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1289 ss <<
" if (lidx == 0)\n";
1290 ss <<
" current_result =";
1292 ss <<
"current_result + shm_buf[0]";
1294 ss <<
mpCodeGen->Gen2(
"current_result",
"shm_buf[0]");
1296 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1298 ss <<
" if (lidx == 0)\n";
1299 ss <<
" result[writePos] = current_result;\n";
1304 std::string
name = Base::GetName();
1306 ss <<
"__kernel void " <<
name <<
"_sum";
1307 ss <<
"_reduction(__global double* A, "
1308 "__global double *result,int arrayLength,int windowSize){\n";
1309 ss <<
" double tmp, current_result =" <<
1312 ss <<
" int writePos = get_group_id(1);\n";
1313 ss <<
" int lidx = get_local_id(0);\n";
1314 ss <<
" __local double shm_buf[256];\n";
1315 if (mpDVR->IsStartFixed())
1316 ss <<
" int offset = 0;\n";
1318 ss <<
" int offset = get_group_id(1);\n";
1319 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1320 ss <<
" int end = windowSize;\n";
1321 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1322 ss <<
" int end = offset + windowSize;\n";
1323 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1324 ss <<
" int end = windowSize + get_group_id(1);\n";
1325 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1326 ss <<
" int end = windowSize;\n";
1327 ss <<
" end = min(end, arrayLength);\n";
1328 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1329 ss <<
" int loop = arrayLength/512 + 1;\n";
1330 ss <<
" for (int l=0; l<loop; l++){\n";
1331 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
1332 ss <<
" int loopOffset = l*512;\n";
1333 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
1334 ss <<
" tmp = legalize(";
1335 ss <<
"(A[loopOffset + lidx + offset]+ tmp)";
1337 ss <<
" tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
1339 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
1340 ss <<
" tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
1342 ss <<
" shm_buf[lidx] = tmp;\n";
1343 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1344 ss <<
" for (int i = 128; i >0; i/=2) {\n";
1345 ss <<
" if (lidx < i)\n";
1346 ss <<
" shm_buf[lidx] = ";
1347 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
1348 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1350 ss <<
" if (lidx == 0)\n";
1351 ss <<
" current_result =";
1352 ss <<
"current_result + shm_buf[0]";
1354 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1356 ss <<
" if (lidx == 0)\n";
1357 ss <<
" result[writePos] = current_result;\n";
1360 ss <<
"__kernel void " <<
name <<
"_count";
1361 ss <<
"_reduction(__global double* A, "
1362 "__global double *result,int arrayLength,int windowSize){\n";
1363 ss <<
" double tmp, current_result =" <<
1366 ss <<
" int writePos = get_group_id(1);\n";
1367 ss <<
" int lidx = get_local_id(0);\n";
1368 ss <<
" __local double shm_buf[256];\n";
1369 if (mpDVR->IsStartFixed())
1370 ss <<
" int offset = 0;\n";
1372 ss <<
" int offset = get_group_id(1);\n";
1373 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1374 ss <<
" int end = windowSize;\n";
1375 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1376 ss <<
" int end = offset + windowSize;\n";
1377 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1378 ss <<
" int end = windowSize + get_group_id(1);\n";
1379 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1380 ss <<
" int end = windowSize;\n";
1381 ss <<
" end = min(end, arrayLength);\n";
1382 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1383 ss <<
" int loop = arrayLength/512 + 1;\n";
1384 ss <<
" for (int l=0; l<loop; l++){\n";
1385 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
1386 ss <<
" int loopOffset = l*512;\n";
1387 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
1388 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1390 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
1392 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
1393 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1395 ss <<
" shm_buf[lidx] = tmp;\n";
1396 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1397 ss <<
" for (int i = 128; i >0; i/=2) {\n";
1398 ss <<
" if (lidx < i)\n";
1399 ss <<
" shm_buf[lidx] = ";
1400 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
1401 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1403 ss <<
" if (lidx == 0)\n";
1404 ss <<
" current_result =";
1405 ss <<
"current_result + shm_buf[0];";
1407 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
1409 ss <<
" if (lidx == 0)\n";
1410 ss <<
" result[writePos] = current_result;\n";
1419 if (!bIsStartFixed && !bIsEndFixed)
1420 ss << Base::GetName() <<
"[i + gid0]";
1422 ss << Base::GetName() <<
"[i]";
1431 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1432 std::string temp = Base::GetName() +
"[gid0]";
1437 ss <<
mpCodeGen->Gen2(temp,
"tmp") <<
";\n";
1438 ss <<
"nCount = nCount-1;\n";
1439 ss <<
"nCount = nCount +";
1440 ss << Base::GetName() <<
"[gid0+" << nResultSize <<
"]" <<
";\n";
1443 ss << temp <<
"+ tmp";
1448 return nCurWindowSize;
1454 assert(Base::mpClmem ==
nullptr);
1460 size_t nInput = mpDVR->GetArrayLength();
1461 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1463 if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray ==
nullptr)
1465 double* pHostBuffer =
const_cast<double*
>(
1466 mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
1467 size_t szHostBuffer = nInput *
sizeof(double);
1468 Base::mpClmem = clCreateBuffer(kEnv.
mpkContext,
1469 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
1472 SAL_INFO(
"sc.opencl",
"Created buffer " << Base::mpClmem <<
" size " << nInput <<
"*" <<
sizeof(
double) <<
"=" << szHostBuffer <<
" using host buffer " << pHostBuffer);
// The last argument is the status out-parameter. It must point at `err`,
// because the CL_SUCCESS test that follows this call reads `err`. With a null
// pointer no status is reported, so that test would only see the result of the
// previous OpenCL call and a failed allocation here would go undetected.
// Host pointer stays null: the flags ask the implementation to allocate.
1475 CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
1476 sizeof(
double) *
w,
nullptr, &
err);
1477 if (CL_SUCCESS !=
err)
1479 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " <<
sizeof(
double) <<
"*" <<
w <<
"=" << (
sizeof(
double)*
w));
1482 std::string kernelName;
1484 kernelName = Base::GetName() +
"_reduction";
1486 kernelName = Base::GetName() +
"_sum_reduction";
1487 cl_kernel redKernel = clCreateKernel(
mpProgram, kernelName.c_str(), &
err);
1488 if (
err != CL_SUCCESS)
1490 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " <<
mpProgram);
1494 cl_mem buf = Base::GetCLBuffer();
1495 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 0 <<
": cl_mem: " << buf);
1496 err = clSetKernelArg(redKernel, 0,
sizeof(cl_mem),
1497 static_cast<void*
>(&buf));
1498 if (CL_SUCCESS !=
err)
1501 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 1 <<
": cl_mem: " <<
mpClmem2);
1502 err = clSetKernelArg(redKernel, 1,
sizeof(cl_mem), &
mpClmem2);
1503 if (CL_SUCCESS !=
err)
// Bind reduction-kernel argument 2 (logged as a cl_int) to nInput.
// NOTE(review): nInput is declared size_t where it is initialised from
// GetArrayLength(), yet only sizeof(cl_int) bytes starting at its address are
// handed to OpenCL. That reads the low-order bytes only on little-endian
// hosts; copying the value into a cl_int local first would be portable -
// confirm and fix in the full file.
1506 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 2 <<
": cl_int: " << nInput);
1507 err = clSetKernelArg(redKernel, 2,
sizeof(cl_int),
static_cast<void*
>(&nInput));
1508 if (CL_SUCCESS !=
err)
// Bind reduction-kernel argument 3 (logged as a cl_int) to nCurWindowSize.
// NOTE(review): same size_t versus cl_int width mismatch as for argument 2.
1511 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 3 <<
": cl_int: " << nCurWindowSize);
1512 err = clSetKernelArg(redKernel, 3,
sizeof(cl_int),
static_cast<void*
>(&nCurWindowSize));
1513 if (CL_SUCCESS !=
err)
1517 size_t global_work_size[] = { 256,
static_cast<size_t>(
w) };
1518 size_t const local_work_size[] = { 256, 1 };
1519 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
1520 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
1521 global_work_size, local_work_size, 0,
nullptr,
nullptr);
1522 if (CL_SUCCESS !=
err)
1523 throw OpenCLError(
"clEnqueueNDRangeKernel",
err, __FILE__, __LINE__);
1525 if (CL_SUCCESS !=
err)
1530 std::unique_ptr<double[]> pAllBuffer(
new double[2 *
w]);
1531 double* resbuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
1533 CL_TRUE, CL_MAP_READ, 0,
1534 sizeof(
double) *
w, 0,
nullptr,
nullptr,
1536 if (
err != CL_SUCCESS)
1537 throw OpenCLError(
"clEnqueueMapBuffer",
err, __FILE__, __LINE__);
1539 for (
int i = 0;
i <
w;
i++)
1540 pAllBuffer[
i] = resbuf[
i];
1542 if (
err != CL_SUCCESS)
1543 throw OpenCLError(
"clEnqueueUnmapMemObject",
err, __FILE__, __LINE__);
1545 kernelName = Base::GetName() +
"_count_reduction";
1546 redKernel = clCreateKernel(
mpProgram, kernelName.c_str(), &
err);
1547 if (
err != CL_SUCCESS)
1549 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " <<
mpProgram);
1552 buf = Base::GetCLBuffer();
1553 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 0 <<
": cl_mem: " << buf);
1554 err = clSetKernelArg(redKernel, 0,
sizeof(cl_mem),
1555 static_cast<void*
>(&buf));
1556 if (CL_SUCCESS !=
err)
1559 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 1 <<
": cl_mem: " <<
mpClmem2);
1560 err = clSetKernelArg(redKernel, 1,
sizeof(cl_mem), &
mpClmem2);
1561 if (CL_SUCCESS !=
err)
1564 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 2 <<
": cl_int: " << nInput);
1565 err = clSetKernelArg(redKernel, 2,
sizeof(cl_int),
static_cast<void*
>(&nInput));
1566 if (CL_SUCCESS !=
err)
1569 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 3 <<
": cl_int: " << nCurWindowSize);
1570 err = clSetKernelArg(redKernel, 3,
sizeof(cl_int),
static_cast<void*
>(&nCurWindowSize));
1571 if (CL_SUCCESS !=
err)
1575 size_t global_work_size1[] = { 256,
static_cast<size_t>(
w) };
1576 size_t const local_work_size1[] = { 256, 1 };
1577 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
1578 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
1579 global_work_size1, local_work_size1, 0,
nullptr,
nullptr);
1580 if (CL_SUCCESS !=
err)
1581 throw OpenCLError(
"clEnqueueNDRangeKernel",
err, __FILE__, __LINE__);
1583 if (CL_SUCCESS !=
err)
1585 resbuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
1587 CL_TRUE, CL_MAP_READ, 0,
1588 sizeof(
double) *
w, 0,
nullptr,
nullptr,
1590 if (
err != CL_SUCCESS)
1591 throw OpenCLError(
"clEnqueueMapBuffer",
err, __FILE__, __LINE__);
1592 for (
int i = 0;
i <
w;
i++)
1593 pAllBuffer[
i +
w] = resbuf[
i];
1596 if (CL_SUCCESS !=
err)
1605 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
1606 w *
sizeof(
double) * 2, pAllBuffer.get(), &
err);
1607 if (CL_SUCCESS !=
err)
1609 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " <<
w <<
"*" <<
sizeof(
double) <<
"=" << (
w*
sizeof(
double)) <<
" copying host buffer " << pAllBuffer.get());
1612 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " <<
mpClmem2);
1613 err = clSetKernelArg(k, argno,
sizeof(cl_mem), &
mpClmem2);
1614 if (CL_SUCCESS !=
err)
// Constructs an entry that carries a plain double constant: the buffer handle
// mCLMem is set to nullptr and the value x is stored in mConst.
1638 explicit SumIfsArgs(
double x) :
mCLMem(nullptr),
mConst(
x) { }
// Kernel argument that aggregates a list of sub-arguments (mvSubArguments)
// together with a code generator (mpCodeGen). Most members below forward the
// call to the sub-arguments.
// NOTE(review): this listing omits some source lines (braces, loop headers),
// so the comments describe only what the visible lines establish.
1644class DynamicKernelSoPArguments :
public DynamicKernelArgument
1647 typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
1651 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize );
// Marshals the sub-arguments into kernel k starting at argument index argno;
// i accumulates how many kernel arguments the sub-arguments consumed.
// When the code generator is an OpSumIfs that needs a reduction kernel, a
// separate "<first sub-argument name>_SumIfs_reduction" kernel is created from
// pProgram, given its arguments, enqueued and released, and mpClmem2 is then
// bound as argument argno of k.
// Throws OpenCLError when one of the checked OpenCL calls fails.
1654 virtual size_t Marshal( cl_kernel k,
int argno,
int nVectorWidth, cl_program pProgram )
override
1660 i += rxSubArgument->Marshal(k, argno +
i, nVectorWidth, pProgram);
1662 if (OpSumIfs* OpSumCodeGen =
dynamic_cast<OpSumIfs*
>(
mpCodeGen.get()))
1668 DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
1669 static_cast<DynamicKernelSlidingArgument<VectorRef>*
>(
Arg);
1672 if (OpSumCodeGen->NeedReductionKernel())
// NOTE(review): nInput and nCurWindowSize are size_t, but they are passed to
// clSetKernelArg below with sizeof(cl_int), so only the first sizeof(cl_int)
// bytes of each are handed over — confirm this is intended on all targets.
1674 size_t nInput = slidingArgPtr->GetArrayLength();
1675 size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
1676 std::vector<SumIfsArgs> vclmem;
// One SumIfsArgs per sub-argument: the CL buffer of a VectorRef, the double of
// a constant argument, otherwise an entry built from nullptr.
1680 if (VectorRef*
VR =
dynamic_cast<VectorRef*
>(rxSubArgument.get()))
1681 vclmem.emplace_back(
VR->GetCLBuffer());
1682 else if (DynamicKernelConstantArgument* CA =
dynamic_cast<DynamicKernelConstantArgument*
>(rxSubArgument.get()))
1683 vclmem.emplace_back(CA->GetDouble());
1685 vclmem.emplace_back(
nullptr);
// Buffer of nVectorWidth doubles (logged below as mpClmem2).
1688 sizeof(
double) * nVectorWidth,
nullptr, &
err);
1689 if (CL_SUCCESS !=
err)
1690 throw OpenCLError(
"clCreateBuffer",
err, __FILE__, __LINE__);
1691 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " <<
sizeof(
double) <<
"*" << nVectorWidth <<
"=" << (
sizeof(
double)*nVectorWidth));
// The reduction kernel is looked up by name in the already built program.
1693 std::string kernelName =
mvSubArguments[0]->GetName() +
"_SumIfs_reduction";
1694 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &
err);
1695 if (
err != CL_SUCCESS)
1696 throw OpenCLError(
"clCreateKernel",
err, __FILE__, __LINE__);
1697 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << pProgram);
// Bind every collected entry at index j: the cl_mem handle when one is set,
// otherwise the constant double by value.
1700 for (
size_t j = 0; j < vclmem.size(); j++)
1703 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": cl_mem: " << vclmem[j].
mCLMem);
1706 err = clSetKernelArg(redKernel, j,
1707 vclmem[j].
mCLMem ?
sizeof(cl_mem) :
sizeof(
double),
1708 vclmem[j].
mCLMem ?
static_cast<void*
>(&vclmem[j].
mCLMem) :
1709 static_cast<void*
>(&vclmem[j].
mConst));
1710 if (CL_SUCCESS !=
err)
1711 throw OpenCLError(
"clSetKernelArg",
err, __FILE__, __LINE__);
// After the collected entries come mpClmem2, nInput and nCurWindowSize, at
// indices vclmem.size(), vclmem.size() + 1 and vclmem.size() + 2.
1713 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << vclmem.size() <<
": cl_mem: " <<
mpClmem2);
1714 err = clSetKernelArg(redKernel, vclmem.size(),
sizeof(cl_mem),
static_cast<void*
>(&
mpClmem2));
1715 if (CL_SUCCESS !=
err)
1716 throw OpenCLError(
"clSetKernelArg",
err, __FILE__, __LINE__);
1718 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << (vclmem.size() + 1) <<
": cl_int: " << nInput);
1719 err = clSetKernelArg(redKernel, vclmem.size() + 1,
sizeof(cl_int),
static_cast<void*
>(&nInput));
1720 if (CL_SUCCESS !=
err)
1721 throw OpenCLError(
"clSetKernelArg",
err, __FILE__, __LINE__);
1723 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << (vclmem.size() + 2) <<
": cl_int: " << nCurWindowSize);
1724 err = clSetKernelArg(redKernel, vclmem.size() + 2,
sizeof(cl_int),
static_cast<void*
>(&nCurWindowSize));
1725 if (CL_SUCCESS !=
err)
1726 throw OpenCLError(
"clSetKernelArg",
err, __FILE__, __LINE__);
// Two-dimensional launch: global size 256 x nVectorWidth, local size 256 x 1.
1728 size_t global_work_size[] = { 256,
static_cast<size_t>(nVectorWidth) };
1729 size_t const local_work_size[] = { 256, 1 };
1730 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
1731 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
1732 global_work_size, local_work_size, 0,
nullptr,
nullptr);
1733 if (CL_SUCCESS !=
err)
1734 throw OpenCLError(
"clEnqueueNDRangeKernel",
err, __FILE__, __LINE__);
// The call whose result is checked here is not visible in this listing; the
// error is reported under the name "clFinish".
1737 if (CL_SUCCESS !=
err)
1738 throw OpenCLError(
"clFinish",
err, __FILE__, __LINE__);
1740 SAL_INFO(
"sc.opencl",
"Releasing kernel " << redKernel);
1741 err = clReleaseKernel(redKernel);
// Finally bind mpClmem2 as argument argno of the caller's kernel k.
1745 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " <<
mpClmem2);
1746 err = clSetKernelArg(k, argno,
sizeof(cl_mem),
static_cast<void*
>(&
mpClmem2));
1747 if (CL_SUCCESS !=
err)
1748 throw OpenCLError(
"clSetKernelArg",
err, __FILE__, __LINE__);
// Forwards GenSlidingWindowFunction to a sub-argument (rArg); the enclosing
// loop header is not visible in this listing.
1754 virtual void GenSlidingWindowFunction( outputstream& ss )
override
1757 rArg->GenSlidingWindowFunction(ss);
// Bodies of GenDeclRef and GenDecl are not visible in this listing.
1760 virtual void GenDeclRef( outputstream& ss )
const override
1769 virtual void GenDecl( outputstream& ss )
const override
// Returns the largest window size seen: starts from 0 and keeps the greater of
// the running value and a sub-argument's GetWindowSize().
1780 virtual size_t GetWindowSize()
const override
1782 size_t nCurWindowSize = 0;
1785 size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
1786 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1787 nCurChildWindowSize : nCurWindowSize;
1789 return nCurWindowSize;
// Forwards GenSlidingWindowDecl to the element referenced by iterator it.
1793 virtual void GenSlidingWindowDecl( outputstream& ss )
const override
1800 (*it)->GenSlidingWindowDecl(ss);
// Produces the reference expression for this argument. One visible form starts
// with "<mSymName>_<BinFuncName>(". Another path throws Unhandled, and the
// nested-ness flags bArgument1_NeedNested / bArgument2_NeedNested are passed
// on to the sub-arguments' GenSlidingWindowDeclRef. The conditions selecting
// between these paths are not visible in this listing.
1805 virtual std::string GenSlidingWindowDeclRef(
bool nested =
false )
const override
1810 ss << mSymName <<
"_" <<
mpCodeGen->BinFuncName() <<
"(";
1822 throw Unhandled(__FILE__, __LINE__);
1823 bool bArgument1_NeedNested =
1826 bool bArgument2_NeedNested =
1832 ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
1834 ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
// Builds an operation name: "_" + BinFuncName(), followed by the
// sub-argument's DumpOpName().
1839 virtual std::string DumpOpName()
const override
1841 std::string
t =
"_" +
mpCodeGen->BinFuncName();
1843 t += rSubArgument->DumpOpName();
// Forwards DumpInlineFun (with the same decls/funs sets) to the sub-argument.
1846 virtual void DumpInlineFun( std::set<std::string>& decls,
1847 std::set<std::string>& funs )
const override
1851 rSubArgument->DumpInlineFun(decls, funs);
// Tests the sub-argument's IsEmpty(); the returned values are not visible in
// this listing.
1853 virtual bool IsEmpty()
const override
1856 if( !rSubArgument->IsEmpty())
1860 virtual ~DynamicKernelSoPArguments()
override
// Fragment of a factory function whose header line is not present in this
// listing (presumably SoPHelper, which the CASE/EXTCASE macros call — confirm).
// It returns a shared DynamicKernelSoPArguments built from config, ts, ft and
// nResultSize, moving pCodeGen into the new object.
1880 const std::string& ts,
const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
1883 return std::make_shared<DynamicKernelSoPArguments>(
config, ts,
ft, std::move(pCodeGen), nResultSize);
// Fragment of a factory whose header line is not present in this listing
// (presumably the VectorRefFactory<Base> template used by the constructor —
// confirm). It picks the argument class from the dynamic type of pCodeGen:
// OpSumIfs has a path that yields DynamicKernelSlidingArgument<VectorRef>;
// OpMul, OpSub and every generator that is not a Reduction yield
// DynamicKernelSlidingArgument<Base>. Conditions on the dropped lines are
// not visible.
1889 std::shared_ptr<SlidingFunctionBase>& pCodeGen,
1894 if (
dynamic_cast<OpSumIfs*
>(pCodeGen.get()))
1898 return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(
config, s,
ft, pCodeGen,
index);
1899 return std::make_shared<DynamicKernelSlidingArgument<Base>>(
config, s,
ft, pCodeGen,
index);
1908 else if (
dynamic_cast<OpMul*
>(pCodeGen.get()))
1910 return std::make_shared<DynamicKernelSlidingArgument<Base>>(
config, s,
ft, pCodeGen,
index);
1913 else if (
dynamic_cast<OpSub*
>(pCodeGen.get()))
1915 return std::make_shared<DynamicKernelSlidingArgument<Base>>(
config, s,
ft, pCodeGen,
index);
1918 else if (!
dynamic_cast<Reduction*
>(pCodeGen.get()))
1920 return std::make_shared<DynamicKernelSlidingArgument<Base>>(
config, s,
ft, pCodeGen,
index);
1925 ft->GetFormulaToken());
// Fallback visible at the end of the fragment: a sliding argument over Base.
1932 return std::make_shared<DynamicKernelSlidingArgument<Base>>(
config, s,
ft, pCodeGen,
index);
// Constructor: iterates over the children of ft (nChildren =
// ft->Children.size()) and creates a sub-argument for each child i, named
// "<s>_<i>". Throws Unhandled / UnhandledToken for inputs it rejects.
// NOTE(review): the listing drops many lines of this function (the dispatch
// on the child's opcode and most branch conditions), so the branch comments
// below rely only on the visible log messages and type names.
1935DynamicKernelSoPArguments::DynamicKernelSoPArguments(
const ScCalcConfig& config,
1936 const std::string& s,
const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize ) :
1939 size_t nChildren =
ft->Children.size();
1941 for (
size_t i = 0;
i < nChildren;
i++)
1945 throw Unhandled(__FILE__, __LINE__);
1948 throw Unhandled(__FILE__, __LINE__);
// Per-child symbol name: s + "_" + i.
1950 outputstream tmpname;
1951 tmpname << s <<
"_" <<
i;
1952 std::string ts = tmpname.str();
// A multi-column DoubleRef is rejected unless the code generator reports that
// it can handle multi-vector input.
1970 if( !pCodeGen->canHandleMultiVector())
1971 throw UnhandledToken((
"Function '" + pCodeGen->BinFuncName()
1972 +
"' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
1974 SAL_INFO(
"sc.opencl",
"multi-column DoubleRef");
// Rejection for an excessive parameter count; the message reports
// 2 + the number of arrays in the DoubleRef.
1989 throw UnhandledToken((
"Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->
GetArrays().size()) +
")").c_str(), __FILE__, __LINE__);
// Log what array j of the DoubleRef offers and what the code generator accepts.
1993 SAL_INFO(
"sc.opencl",
"i=" << i <<
" j=" << j <<
1994 " mpNumericArray=" << pDVR->
GetArrays()[j].mpNumericArray <<
1995 " mpStringArray=" << pDVR->
GetArrays()[j].mpStringArray <<
1996 " allStringsAreNull=" << (AllStringsAreNull(pDVR->
GetArrays()[j].mpStringArray, pDVR->
GetArrayLength())?
"YES":
"NO") <<
1997 " takeNumeric=" << (pCodeGen->takeNumeric()?
"YES":
"NO") <<
1998 " takeString=" << (pCodeGen->takeString()?
"YES":
"NO"));
// Choose the argument type for array j from the numeric/string availability
// and the generator's takeNumeric()/takeString()/forceStringsToZero() answers.
2000 if (pDVR->
GetArrays()[j].mpNumericArray &&
2001 pCodeGen->takeNumeric() &&
2003 pCodeGen->takeString())
2006 SAL_INFO(
"sc.opencl",
"Numbers and strings");
2008 std::make_shared<DynamicKernelMixedSlidingArgument>(
mCalcConfig,
2011 else if (pDVR->
GetArrays()[j].mpNumericArray &&
2012 pCodeGen->takeNumeric() &&
2015 || pCodeGen->forceStringsToZero()))
2020 SAL_INFO(
"sc.opencl",
"Numbers (no strings or strings treated as zero)");
2024 VectorRefFactory<VectorRefStringsToZero>(
mCalcConfig,
2034 else if (pDVR->
GetArrays()[j].mpNumericArray ==
nullptr &&
2035 pCodeGen->takeNumeric() &&
2038 || pCodeGen->forceStringsToZero()))
2042 SAL_INFO(
"sc.opencl",
"Only strings even if want numbers but should be treated as zero");
2044 VectorRefFactory<VectorRefStringsToZero>(
mCalcConfig,
2047 else if (pDVR->
GetArrays()[j].mpStringArray &&
2048 pCodeGen->takeString())
2051 SAL_INFO(
"sc.opencl",
"Strings only");
2058 pDVR->
GetArrays()[j].mpNumericArray ==
nullptr)
2062 SAL_INFO(
"sc.opencl",
"Only empty cells");
2069 SAL_INFO(
"sc.opencl",
"Unhandled case, rejecting for OpenCL");
2070 throw UnhandledToken((
"Unhandled numbers/strings combination for '"
2071 + pCodeGen->BinFuncName() +
"'").c_str(), __FILE__, __LINE__);
// The same numbers/strings selection, here producing non-sliding argument
// types (presumably the single-vector reference case — confirm; the start of
// this log statement and the branch conditions are not visible).
2084 " takeNumeric=" << (pCodeGen->takeNumeric()?
"YES":
"NO") <<
2085 " takeString=" << (pCodeGen->takeString()?
"YES":
"NO"));
2088 pCodeGen->takeNumeric() &&
2090 pCodeGen->takeString())
2093 SAL_INFO(
"sc.opencl",
"Numbers and strings");
2095 std::make_shared<DynamicKernelMixedArgument>(
mCalcConfig,
2096 ts,
ft->Children[i]));
2099 pCodeGen->takeNumeric() &&
2102 || pCodeGen->forceStringsToZero()))
2107 SAL_INFO(
"sc.opencl",
"Numbers (no strings or strings treated as zero)");
2110 std::make_shared<VectorRefStringsToZero>(
mCalcConfig, ts,
2118 pCodeGen->takeNumeric() &&
2121 || pCodeGen->forceStringsToZero()))
2125 SAL_INFO(
"sc.opencl",
"Only strings even if want numbers but should be treated as zero");
2127 std::make_shared<VectorRefStringsToZero>(
mCalcConfig, ts,
2131 pCodeGen->takeString())
2134 SAL_INFO(
"sc.opencl",
"Strings only");
2136 std::make_shared<DynamicKernelStringArgument>(
mCalcConfig,
2137 ts,
ft->Children[i]));
2144 SAL_INFO(
"sc.opencl",
"Only empty cells");
2151 SAL_INFO(
"sc.opencl",
"Unhandled case, rejecting for OpenCL");
2152 throw UnhandledToken((
"Unhandled numbers/strings combination for '"
2153 + pCodeGen->BinFuncName() +
"'").c_str(), __FILE__, __LINE__);
// Constant operands: a number, a string (when the generator takes strings), or
// a string treated as zero (generator takes numbers only and forces strings to
// zero). Anything else is rejected.
2158 SAL_INFO(
"sc.opencl",
"Constant number case");
2160 std::make_shared<DynamicKernelConstantArgument>(
mCalcConfig, ts,
2164 && pCodeGen->takeString())
2166 SAL_INFO(
"sc.opencl",
"Constant string case");
2168 std::make_shared<ConstStringArgument>(
mCalcConfig, ts,
2172 && !pCodeGen->takeString()
2173 && pCodeGen->takeNumeric()
2174 && pCodeGen->forceStringsToZero())
2176 SAL_INFO(
"sc.opencl",
"Constant string case, treated as zero");
2183 SAL_INFO(
"sc.opencl",
"Unhandled operand, rejecting for OpenCL");
2184 throw UnhandledToken((
"unhandled operand " +
StackVarEnumToString(pChild->
GetType()) +
" for ocPush").c_str(), __FILE__, __LINE__);
// Dedicated argument types for pi and for random numbers.
2189 std::make_shared<DynamicKernelPiArgument>(
mCalcConfig, ts,
2194 std::make_shared<DynamicKernelRandomArgument>(
mCalcConfig, ts,
// CASE(opcode, createCode): dispatch-table entry. The visible part of the
// macro appends SoPHelper(mCalcConfig, ts, ft->Children[i], createCode,
// nResultSize) to mvSubArguments, where createCode constructs the code
// generator for that opcode. Some generators take nResultSize, most take no
// argument. Note that ocAdd and ocSum both use OpSum, and ocPow and ocPower
// both use OpPower.
2197#define CASE(opcode, createCode) \
2199 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], createCode, nResultSize)); \
2201 CASE(ocAbs, std::make_shared<OpAbs>())
2202 CASE(ocAdd,
std::make_shared<OpSum>(nResultSize))
2203 CASE(ocAnd,
std::make_shared<OpAnd>())
2204 CASE(ocArcCos,
std::make_shared<OpArcCos>())
2205 CASE(ocArcCosHyp,
std::make_shared<OpArcCosHyp>())
2206 CASE(ocArcCot,
std::make_shared<OpArcCot>())
2207 CASE(ocArcCotHyp,
std::make_shared<OpArcCotHyp>())
2208 CASE(ocArcSin,
std::make_shared<OpArcSin>())
2209 CASE(ocArcSinHyp,
std::make_shared<OpArcSinHyp>())
2210 CASE(ocArcTan,
std::make_shared<OpArcTan>())
2211 CASE(ocArcTan2,
std::make_shared<OpArcTan2>())
2212 CASE(ocArcTanHyp,
std::make_shared<OpArcTanH>())
2213 CASE(ocAveDev,
std::make_shared<OpAveDev>())
2214 CASE(ocAverage,
std::make_shared<OpAverage>(nResultSize))
2215 CASE(ocAverageA,
std::make_shared<OpAverageA>(nResultSize))
2216 CASE(ocAverageIf,
std::make_shared<OpAverageIf>())
2217 CASE(ocAverageIfs,
std::make_shared<OpAverageIfs>())
2218 CASE(ocB,
std::make_shared<OpB>())
2219 CASE(ocBetaDist,
std::make_shared<OpBetaDist>())
2220 CASE(ocBetaInv,
std::make_shared<OpBetainv>())
2221 CASE(ocBinomDist,
std::make_shared<OpBinomdist>())
2222 CASE(ocBitAnd,
std::make_shared<OpBitAnd>())
2223 CASE(ocBitLshift,
std::make_shared<OpBitLshift>())
2224 CASE(ocBitOr,
std::make_shared<OpBitOr>())
2225 CASE(ocBitRshift,
std::make_shared<OpBitRshift>())
2226 CASE(ocBitXor,
std::make_shared<OpBitXor>())
2227 CASE(ocCeil,
std::make_shared<OpCeil>())
2228 CASE(ocChiDist,
std::make_shared<OpChiDist>())
2229 CASE(ocChiInv,
std::make_shared<OpChiInv>())
2230 CASE(ocChiSqDist,
std::make_shared<OpChiSqDist>())
2231 CASE(ocChiSqInv,
std::make_shared<OpChiSqInv>())
2232 CASE(ocCombin,
std::make_shared<OpCombin>())
2233 CASE(ocCombinA,
std::make_shared<OpCombinA>())
2234 CASE(ocConfidence,
std::make_shared<OpConfidence>())
2235 CASE(ocCorrel,
std::make_shared<OpCorrel>())
2236 CASE(ocCos,
std::make_shared<OpCos>())
2237 CASE(ocCosHyp,
std::make_shared<OpCosh>())
2238 CASE(ocCosecant,
std::make_shared<OpCsc>())
2239 CASE(ocCosecantHyp,
std::make_shared<OpCscH>())
2240 CASE(ocCot,
std::make_shared<OpCot>())
2241 CASE(ocCotHyp,
std::make_shared<OpCoth>())
2242 CASE(ocCount,
std::make_shared<OpCount>(nResultSize))
2243 CASE(ocCount2,
std::make_shared<OpCountA>(nResultSize))
2244 CASE(ocCountIf,
std::make_shared<OpCountIf>())
2245 CASE(ocCountIfs,
std::make_shared<OpCountIfs>())
2246 CASE(ocCovar,
std::make_shared<OpCovar>())
2247 CASE(ocCritBinom,
std::make_shared<OpCritBinom>())
2248 CASE(ocDB,
std::make_shared<OpDB>())
2249 CASE(ocDDB,
std::make_shared<OpDDB>())
2250 CASE(ocDeg,
std::make_shared<OpDeg>())
2251 CASE(ocDevSq,
std::make_shared<OpDevSq>())
2252 CASE(ocDiv,
std::make_shared<OpDiv>(nResultSize))
2253 CASE(ocEqual,
std::make_shared<OpEqual>())
2254 CASE(ocEven,
std::make_shared<OpEven>())
2255 CASE(ocExp,
std::make_shared<OpExp>())
2256 CASE(ocExpDist,
std::make_shared<OpExponDist>())
2257 CASE(ocFDist,
std::make_shared<OpFdist>())
2258 CASE(ocFInv,
std::make_shared<OpFInv>())
2259 CASE(ocFTest,
std::make_shared<OpFTest>())
2260 CASE(ocFV,
std::make_shared<OpFV>())
2261 CASE(ocFact,
std::make_shared<OpFact>())
2262 CASE(ocFisher,
std::make_shared<OpFisher>())
2263 CASE(ocFisherInv,
std::make_shared<OpFisherInv>())
2264 CASE(ocFloor,
std::make_shared<OpFloor>())
2265 CASE(ocForecast,
std::make_shared<OpForecast>())
2266 CASE(ocGamma,
std::make_shared<OpGamma>())
2267 CASE(ocGammaDist,
std::make_shared<OpGammaDist>())
2268 CASE(ocGammaInv,
std::make_shared<OpGammaInv>())
2269 CASE(ocGammaLn,
std::make_shared<OpGammaLn>())
2270 CASE(ocGauss,
std::make_shared<OpGauss>())
2271 CASE(ocGeoMean,
std::make_shared<OpGeoMean>())
2272 CASE(ocGreater,
std::make_shared<OpGreater>())
2273 CASE(ocGreaterEqual,
std::make_shared<OpGreaterEqual>())
2274 CASE(ocHarMean,
std::make_shared<OpHarMean>())
2275 CASE(ocHypGeomDist,
std::make_shared<OpHypGeomDist>())
2276 CASE(ocIRR,
std::make_shared<OpIRR>())
2277 CASE(ocISPMT,
std::make_shared<OpISPMT>())
2278 CASE(ocIf,
std::make_shared<OpIf>())
2279 CASE(ocInt,
std::make_shared<OpInt>())
2280 CASE(ocIntercept,
std::make_shared<OpIntercept>())
2281 CASE(ocIpmt,
std::make_shared<OpIPMT>())
2282 CASE(ocIsEven,
std::make_shared<OpIsEven>())
2283 CASE(ocIsOdd,
std::make_shared<OpIsOdd>())
2284 CASE(ocKurt,
std::make_shared<OpKurt>())
2285 CASE(ocLess,
std::make_shared<OpLess>())
2286 CASE(ocLessEqual,
std::make_shared<OpLessEqual>())
2287 CASE(ocLn,
std::make_shared<OpLn>())
2288 CASE(ocLog,
std::make_shared<OpLog>())
2289 CASE(ocLog10,
std::make_shared<OpLog10>())
2290 CASE(ocLogInv,
std::make_shared<OpLogInv>())
2291 CASE(ocLogNormDist,
std::make_shared<OpLogNormDist>())
2292 CASE(ocMIRR,
std::make_shared<OpMIRR>())
2293 CASE(ocMax,
std::make_shared<OpMax>(nResultSize))
2294 CASE(ocMaxA,
std::make_shared<OpMaxA>(nResultSize))
2295 CASE(ocMin,
std::make_shared<OpMin>(nResultSize))
2296 CASE(ocMinA,
std::make_shared<OpMinA>(nResultSize))
2297 CASE(ocMod,
std::make_shared<OpMod>())
2298 CASE(ocMul,
std::make_shared<OpMul>(nResultSize))
2299 CASE(ocNPV,
std::make_shared<OpNPV>())
2300 CASE(ocNegBinomVert ,
std::make_shared<OpNegbinomdist>())
2301 CASE(ocNegSub,
std::make_shared<OpNegSub>())
2302 CASE(ocNormDist,
std::make_shared<OpNormdist>())
2303 CASE(ocNormInv,
std::make_shared<OpNorminv>())
2304 CASE(ocNot,
std::make_shared<OpNot>())
2305 CASE(ocNotEqual,
std::make_shared<OpNotEqual>())
2306 CASE(ocNper,
std::make_shared<OpNper>())
2307 CASE(ocOdd,
std::make_shared<OpOdd>())
2308 CASE(ocOr,
std::make_shared<OpOr>())
2309 CASE(ocPDuration,
std::make_shared<OpPDuration>())
2310 CASE(ocPMT,
std::make_shared<OpPMT>())
2311 CASE(ocPV,
std::make_shared<OpPV>())
2312 CASE(ocPearson,
std::make_shared<OpPearson>())
2313 CASE(ocPermut,
std::make_shared<OpPermut>())
2314 CASE(ocPermutationA,
std::make_shared<OpPermutationA>())
2315 CASE(ocPhi,
std::make_shared<OpPhi>())
2316 CASE(ocPoissonDist,
std::make_shared<OpPoisson>())
2317 CASE(ocPow,
std::make_shared<OpPower>())
2318 CASE(ocPower,
std::make_shared<OpPower>())
2319 CASE(ocPpmt,
std::make_shared<OpPPMT>())
2320 CASE(ocProduct,
std::make_shared<OpProduct>())
2321 CASE(ocRRI,
std::make_shared<OpRRI>())
2322 CASE(ocRSQ,
std::make_shared<OpRsq>())
2323 CASE(ocRad,
std::make_shared<OpRadians>())
2324 CASE(ocRate,
std::make_shared<OpRate>())
2325 CASE(ocRound,
std::make_shared<OpRound>())
2326 CASE(ocRoundDown,
std::make_shared<OpRoundDown>())
2327 CASE(ocRoundUp,
std::make_shared<OpRoundUp>())
2328 CASE(ocSLN,
std::make_shared<OpSLN>())
2329 CASE(ocSNormInv,
std::make_shared<OpNormsinv>())
2330 CASE(ocSTEYX,
std::make_shared<OpSTEYX>())
2331 CASE(ocSYD,
std::make_shared<OpSYD>())
2332 CASE(ocSecant,
std::make_shared<OpSec>())
2333 CASE(ocSecantHyp,
std::make_shared<OpSecH>())
2334 CASE(ocSin,
std::make_shared<OpSin>())
2335 CASE(ocSinHyp,
std::make_shared<OpSinh>())
2336 CASE(ocSkew,
std::make_shared<OpSkew>())
2337 CASE(ocSkewp,
std::make_shared<OpSkewp>())
2338 CASE(ocSlope,
std::make_shared<OpSlope>())
2339 CASE(ocSqrt,
std::make_shared<OpSqrt>())
2340 CASE(ocStDev,
std::make_shared<OpStDev>())
2341 CASE(ocStDevA,
std::make_shared<OpStDevA>())
2342 CASE(ocStDevP,
std::make_shared<OpStDevP>())
2343 CASE(ocStDevPA,
std::make_shared<OpStDevPA>())
2344 CASE(ocStandard,
std::make_shared<OpStandard>())
2345 CASE(ocStdNormDist,
std::make_shared<OpNormsdist>())
2346 CASE(ocSub,
std::make_shared<OpSub>(nResultSize))
2347 CASE(ocSum,
std::make_shared<OpSum>(nResultSize))
2348 CASE(ocSumIf,
std::make_shared<OpSumIf>())
2349 CASE(ocSumIfs,
std::make_shared<OpSumIfs>())
2350 CASE(ocSumProduct,
std::make_shared<OpSumProduct>())
2351 CASE(ocSumSQ,
std::make_shared<OpSumSQ>())
2352 CASE(ocSumX2DY2,
std::make_shared<OpSumX2PY2>())
2353 CASE(ocSumX2MY2,
std::make_shared<OpSumX2MY2>())
2354 CASE(ocSumXMY2,
std::make_shared<OpSumXMY2>())
2355 CASE(ocTDist,
std::make_shared<OpTDist>())
2356 CASE(ocTInv,
std::make_shared<OpTInv>())
2357 CASE(ocTTest,
std::make_shared<OpTTest>())
2358 CASE(ocTan,
std::make_shared<OpTan>())
2359 CASE(ocTanHyp,
std::make_shared<OpTanH>())
2360 CASE(ocTrunc,
std::make_shared<OpTrunc>())
2361 CASE(ocVBD,
std::make_shared<OpVDB>())
2362 CASE(ocVLookup,
std::make_shared<OpVLookup>())
2363 CASE(ocVar,
std::make_shared<OpVar>())
2364 CASE(ocVarA,
std::make_shared<OpVarA>())
2365 CASE(ocVarP,
std::make_shared<OpVarP>())
2366 CASE(ocVarPA,
std::make_shared<OpVarPA>())
2367 CASE(ocWeibull,
std::make_shared<OpWeibull>())
2368 CASE(ocXor,
std::make_shared<OpXor>())
2369 CASE(ocZTest,
std::make_shared<OpZTest>())
2372#define EXTCASE( name, createCode ) \
2373 else if (pChild->GetExternal() == name) \
2375 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], createCode, nResultSize)); \
2380 EXTCASE(
"com.sun.star.sheet.addin.Analysis.getAccrint", std::make_shared<OpAccrint>())
2381 EXTCASE("
com.sun.star.sheet.addin.Analysis.getAccrintm",
std::make_shared<OpAccrintm>())
2382 EXTCASE("
com.sun.star.sheet.addin.Analysis.getAmordegrc",
std::make_shared<OpAmordegrc>())
2383 EXTCASE("
com.sun.star.sheet.addin.Analysis.getAmorlinc",
std::make_shared<OpAmorlinc>())
2384 EXTCASE("
com.sun.star.sheet.addin.Analysis.getBesselj",
std::make_shared<OpBesselj>())
2385 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCoupdaybs",
std::make_shared<OpCoupdaybs>())
2386 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCoupdays",
std::make_shared<OpCoupdays>())
2387 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCoupdaysnc",
std::make_shared<OpCoupdaysnc>())
2388 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCoupncd",
std::make_shared<OpCoupncd>())
2389 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCoupnum",
std::make_shared<OpCoupnum>())
2390 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCouppcd",
std::make_shared<OpCouppcd>())
2391 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCumipmt",
std::make_shared<OpCumipmt>())
2392 EXTCASE("
com.sun.star.sheet.addin.Analysis.getCumprinc",
std::make_shared<OpCumprinc>())
2393 EXTCASE("
com.sun.star.sheet.addin.Analysis.getDisc",
std::make_shared<OpDISC>())
2394 EXTCASE("
com.sun.star.sheet.addin.Analysis.getDollarde",
std::make_shared<OpDollarde>())
2395 EXTCASE("
com.sun.star.sheet.addin.Analysis.getDollarfr",
std::make_shared<OpDollarfr>())
2396 EXTCASE("
com.sun.star.sheet.addin.Analysis.getDuration",
std::make_shared<OpDuration_ADD>())
2397 EXTCASE("
com.sun.star.sheet.addin.Analysis.getEffect",
std::make_shared<OpEffective>())
2398 EXTCASE("
com.sun.star.sheet.addin.Analysis.getFvschedule",
std::make_shared<OpFvschedule>())
2399 EXTCASE("
com.sun.star.sheet.addin.Analysis.getGestep",
std::make_shared<OpGestep>())
2400 EXTCASE("
com.sun.star.sheet.addin.Analysis.getIntrate",
std::make_shared<OpINTRATE>())
2401 EXTCASE("
com.sun.star.sheet.addin.Analysis.getIseven",
std::make_shared<OpIsEven>())
2402 EXTCASE("
com.sun.star.sheet.addin.Analysis.getIsodd",
std::make_shared<OpIsOdd>())
2403 EXTCASE("
com.sun.star.sheet.addin.Analysis.getMduration",
std::make_shared<OpMDuration>())
2404 EXTCASE("
com.sun.star.sheet.addin.Analysis.getMround",
std::make_shared<OpMROUND>())
2405 EXTCASE("
com.sun.star.sheet.addin.Analysis.getNominal",
std::make_shared<OpNominal>())
2406 EXTCASE("
com.sun.star.sheet.addin.Analysis.getOddlprice",
std::make_shared<OpOddlprice>())
2407 EXTCASE("
com.sun.star.sheet.addin.Analysis.getOddlyield",
std::make_shared<OpOddlyield>())
2409 EXTCASE("
com.sun.star.sheet.addin.Analysis.getPricedisc",
std::make_shared<OpPriceDisc>())
2410 EXTCASE("
com.sun.star.sheet.addin.Analysis.getPricemat",
std::make_shared<OpPriceMat>())
2411 EXTCASE("
com.sun.star.sheet.addin.Analysis.getQuotient",
std::make_shared<OpQuotient>())
2412 EXTCASE("
com.sun.star.sheet.addin.Analysis.getReceived",
std::make_shared<OpReceived>())
2413 EXTCASE("
com.sun.star.sheet.addin.Analysis.getSeriessum",
std::make_shared<OpSeriesSum>())
2414 EXTCASE("
com.sun.star.sheet.addin.Analysis.getSqrtpi",
std::make_shared<OpSqrtPi>())
2415 EXTCASE("
com.sun.star.sheet.addin.Analysis.getTbilleq",
std::make_shared<OpTbilleq>())
2416 EXTCASE("
com.sun.star.sheet.addin.Analysis.getTbillprice",
std::make_shared<OpTbillprice>())
2417 EXTCASE("
com.sun.star.sheet.addin.Analysis.getTbillyield",
std::make_shared<OpTbillyield>())
2418 EXTCASE("
com.sun.star.sheet.addin.Analysis.getXirr",
std::make_shared<OpXirr>())
2419 EXTCASE("
com.sun.star.sheet.addin.Analysis.getXnpv",
std::make_shared<OpXNPV>())
2420 EXTCASE("
com.sun.star.sheet.addin.Analysis.getYield",
std::make_shared<OpYield>())
2421 EXTCASE("
com.sun.star.sheet.addin.Analysis.getYielddisc",
std::make_shared<OpYielddisc>())
2422 EXTCASE("
com.sun.star.sheet.addin.Analysis.getYieldmat",
std::make_shared<OpYieldmat>())
2424 throw UnhandledToken(OUString("unhandled external " + pChild->GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
2429 throw UnhandledToken(OUString(
"unhandled opcode "
2431 +
"(" + OUString::number(opc) +
")").toUtf8().getStr(), __FILE__, __LINE__);
// Compiled formula (derives from CompiledFormula) that owns an OpenCL kernel.
// Only some member declarations are visible in this listing.
2438class DynamicKernel :
public CompiledFormula
2442 virtual ~DynamicKernel()
override;
// Returns a reference to the cached hash string (see the definition below).
2450 std::string
const & GetMD5();
2455 void CreateKernel();
// nr is used as the number of result elements and as the global work size
// (see the definition of Launch).
2459 void Launch(
size_t nr );
// Accessor for the result buffer handle mpResClmem.
2461 cl_mem GetResultBuffer()
const {
return mpResClmem; }
2488DynamicKernel::~DynamicKernel()
// Writes the OpenCL program text into decl: the fp64 extension pragmas (the
// visible guard is "#if __OPENCL_VERSION__ < 120"), the sliding-window helper
// functions dumped from mSyms, and then the kernel itself. The kernel's first
// parameter is "__global double *result", followed by DK's declarations, and
// its body stores DK's expression into result[get_global_id(0)].
2505void DynamicKernel::CodeGen()
2513 decl <<
"#if __OPENCL_VERSION__ < 120\n";
2514 decl <<
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
2519 decl <<
"#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
2533 mSyms.DumpSlidingWindowFunctions(decl);
2536 decl <<
"(__global double *result";
2540 DK->GenSlidingWindowDecl(decl);
2542 decl <<
") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
2543 DK->GenSlidingWindowDeclRef() <<
";\n}\n";
// Returns the hash string for this kernel. The visible lines show an MD5
// digest of RTL_DIGEST_LENGTH_MD5 bytes being formatted byte by byte in hex.
// NOTE(review): each byte is streamed with std::hex only; no width/fill is
// visible here, so bytes below 0x10 would contribute a single digit — confirm
// whether padding is applied on a line this listing omits.
2552std::string
const & DynamicKernel::GetMD5()
2562 RTL_DIGEST_LENGTH_MD5);
2565 md5s << std::hex << static_cast<int>(i);
// Creates the OpenCL kernel for this formula. Function-local statics remember
// the two most recently used programs together with their kernel hashes
// (lastOne* / lastSecond*), and both slots are compared against KernelHash.
// On the visible miss path the older program is released, a new program is
// created from source and built, and the cache slots are rotated.
// Throws OpenCLError when program creation, the build or kernel creation fails.
2573void DynamicKernel::CreateKernel()
2587 static std::string lastOneKernelHash;
2588 static std::string lastSecondKernelHash;
2589 static cl_program lastOneProgram =
nullptr;
2590 static cl_program lastSecondProgram =
nullptr;
2592 if (lastOneKernelHash == KernelHash && lastOneProgram)
2596 else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
// Release the older cached program (if any) before it is replaced.
2603 if (lastSecondProgram)
2605 SAL_INFO(
"sc.opencl",
"Releasing program " << lastSecondProgram);
2606 err = clReleaseProgram(lastSecondProgram);
2608 lastSecondProgram =
nullptr;
2619 &src,
nullptr, &err);
2620 if (err != CL_SUCCESS)
2621 throw OpenCLError(
"clCreateProgramWithSource", err, __FILE__, __LINE__);
2626 if (err != CL_SUCCESS)
// Debug builds only: on CL_BUILD_PROGRAM_FAILURE query the build status, then
// the build log size n, then the log itself, and emit them in a log message.
2628#if OSL_DEBUG_LEVEL > 0
2629 if (err == CL_BUILD_PROGRAM_FAILURE)
2631 cl_build_status stat;
2632 cl_int e = clGetProgramBuildInfo(
2634 CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status),
2637 e != CL_SUCCESS,
"sc.opencl",
2638 "after CL_BUILD_PROGRAM_FAILURE,"
2639 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
2641 if (e == CL_SUCCESS)
2644 e = clGetProgramBuildInfo(
2646 CL_PROGRAM_BUILD_LOG, 0,
nullptr, &n);
2648 e != CL_SUCCESS || n == 0,
"sc.opencl",
2649 "after CL_BUILD_PROGRAM_FAILURE,"
2650 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
2652 if (e == CL_SUCCESS && n != 0)
2654 std::vector<char>
log(n);
2655 e = clGetProgramBuildInfo(
2657 CL_PROGRAM_BUILD_LOG, n,
log.data(),
nullptr);
2659 e != CL_SUCCESS || n == 0,
"sc.opencl",
2660 "after CL_BUILD_PROGRAM_FAILURE,"
2661 " clGetProgramBuildInfo("
2663 if (e == CL_SUCCESS)
2666 "CL_BUILD_PROGRAM_FAILURE, status " << stat
2667 <<
", log \"" <<
log.data() <<
"\"");
2673 SAL_WARN(
"sc.opencl",
"Program failed to build, aborting.");
2676 throw OpenCLError(
"clBuildProgram", err, __FILE__, __LINE__);
// Rotate the cache: the previous newest entry becomes the second entry and the
// current hash takes the first slot.
2685 lastSecondKernelHash = lastOneKernelHash;
2686 lastSecondProgram = lastOneProgram;
2687 lastOneKernelHash = KernelHash;
2691 if (err != CL_SUCCESS)
2692 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
// Launches the kernel for nr result elements: allocates the result buffer
// mpResClmem (nr doubles, CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), sets
// kernel arguments, enqueues the kernel with a global work size of nr and
// flushes the queue. Throws OpenCLError when one of these steps fails.
2696void DynamicKernel::Launch(
size_t nr )
2704 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
2705 nr *
sizeof(
double),
nullptr, &err);
2706 if (CL_SUCCESS != err)
2707 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2708 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpResClmem <<
" size " << nr <<
"*" <<
sizeof(
double) <<
"=" << (nr*
sizeof(
double)));
2712 if (CL_SUCCESS != err)
2713 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// No local work size is passed (nullptr) for this launch.
2716 size_t global_work_size[] = { nr };
2719 global_work_size,
nullptr, 0,
nullptr,
nullptr);
2720 if (CL_SUCCESS != err)
2721 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2723 if (CL_SUCCESS != err)
2724 throw OpenCLError(
"clFlush", err, __FILE__, __LINE__);
// Looks up ref in mSymbols and returns a raw pointer to the argument object.
// Two return paths are visible: one hands back a newly created argument
// (new_arg), the other the entry found by the lookup (it->second). The
// condition that selects between them is not visible in this listing.
// The returned pointer is non-owning (obtained via .get()).
2730template <
typename T>
2731const DynamicKernelArgument* SymbolTable::DeclRefArg(
const ScCalcConfig& config,
2733 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize)
2736 ArgumentMap::iterator it =
mSymbols.find(ref);
2745 return new_arg.get();
2749 return it->second.get();
// Constructor and destructor with empty bodies.
2753FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() {}
2755FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
// Builds a DynamicKernel for the token array rCode: the tokens are walked in
// order and assembled into a tree of FormulaTreeNode objects, then the kernel
// object is created from the tree root and CodeGen() and CreateKernel() are
// run on it. The known exception types from those two calls are caught and
// logged; what each handler does afterwards is not visible in this listing.
2762std::shared_ptr<DynamicKernel> DynamicKernel::create(
const ScCalcConfig& rConfig,
const ScTokenArray& rCode,
int nResultSize )
// aTokenVector works as a stack of pending operand tokens; aHashMap maps an
// already processed operator token to its tree node.
2766 std::vector<FormulaToken*> aTokenVector;
2767 std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
2769 while ((pCur =
const_cast<FormulaToken*
>(aCode.Next())) !=
nullptr)
2778 if( aTokenVector.empty())
2781 aTokenVector.pop_back();
// A non-ocPush operand refers to a previously built sub-tree; otherwise a new
// leaf node is created for the pushed token.
2782 if (pTempFormula->
GetOpCode() != ocPush)
2784 if (aHashMap.find(pTempFormula) == aHashMap.end())
2786 pCurNode->Children.push_back(aHashMap[pTempFormula]);
2791 std::make_shared<FormulaTreeNode>(pTempFormula);
2792 pCurNode->Children.push_back(pChildTreeNode);
// Operands were popped from the back, so restore their original order.
2795 std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
2796 aHashMap[pCur] = pCurNode;
2798 aTokenVector.push_back(pCur);
// The node of the last remaining token becomes the child of the root.
2802 Root->Children.push_back(aHashMap[aTokenVector.back()]);
2804 auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
2809 pDynamicKernel->CodeGen();
2810 pDynamicKernel->CreateKernel();
2812 catch (
const UnhandledToken& ut)
2814 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: UnhandledToken: " << ut.mMessage <<
" at " << ut.mFile <<
":" << ut.mLineNumber);
2817 catch (
const InvalidParameterCount& ipc)
2819 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
2820 <<
" at " << ipc.mFile <<
":" << ipc.mLineNumber);
2823 catch (
const OpenCLError& oce)
2827 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: OpenCLError from " << oce.mFunction <<
": " <<
openclwrapper::errorString(oce.mError) <<
" at " << oce.mFile <<
":" << oce.mLineNumber);
2834 catch (
const Unhandled& uh)
2836 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: Unhandled at " << uh.mFile <<
":" << uh.mLineNumber);
2846 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: unexpected exception");
2850 return pDynamicKernel;
// Holds the outcome of a kernel launch for one formula group. Only some
// members are visible in this listing.
2855class CLInterpreterResult
2866 CLInterpreterResult( DynamicKernel* pKernel,
SCROW nGroupLength ) :
// A result is valid when it refers to a kernel (mpKernel is not null).
2869 bool isValid()
const {
return mpKernel !=
nullptr; }
// Maps the result data for reading (blocking map, CL_MAP_READ); the two error
// checks below guard OpenCL calls that are not visible in this listing.
2871 void fetchResultFromKernel()
2887 CL_TRUE, CL_MAP_READ, 0,
2891 if (err != CL_SUCCESS)
2915 if (err != CL_SUCCESS)
// Context for interpreting one formula group with a compiled kernel. Only some
// members are visible in this listing.
2925class CLInterpreterContext
2933 explicit CLInterpreterContext(
SCROW nGroupLength, std::shared_ptr<DynamicKernel> pKernel )
// The destructor clears the string ids kept by DynamicKernelArgument.
2938 ~CLInterpreterContext()
2940 DynamicKernelArgument::ClearStringIds();
2943 bool isValid()
const
// Launches the kernel and returns its result. Every visible failure path
// (an early return and each exception handler) logs where applicable and
// returns a default-constructed CLInterpreterResult instead of propagating.
2948 CLInterpreterResult launchKernel()
2951 return CLInterpreterResult();
2958 catch (
const UnhandledToken& ut)
2960 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: UnhandledToken: " << ut.mMessage <<
" at " << ut.mFile <<
":" << ut.mLineNumber);
2962 return CLInterpreterResult();
2964 catch (
const OpenCLError& oce)
2966 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: OpenCLError from " << oce.mFunction <<
": " <<
openclwrapper::errorString(oce.mError) <<
" at " << oce.mFile <<
":" << oce.mLineNumber);
2968 return CLInterpreterResult();
2970 catch (
const Unhandled& uh)
2972 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: Unhandled at " << uh.mFile <<
":" << uh.mLineNumber);
2974 return CLInterpreterResult();
2978 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: unexpected exception");
2980 return CLInterpreterResult();
// Builds the context for a formula group: compiles rCode into a DynamicKernel
// (result size xGroup->mnLength) and wraps it together with the group length.
2988CLInterpreterContext createCLInterpreterContext(
const ScCalcConfig& rConfig,
2991 return CLInterpreterContext(xGroup->mnLength, DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
// Fragment whose function header is not present in this listing (presumably
// the genRPNTokens helper called from the interpret code — confirm).
// Jump-command reordering is switched off on the compiler object before the
// token array is compiled.
2998 aComp.EnableJumpCommandReorder(
false);
2999 aComp.CompileTokenArray();
// Returns true when the OpenCL call made here (not visible in this listing)
// reported CL_SUCCESS, false otherwise.
3002bool waitForResults()
3009 if (err != CL_SUCCESS)
3012 return err == CL_SUCCESS;
// Fragment whose function header is not present in this listing (presumably
// the interpreter's entry point for one cell group — confirm).
// Visible sequence: merge the calc configuration, generate RPN tokens, create
// the CL context, launch the kernel, wait for completion, fetch the result and
// push it into the document. Each validity check guards the following step;
// what happens on failure is not visible here.
3021 SAL_INFO(
"sc.opencl",
"Interpret cell group " << rTopPos);
3022 MergeCalcConfig(rDoc);
3024 genRPNTokens(rDoc, rTopPos, rCode);
3029 CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
3030 if (!aCxt.isValid())
3033 CLInterpreterResult aRes = aCxt.launchKernel();
3034 if (!aRes.isValid())
3037 if (!waitForResults())
3040 aRes.fetchResultFromKernel();
3042 return aRes.pushResultToDocument(rDoc, rTopPos);
::boost::spirit::classic::rule< ScannerT > argument
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells.
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
Matrix data type that can store values of mixed types.
(Partially) abstract base class for an operand
static int GetStringId(const rtl_uString *string)
virtual void GenDeclRef(outputstream &ss) const
Generate use/references to the argument.
FormulaTreeNodeRef mFormulaTree
Handling a Double Vector that is used as a sliding window input to either a sliding window average or...
virtual bool NeedParallelReduction() const
size_t GenReductionLoopHeader(outputstream &ss, bool &needBody)
Controls how the elements in the DoubleVectorRef are traversed.
const formula::DoubleVectorRefToken * mpDVR
std::string GenSlidingWindowDeclRef(bool nested=false) const
DynamicKernelSlidingArgument(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > CodeGen, int index)
virtual size_t Marshal(cl_kernel, int, int, cl_program) override
Create buffer and pass the buffer to a given kernel.
virtual std::string GenIsString(bool=false) const override
Will generate value saying whether the value is a string.
virtual void GenSlidingWindowDecl(outputstream &ss) const override
When declared as input to a sliding window function.
virtual void GenDecl(outputstream &ss) const override
Generate declaration.
Handling a Double Vector that is used as a sliding window input. Performs parallel reduction based on ...
virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
size_t GenReductionLoopHeader(outputstream &ss, int nResultSize, bool &needBody)
Controls how the elements in the DoubleVectorRef are traversed.
ParallelReductionVectorRef(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > CodeGen, int index)
virtual void GenSlidingWindowFunction(outputstream &ss)
Emit the definition for the auxiliary reduction kernel.
const formula::DoubleVectorRefToken * mpDVR
~ParallelReductionVectorRef()
virtual std::string GenSlidingWindowDeclRef(bool) const
Holds an input (read-only) argument reference to a SingleVectorRef.
virtual void GenSlidingWindowDecl(outputstream &ss) const override
When declared as input to a sliding window function.
virtual std::string GenSlidingWindowDeclRef(bool=false) const override
When referenced in a sliding window function.
virtual void GenDecl(outputstream &ss) const override
Generate declaration.
std::vector< double > dataBuffer
virtual size_t Marshal(cl_kernel, int, int, cl_program) override
Create buffer and pass the buffer to a given kernel.
const OUString & getString() const
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
int uniform_int_distribution(int a, int b)
css::uno::Reference< css::deployment::XPackageRegistry > create(css::uno::Reference< css::deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, css::uno::Reference< css::uno::XComponentContext > const &xComponentContext)
const char * errorString(cl_int nError)
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
void setKernelEnv(KernelEnv *envInfo)
sal_uInt64 kernelFailures
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
std::string preciseFloat(double f)
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
css::uno::Reference< css::linguistic2::XProofreadingIterator > get(css::uno::Reference< css::uno::XComponentContext > const &context)
Configuration options for formula interpreter.
StringConversion meStringConversion
@ ZERO
=1+"1" or =1+"x" give 1
cl_program mpArryPrograms[MAX_CLFILE_NUM]
cl_command_queue mpkCmdQueue
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
::boost::intrusive_ptr< ScMatrix > ScMatrixRef