12 #include <document.hxx>
14 #include <tokenarray.hxx>
15 #include <compiler.hxx>
35 #include <com/sun/star/sheet/FormulaLanguage.hpp>
42 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
43 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
47 "#define IllegalArgument 502\n"
48 "#define IllegalFPOperation 503 // #NUM!\n"
49 "#define NoValue 519 // #VALUE!\n"
50 "#define NoConvergence 523\n"
51 "#define DivisionByZero 532 // #DIV/0!\n"
52 "#define NOTAVAILABLE 0x7fff // #N/A\n"
54 "double CreateDoubleError(ulong nErr)\n"
60 " return as_double(0x7FF8000000000000+nErr);\n"
64 "uint GetDoubleErrorValue(double fVal)\n"
66 " if (isfinite(fVal))\n"
69 " return IllegalFPOperation; // normal INF\n"
70 " if (as_ulong(fVal) & 0XFFFF0000u)\n"
71 " return NoValue; // just a normal NAN\n"
72 " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
75 "double fsum_count(double a, double b, __private int *p) {\n"
76 " bool t = isnan(a);\n"
80 "double fmin_count(double a, double b, __private int *p) {\n"
81 " double result = fmin(a, b);\n"
82 " bool t = isnan(result);\n"
86 "double fmax_count(double a, double b, __private int *p) {\n"
87 " double result = fmax(a, b);\n"
88 " bool t = isnan(result);\n"
92 "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
93 "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
94 "double fsub(double a, double b) { return a-b; }\n"
95 "double fdiv(double a, double b) { return a/b; }\n"
96 "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
97 "int is_representable_integer(double a) {\n"
98 " long kMaxInt = (1L << 53) - 1;\n"
99 " if (a <= as_double(kMaxInt))\n"
101 " long nInt = as_long(a);\n"
103 " return (nInt <= kMaxInt &&\n"
104 " (!((fInt = as_double(nInt)) < a) && !(fInt > a)));\n"
108 "int approx_equal(double a, double b) {\n"
109 " double e48 = 1.0 / (16777216.0 * 16777216.0);\n"
110 " double e44 = e48 * 16.0;\n"
113 " if (a == 0.0 || b == 0.0)\n"
115 " double d = fabs(a - b);\n"
116 " if (!isfinite(d))\n"
117 " return 0; // Nan or Inf involved\n"
118 " if (d > ((a = fabs(a)) * e44) || d > ((b = fabs(b)) * e44))\n"
120 " if (is_representable_integer(d) && is_representable_integer(a) && is_representable_integer(b))\n"
121 " return 0; // special case for representable integers.\n"
122 " return (d < a * e48 && d < b * e48);\n"
124 "double fsum_approx(double a, double b) {\n"
125 " if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0))\n"
126 " && approx_equal( a, -b ) )\n"
130 "double fsub_approx(double a, double b) {\n"
131 " if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approx_equal( a, b ) )\n"
142 #include <rtl/digest.h>
152 std::string linenumberify(
const std::string& s)
154 std::stringstream ss;
158 while ((newline = s.find(
'\n', start)) != std::string::npos)
160 ss <<
"/*" << std::setw(4) << linenumber++ <<
"*/ " << s.substr(start, newline-start+1);
163 if (start < s.size())
164 ss <<
"/*" << std::setw(4) << linenumber++ <<
"*/ " << s.substr(start, std::string::npos);
168 bool AllStringsAreNull(
const rtl_uString*
const* pStringArray,
size_t nLength)
170 if (pStringArray ==
nullptr)
173 for (
size_t i = 0;
i < nLength;
i++)
174 if (pStringArray[
i] !=
nullptr)
180 OUString LimitedString(
const OUString& str )
182 if( str.getLength() < 20 )
183 return "\"" + str +
"\"";
185 return OUString::Concat(
"\"") + str.subView( 0, 20 ) +
"\"...";
189 OUString DebugPeekData(
const FormulaToken* ref,
int doubleRefIndex = 0)
195 OUStringBuffer buf =
"SingleRef {";
196 for(
size_t i = 0; i < std::min< size_t >( 4, pSVR->
GetArrayLength()); ++
i )
206 buf.append(
",..." );
208 return buf.makeStringAndClear();
214 OUStringBuffer buf =
"DoubleRef {";
215 for(
size_t i = 0; i < std::min< size_t >( 4, pDVR->
GetArrayLength()); ++
i )
219 if( pDVR->
GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
220 buf.append( pDVR->
GetArrays()[doubleRefIndex].mpNumericArray[
i ] );
221 else if( pDVR->
GetArrays()[doubleRefIndex].mpStringArray != nullptr )
222 buf.append( LimitedString( OUString( pDVR->
GetArrays()[doubleRefIndex].mpStringArray[
i ] )));
225 buf.append(
",..." );
227 return buf.makeStringAndClear();
235 return OUString::number(ref->
GetDouble());
244 OUString DebugPeekDoubles(
const double*
data,
int size)
246 OUStringBuffer buf =
"{";
247 for(
int i = 0;
i < std::min( 4, size ); ++
i )
251 buf.append( data[
i ] );
254 buf.append(
",..." );
256 return buf.makeStringAndClear();
262 size_t VectorRef::Marshal( cl_kernel k,
int argno,
int, cl_program )
266 double* pHostBuffer =
nullptr;
267 size_t szHostBuffer = 0;
285 pHostBuffer =
const_cast<double*
>(
300 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
303 if (CL_SUCCESS != err)
304 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
305 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer <<
" using host buffer " << pHostBuffer);
309 if (szHostBuffer == 0)
310 szHostBuffer =
sizeof(double);
313 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
314 szHostBuffer,
nullptr, &err);
315 if (CL_SUCCESS != err)
316 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
317 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer);
319 double* pNanBuffer =
static_cast<double*
>(clEnqueueMapBuffer(
320 kEnv.
mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
321 szHostBuffer, 0,
nullptr,
nullptr, &err));
322 if (CL_SUCCESS != err)
323 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
325 for (
size_t i = 0;
i < szHostBuffer /
sizeof(double);
i++)
326 pNanBuffer[
i] = std::numeric_limits<double>::quiet_NaN();
327 err = clEnqueueUnmapMemObject(kEnv.
mpkCmdQueue, mpClmem,
328 pNanBuffer, 0,
nullptr,
nullptr);
330 if (CL_SUCCESS != err)
334 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << mpClmem <<
" (" << DebugPeekData(ref,
mnIndex) <<
")");
335 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&mpClmem));
336 if (CL_SUCCESS != err)
337 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
356 ConstStringArgument(
const ScCalcConfig& config,
const std::string& s,
360 virtual void GenDecl( std::stringstream& ss )
const override
362 ss <<
"unsigned " << mSymName;
364 virtual void GenDeclRef( std::stringstream& ss )
const override
366 ss << GenSlidingWindowDeclRef();
368 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
372 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
374 std::stringstream ss;
376 throw Unhandled(__FILE__, __LINE__);
381 virtual size_t GetWindowSize()
const override
386 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
390 cl_uint hashCode = 0;
393 throw Unhandled(__FILE__, __LINE__);
397 hashCode = s.hashCode();
400 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_uint: " << hashCode <<
"(" << DebugPeekData(ref) <<
")" );
401 cl_int
err = clSetKernelArg(k, argno,
sizeof(cl_uint), static_cast<void*>(&hashCode));
402 if (CL_SUCCESS != err)
403 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
409 class DynamicKernelConstantArgument :
public DynamicKernelArgument
412 DynamicKernelConstantArgument(
const ScCalcConfig& config,
const std::string& s,
414 DynamicKernelArgument(config, s, ft) { }
416 virtual void GenDecl( std::stringstream& ss )
const override
418 ss <<
"double " << mSymName;
420 virtual void GenDeclRef( std::stringstream& ss )
const override
424 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
428 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
431 throw Unhandled(__FILE__, __LINE__);
434 virtual size_t GetWindowSize()
const override
438 double GetDouble()
const
442 throw Unhandled(__FILE__, __LINE__);
446 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
449 double tmp = GetDouble();
451 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": double: " << tmp);
452 cl_int err = clSetKernelArg(k, argno,
sizeof(
double), static_cast<void*>(&tmp));
453 if (CL_SUCCESS != err)
454 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
459 class DynamicKernelPiArgument :
public DynamicKernelArgument
462 DynamicKernelPiArgument(
const ScCalcConfig& config,
const std::string& s,
464 DynamicKernelArgument(config, s, ft) { }
466 virtual void GenDecl( std::stringstream& ss )
const override
468 ss <<
"double " << mSymName;
470 virtual void GenDeclRef( std::stringstream& ss )
const override
472 ss <<
"3.14159265358979";
474 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
478 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
482 virtual size_t GetWindowSize()
const override
487 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
492 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": double: " << tmp <<
" (PI)");
493 cl_int err = clSetKernelArg(k, argno,
sizeof(
double), static_cast<void*>(&tmp));
494 if (CL_SUCCESS != err)
495 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
500 class DynamicKernelRandomArgument :
public DynamicKernelArgument
503 DynamicKernelRandomArgument(
const ScCalcConfig& config,
const std::string& s,
505 DynamicKernelArgument(config, s, ft) { }
507 virtual void GenDecl( std::stringstream& ss )
const override
509 ss <<
"double " << mSymName;
511 virtual void GenDeclRef( std::stringstream& ss )
const override
515 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
517 ss <<
"int " << mSymName;
519 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
521 return mSymName +
"_Random(" + mSymName +
")";
523 virtual void GenSlidingWindowFunction( std::stringstream& ss )
override
533 #ifndef DEFINED_RANDOM123_STUFF\n\
534 #define DEFINED_RANDOM123_STUFF\n\
537 Copyright 2010-2011, D. E. Shaw Research.\n\
538 All rights reserved.\n\
540 Redistribution and use in source and binary forms, with or without\n\
541 modification, are permitted provided that the following conditions are\n\
544 * Redistributions of source code must retain the above copyright\n\
545 notice, this list of conditions, and the following disclaimer.\n\
547 * Redistributions in binary form must reproduce the above copyright\n\
548 notice, this list of conditions, and the following disclaimer in the\n\
549 documentation and/or other materials provided with the distribution.\n\
551 * Neither the name of D. E. Shaw Research nor the names of its\n\
552 contributors may be used to endorse or promote products derived from\n\
553 this software without specific prior written permission.\n\
555 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
556 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
557 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
558 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
559 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
560 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
561 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
562 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
563 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
564 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
565 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
568 typedef uint uint32_t;\n\
569 struct r123array2x32\n\
573 enum r123_enum_threefry32x2\n\
584 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
585 __attribute__ ((always_inline));\n\
587 RotL_32 (uint32_t x, unsigned int N)\n\
589 return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
592 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
593 typedef struct r123array2x32 threefry2x32_key_t;\n\
594 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
595 inline threefry2x32_key_t\n\
596 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
601 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
602 threefry2x32_ctr_t in,\n\
603 threefry2x32_key_t k)\n\
604 __attribute__ ((always_inline));\n\
605 inline threefry2x32_ctr_t\n\
606 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
607 threefry2x32_key_t k)\n\
609 threefry2x32_ctr_t X;\n\
610 uint32_t ks[2 + 1];\n\
612 ks[2] = 0x1BD11BDA;\n\
613 for (i = 0; i < 2; i++) {\n\
620 if (Nrounds > 0) {\n\
622 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
625 if (Nrounds > 1) {\n\
627 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
630 if (Nrounds > 2) {\n\
632 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
635 if (Nrounds > 3) {\n\
637 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
640 if (Nrounds > 3) {\n\
645 if (Nrounds > 4) {\n\
647 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
650 if (Nrounds > 5) {\n\
652 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
655 if (Nrounds > 6) {\n\
657 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
660 if (Nrounds > 7) {\n\
662 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
665 if (Nrounds > 7) {\n\
670 if (Nrounds > 8) {\n\
672 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
675 if (Nrounds > 9) {\n\
677 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
680 if (Nrounds > 10) {\n\
682 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
685 if (Nrounds > 11) {\n\
687 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
690 if (Nrounds > 11) {\n\
695 if (Nrounds > 12) {\n\
697 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
700 if (Nrounds > 13) {\n\
702 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
705 if (Nrounds > 14) {\n\
707 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
710 if (Nrounds > 15) {\n\
712 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
715 if (Nrounds > 15) {\n\
720 if (Nrounds > 16) {\n\
722 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
725 if (Nrounds > 17) {\n\
727 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
730 if (Nrounds > 18) {\n\
732 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
735 if (Nrounds > 19) {\n\
737 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
740 if (Nrounds > 19) {\n\
745 if (Nrounds > 20) {\n\
747 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
750 if (Nrounds > 21) {\n\
752 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
755 if (Nrounds > 22) {\n\
757 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
760 if (Nrounds > 23) {\n\
762 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
765 if (Nrounds > 23) {\n\
770 if (Nrounds > 24) {\n\
772 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
775 if (Nrounds > 25) {\n\
777 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
780 if (Nrounds > 26) {\n\
782 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
785 if (Nrounds > 27) {\n\
787 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
790 if (Nrounds > 27) {\n\
795 if (Nrounds > 28) {\n\
797 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
800 if (Nrounds > 29) {\n\
802 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
805 if (Nrounds > 30) {\n\
807 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
810 if (Nrounds > 31) {\n\
812 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
815 if (Nrounds > 31) {\n\
823 enum r123_enum_threefry2x32\n\
824 { threefry2x32_rounds = 20 };\n\
825 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
826 threefry2x32_key_t k)\n\
827 __attribute__ ((always_inline));\n\
828 inline threefry2x32_ctr_t\n\
829 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
831 return threefry2x32_R (threefry2x32_rounds, in, k);\n\
836 ss <<
"double " << mSymName <<
"_Random (int seed)\n\
838 unsigned tid = get_global_id(0);\n\
839 threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
840 threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
841 c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
843 const double halffactor = 0.5*factor;\n\
844 return c.v[0] * factor + halffactor;\n\
848 virtual size_t GetWindowSize()
const override
853 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
858 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_int: " << seed <<
"(RANDOM)");
859 cl_int err = clSetKernelArg(k, argno,
sizeof(cl_int), static_cast<void*>(&seed));
860 if (CL_SUCCESS != err)
861 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
867 class DynamicKernelStringArgument :
public VectorRef
870 DynamicKernelStringArgument(
const ScCalcConfig& config,
const std::string& s,
872 VectorRef(config, s, ft,
index) { }
874 virtual void GenSlidingWindowFunction( std::stringstream& )
override { }
876 virtual void GenDecl( std::stringstream& ss )
const override
878 ss <<
"__global unsigned int *" << mSymName;
880 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
882 DynamicKernelStringArgument::GenDecl(ss);
884 virtual size_t Marshal( cl_kernel,
int,
int, cl_program )
override;
890 size_t DynamicKernelStringArgument::Marshal( cl_kernel k,
int argno,
int, cl_program )
914 size_t szHostBuffer = nStrings *
sizeof(cl_int);
915 cl_uint* pHashBuffer =
nullptr;
921 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
922 szHostBuffer,
nullptr, &err);
923 if (CL_SUCCESS != err)
924 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
925 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer);
927 pHashBuffer =
static_cast<cl_uint*
>(clEnqueueMapBuffer(
928 kEnv.
mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
929 szHostBuffer, 0,
nullptr,
nullptr, &err));
930 if (CL_SUCCESS != err)
931 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
933 for (
size_t i = 0;
i < nStrings;
i++)
938 pHashBuffer[
i] = tmp.hashCode();
949 szHostBuffer =
sizeof(cl_int);
952 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
953 szHostBuffer,
nullptr, &err);
954 if (CL_SUCCESS != err)
955 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
956 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer);
958 pHashBuffer =
static_cast<cl_uint*
>(clEnqueueMapBuffer(
959 kEnv.
mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
960 szHostBuffer, 0,
nullptr,
nullptr, &err));
961 if (CL_SUCCESS != err)
962 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
964 for (
size_t i = 0;
i < szHostBuffer /
sizeof(cl_int);
i++)
967 err = clEnqueueUnmapMemObject(kEnv.
mpkCmdQueue, mpClmem,
968 pHashBuffer, 0,
nullptr,
nullptr);
969 if (CL_SUCCESS != err)
970 throw OpenCLError(
"clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
972 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << mpClmem <<
" (" << DebugPeekData(ref,mnIndex) <<
")");
973 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&mpClmem));
974 if (CL_SUCCESS != err)
975 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
982 class DynamicKernelMixedArgument :
public VectorRef
985 DynamicKernelMixedArgument(
const ScCalcConfig& config,
const std::string& s,
988 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
990 VectorRef::GenSlidingWindowDecl(ss);
994 virtual void GenSlidingWindowFunction( std::stringstream& )
override { }
996 virtual void GenDecl( std::stringstream& ss )
const override
998 VectorRef::GenDecl(ss);
1002 virtual void GenDeclRef( std::stringstream& ss )
const override
1004 VectorRef::GenDeclRef(ss);
1008 virtual std::string GenSlidingWindowDeclRef(
bool nested )
const override
1010 std::stringstream ss;
1011 ss <<
"(!isnan(" << VectorRef::GenSlidingWindowDeclRef();
1012 ss <<
")?" << VectorRef::GenSlidingWindowDeclRef();
1017 virtual std::string GenDoubleSlidingWindowDeclRef(
bool =
false )
const override
1019 std::stringstream ss;
1020 ss << VectorRef::GenSlidingWindowDeclRef();
1023 virtual std::string GenStringSlidingWindowDeclRef(
bool =
false )
const override
1025 std::stringstream ss;
1029 virtual size_t Marshal( cl_kernel k,
int argno,
int vw, cl_program p )
override
1031 int i = VectorRef::Marshal(k, argno, vw, p);
1043 template<
class Base>
1044 class DynamicKernelSlidingArgument :
public Base
1049 const std::shared_ptr<SlidingFunctionBase>& CodeGen,
int index)
1050 :
Base(config, s, ft, index)
1055 throw Unhandled(__FILE__, __LINE__);
1062 virtual bool NeedParallelReduction()
const
1064 assert(dynamic_cast<OpSumIfs*>(
mpCodeGen.get()));
1065 return GetWindowSize() > 100 &&
1066 ((GetStartFixed() && GetEndFixed()) ||
1067 (!GetStartFixed() && !GetEndFixed()));
1070 virtual void GenSlidingWindowFunction( std::stringstream& ) { }
1072 std::string GenSlidingWindowDeclRef(
bool nested =
false )
const
1075 std::stringstream ss;
1079 ss <<
"((i+gid0) <" << nArrayLength <<
"?";
1080 ss << Base::GetName() <<
"[i + gid0]";
1087 ss <<
"(i <" << nArrayLength <<
"?";
1088 ss << Base::GetName() <<
"[i]";
1095 size_t GenReductionLoopHeader(
1096 std::stringstream& ss,
bool& needBody )
1104 ss <<
"for (int i = ";
1106 ss <<
" && i < " << nCurWindowSize <<
"; i++){\n\t\t";
1108 return nCurWindowSize;
1112 ss <<
"for (int i = ";
1114 ss <<
" && i < gid0+" << nCurWindowSize <<
"; i++){\n\t\t";
1116 return nCurWindowSize;
1120 ss <<
"tmpBottom = " <<
mpCodeGen->GetBottom() <<
";\n\t";
1121 ss <<
"{int i;\n\t";
1122 std::stringstream temp1, temp2;
1124 if (nCurWindowSize / outLoopSize != 0)
1126 ss <<
"for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1127 for (
int count = 0;
count < outLoopSize;
count++)
1129 ss <<
"i = outLoop*" << outLoopSize <<
"+" <<
count <<
";\n\t";
1133 temp1 <<
"){\n\t\t";
1134 temp1 <<
"tmp = legalize(";
1135 temp1 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1136 temp1 <<
", tmp);\n\t\t\t";
1144 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1146 ss <<
"i = " << count <<
";\n\t";
1147 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1150 temp2 <<
"){\n\t\t";
1151 temp2 <<
"tmp = legalize(";
1152 temp2 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1153 temp2 <<
", tmp);\n\t\t\t";
1160 return nCurWindowSize;
1166 ss <<
"tmpBottom = " <<
mpCodeGen->GetBottom() <<
";\n\t";
1167 ss <<
"{int i;\n\t";
1168 std::stringstream temp1, temp2;
1170 if (nCurWindowSize / outLoopSize != 0)
1172 ss <<
"for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1173 for (
int count = 0; count < outLoopSize; count++)
1175 ss <<
"i = outLoop*" << outLoopSize <<
"+" << count <<
";\n\t";
1179 temp1 <<
"){\n\t\t";
1180 temp1 <<
"tmp = legalize(";
1181 temp1 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1182 temp1 <<
", tmp);\n\t\t\t";
1190 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1192 ss <<
"i = " << count <<
";\n\t";
1193 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1196 temp2 <<
"){\n\t\t";
1197 temp2 <<
"tmp = legalize(";
1198 temp2 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1199 temp2 <<
", tmp);\n\t\t\t";
1206 return nCurWindowSize;
1227 class DynamicKernelMixedSlidingArgument :
public VectorRef
1230 DynamicKernelMixedSlidingArgument(
const ScCalcConfig& config,
const std::string& s,
1233 VectorRef(config, s, ft),
1236 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
1242 virtual void GenSlidingWindowFunction( std::stringstream& )
override { }
1244 virtual void GenDecl( std::stringstream& ss )
const override
1250 virtual void GenDeclRef( std::stringstream& ss )
const override
1256 virtual std::string GenSlidingWindowDeclRef(
bool nested )
const override
1258 std::stringstream ss;
1265 virtual std::string GenDoubleSlidingWindowDeclRef(
bool =
false )
const override
1267 std::stringstream ss;
1271 virtual std::string GenStringSlidingWindowDeclRef(
bool =
false )
const override
1273 std::stringstream ss;
1277 virtual size_t Marshal( cl_kernel k,
int argno,
int vw, cl_program p )
override
1286 DynamicKernelSlidingArgument<DynamicKernelStringArgument>
mStringArgument;
1293 typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1295 SymbolTable() :
mCurId(0) { }
1298 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize);
1300 void DumpSlidingWindowFunctions( std::stringstream& ss )
1302 for (
auto const& argument :
mParams)
1304 argument->GenSlidingWindowFunction(ss);
1310 void Marshal( cl_kernel,
int, cl_program );
1320 void SymbolTable::Marshal( cl_kernel k,
int nVectorWidth, cl_program pProgram )
1325 i +=
argument->Marshal(k, i, nVectorWidth, pProgram);
1333 template<
class Base>
1334 class ParallelReductionVectorRef :
public Base
1337 ParallelReductionVectorRef(
const ScCalcConfig& config,
const std::string& s,
1339 const std::shared_ptr<SlidingFunctionBase>& CodeGen,
int index)
1340 :
Base(config, s, ft, index)
1346 throw Unhandled(__FILE__, __LINE__);
1353 virtual void GenSlidingWindowFunction( std::stringstream& ss );
1355 virtual std::string GenSlidingWindowDeclRef(
bool )
const
1357 std::stringstream ss;
1359 ss << Base::GetName() <<
"[i + gid0]";
1361 ss << Base::GetName() <<
"[i]";
1366 size_t GenReductionLoopHeader(
1367 std::stringstream& ss,
int nResultSize,
bool& needBody );
1369 virtual size_t Marshal( cl_kernel k,
int argno,
int w, cl_program
mpProgram );
1371 ~ParallelReductionVectorRef()
1376 err = clReleaseMemObject(
mpClmem2);
1394 std::shared_ptr<SlidingFunctionBase>
mpCodeGen;
1399 class Reduction :
public SlidingFunctionBase
1403 explicit Reduction(
int nResultSize) : mnResultSize(nResultSize) {}
1405 typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1406 typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1407 typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1409 virtual bool HandleNaNArgument( std::stringstream&,
unsigned, SubArguments& )
const
1414 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1415 const std::string& sSymName, SubArguments& vSubArguments )
override
1417 ss <<
"\ndouble " << sSymName;
1418 ss <<
"_" << BinFuncName() <<
"(";
1419 for (
size_t i = 0; i < vSubArguments.size(); i++)
1423 vSubArguments[i]->GenSlidingWindowDecl(ss);
1426 ss <<
"double tmp = " << GetBottom() <<
";\n";
1427 ss <<
"int gid0 = get_global_id(0);\n";
1428 if (isAverage() || isMinOrMax())
1429 ss <<
"int nCount = 0;\n";
1430 ss <<
"double tmpBottom;\n";
1431 unsigned i = vSubArguments.size();
1434 if (NumericRange* NR =
1435 dynamic_cast<NumericRange*>(vSubArguments[i].
get()))
1438 NR->GenReductionLoopHeader(ss, needBody);
1442 else if (ParallelNumericRange* PNR =
1443 dynamic_cast<ParallelNumericRange*>(vSubArguments[i].
get()))
1446 bool bNeedBody =
false;
1447 PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1451 else if (StringRange* SR =
1452 dynamic_cast<StringRange*>(vSubArguments[i].
get()))
1456 SR->GenReductionLoopHeader(ss, needBody);
1462 FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1472 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1474 bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1476 ss <<
"tmpBottom = " << GetBottom() <<
";\n";
1481 ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1483 if (ZeroReturnZero())
1484 ss <<
" return 0;\n";
1488 ss << Gen2(
"tmpBottom",
"tmp") <<
";\n";
1494 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(),
"tmp");
1502 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(),
"tmp");
1509 " return CreateDoubleError(DivisionByZero);\n";
1510 else if (isMinOrMax())
1516 ss <<
"*pow((double)nCount,-1.0)";
1519 virtual bool isAverage()
const {
return false; }
1520 virtual bool isMinOrMax()
const {
return false; }
1521 virtual bool takeString()
const override {
return false; }
1522 virtual bool takeNumeric()
const override {
return true; }
1526 class Binary :
public SlidingFunctionBase
1529 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1530 const std::string& sSymName, SubArguments& vSubArguments )
override
1532 ss <<
"\ndouble " << sSymName;
1533 ss <<
"_" << BinFuncName() <<
"(";
1534 assert(vSubArguments.size() == 2);
1535 for (
size_t i = 0; i < vSubArguments.size(); i++)
1539 vSubArguments[i]->GenSlidingWindowDecl(ss);
1542 ss <<
"int gid0 = get_global_id(0), i = 0;\n\t";
1543 ss <<
"double tmp = ";
1544 ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(),
1545 vSubArguments[1]->GenSlidingWindowDeclRef()) <<
";\n\t";
1546 ss <<
"return tmp;\n}";
1548 virtual bool takeString()
const override {
return true; }
1549 virtual bool takeNumeric()
const override {
return true; }
1552 class SumOfProduct :
public SlidingFunctionBase
1555 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1556 const std::string& sSymName, SubArguments& vSubArguments )
override
1558 size_t nCurWindowSize = 0;
1561 ss <<
"\ndouble " << sSymName;
1562 ss <<
"_" << BinFuncName() <<
"(";
1563 for (
size_t i = 0; i < vSubArguments.size(); i++)
1567 vSubArguments[i]->GenSlidingWindowDecl(ss);
1568 size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1569 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1570 nCurChildWindowSize : nCurWindowSize;
1571 tmpCur = vSubArguments[i]->GetFormulaToken();
1577 throw Unhandled(__FILE__, __LINE__);
1581 ss <<
" double tmp = 0.0;\n";
1582 ss <<
" int gid0 = get_global_id(0);\n";
1584 ss <<
"\tint i;\n\t";
1585 ss <<
"int currentCount0;\n";
1586 for (
size_t i = 0; i < vSubArguments.size() - 1; i++)
1587 ss <<
"int currentCount" << i + 1 <<
";\n";
1588 std::stringstream temp3, temp4;
1590 if (nCurWindowSize / outLoopSize != 0)
1592 ss <<
"for(int outLoop=0; outLoop<" <<
1593 nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1594 for (
int count = 0; count < outLoopSize; count++)
1596 ss <<
"i = outLoop*" << outLoopSize <<
"+" << count <<
";\n";
1599 for (
size_t i = 0; i < vSubArguments.size(); i++)
1601 tmpCur = vSubArguments[i]->GetFormulaToken();
1607 temp3 <<
" currentCount";
1609 temp3 <<
" =i+gid0+1;\n";
1613 temp3 <<
" currentCount";
1615 temp3 <<
" =i+1;\n";
1620 temp3 <<
"tmp = fsum(";
1621 for (
size_t i = 0; i < vSubArguments.size(); i++)
1625 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1628 temp3 <<
"(currentCount";
1631 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1636 (vSubArguments[i]->GetFormulaToken());
1638 temp3 <<
")||isnan(" << vSubArguments[i]
1639 ->GenSlidingWindowDeclRef();
1641 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1644 else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1649 (vSubArguments[i]->GetFormulaToken());
1651 temp3 <<
")||isnan(" << vSubArguments[i]
1652 ->GenSlidingWindowDeclRef(
true);
1654 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(
true);
1660 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(
true);
1662 temp3 <<
", tmp);\n\t";
1669 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize;
1670 count < nCurWindowSize; count++)
1672 ss <<
"i =" << count <<
";\n";
1673 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1675 for (
size_t i = 0; i < vSubArguments.size(); i++)
1677 tmpCur = vSubArguments[i]->GetFormulaToken();
1683 temp4 <<
" currentCount";
1685 temp4 <<
" =i+gid0+1;\n";
1689 temp4 <<
" currentCount";
1691 temp4 <<
" =i+1;\n";
1696 temp4 <<
"tmp = fsum(";
1697 for (
size_t i = 0; i < vSubArguments.size(); i++)
1701 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1704 temp4 <<
"(currentCount";
1707 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1712 (vSubArguments[i]->GetFormulaToken());
1714 temp4 <<
")||isnan(" << vSubArguments[i]
1715 ->GenSlidingWindowDeclRef();
1717 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1720 else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1725 (vSubArguments[i]->GetFormulaToken());
1727 temp4 <<
")||isnan(" << vSubArguments[i]
1728 ->GenSlidingWindowDeclRef(
true);
1730 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(
true);
1737 temp4 << vSubArguments[i]
1738 ->GenSlidingWindowDeclRef(
true);
1741 temp4 <<
", tmp);\n\t";
1745 ss <<
"return tmp;\n";
1748 virtual bool takeString()
const override {
return false; }
1749 virtual bool takeNumeric()
const override {
return true; }
1753 class OpNop :
public Reduction
1756 explicit OpNop(
int nResultSize) : Reduction(nResultSize) {}
1758 virtual std::string GetBottom()
override {
return "0"; }
1759 virtual std::string Gen2(
const std::string& lhs,
const std::string& )
const override
1763 virtual std::string BinFuncName()
const override {
return "nop"; }
1766 class OpCount :
public Reduction
1769 explicit OpCount(
int nResultSize) : Reduction(nResultSize) {}
1771 virtual std::string GetBottom()
override {
return "0"; }
1772 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1774 std::stringstream ss;
1775 ss <<
"(isnan(" << lhs <<
")?" << rhs <<
":" << rhs <<
"+1.0)";
1778 virtual std::string BinFuncName()
const override {
return "fcount"; }
1779 virtual bool canHandleMultiVector()
const override {
return true; }
1782 class OpEqual :
public Binary
1785 virtual std::string GetBottom()
override {
return "0"; }
1786 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1788 std::stringstream ss;
1789 ss <<
"strequal(" << lhs <<
"," << rhs <<
")";
1792 virtual std::string BinFuncName()
const override {
return "eq"; }
1795 class OpLessEqual :
public Binary
1798 virtual std::string GetBottom()
override {
return "0"; }
1799 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1801 std::stringstream ss;
1802 ss <<
"(" << lhs <<
"<=" << rhs <<
")";
1805 virtual std::string BinFuncName()
const override {
return "leq"; }
1808 class OpLess :
public Binary
1811 virtual std::string GetBottom()
override {
return "0"; }
1812 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1814 std::stringstream ss;
1815 ss <<
"(" << lhs <<
"<" << rhs <<
")";
1818 virtual std::string BinFuncName()
const override {
return "less"; }
1821 class OpGreater :
public Binary
1824 virtual std::string GetBottom()
override {
return "0"; }
1825 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1827 std::stringstream ss;
1828 ss <<
"(" << lhs <<
">" << rhs <<
")";
1831 virtual std::string BinFuncName()
const override {
return "gt"; }
1834 class OpSum :
public Reduction
1837 explicit OpSum(
int nResultSize) : Reduction(nResultSize) {}
1839 virtual std::string GetBottom()
override {
return "0"; }
1840 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1842 std::stringstream ss;
1843 ss <<
"fsum_approx((" << lhs <<
"),(" << rhs <<
"))";
1846 virtual std::string BinFuncName()
const override {
return "fsum"; }
1848 virtual bool canHandleMultiVector()
const override {
return true; }
1851 class OpAverage :
public Reduction
1854 explicit OpAverage(
int nResultSize) : Reduction(nResultSize) {}
1856 virtual std::string GetBottom()
override {
return "0"; }
1857 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1859 std::stringstream ss;
1860 ss <<
"fsum_count(" << lhs <<
"," << rhs <<
", &nCount)";
1863 virtual std::string BinFuncName()
const override {
return "average"; }
1864 virtual bool isAverage()
const override {
return true; }
1865 virtual bool canHandleMultiVector()
const override {
return true; }
1868 class OpSub :
public Reduction
1871 explicit OpSub(
int nResultSize) : Reduction(nResultSize) {}
1873 virtual std::string GetBottom()
override {
return "0"; }
1874 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1876 return "fsub_approx(" + lhs +
"," + rhs +
")";
1878 virtual std::string BinFuncName()
const override {
return "fsub"; }
1881 class OpMul :
public Reduction
1884 explicit OpMul(
int nResultSize) : Reduction(nResultSize) {}
1886 virtual std::string GetBottom()
override {
return "1"; }
1887 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1889 return lhs +
"*" + rhs;
1891 virtual std::string BinFuncName()
const override {
return "fmul"; }
1892 virtual bool ZeroReturnZero()
override {
return true; }
1896 class OpDiv :
public Reduction
1899 explicit OpDiv(
int nResultSize) : Reduction(nResultSize) {}
1901 virtual std::string GetBottom()
override {
return "1.0"; }
1902 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1904 return "(" + rhs +
"==0 ? CreateDoubleError(DivisionByZero) : (" + lhs +
"/" + rhs +
") )";
1906 virtual std::string BinFuncName()
const override {
return "fdiv"; }
1908 virtual bool HandleNaNArgument( std::stringstream& ss,
unsigned argno, SubArguments& vSubArguments )
const override
1913 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() <<
")) {\n"
1914 " return CreateDoubleError(DivisionByZero);\n"
1918 else if (argno == 0)
1921 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() <<
") &&\n"
1922 " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() <<
") || " << vSubArguments[1]->GenSlidingWindowDeclRef() <<
" == 0)) {\n"
1931 class OpMin :
public Reduction
1934 explicit OpMin(
int nResultSize) : Reduction(nResultSize) {}
1936 virtual std::string GetBottom()
override {
return "NAN"; }
1937 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1939 return "fmin_count(" + lhs +
"," + rhs +
", &nCount)";
1941 virtual std::string BinFuncName()
const override {
return "min"; }
1942 virtual bool isMinOrMax()
const override {
return true; }
1943 virtual bool canHandleMultiVector()
const override {
return true; }
1946 class OpMax :
public Reduction
1949 explicit OpMax(
int nResultSize) : Reduction(nResultSize) {}
1951 virtual std::string GetBottom()
override {
return "NAN"; }
1952 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1954 return "fmax_count(" + lhs +
"," + rhs +
", &nCount)";
1956 virtual std::string BinFuncName()
const override {
return "max"; }
1957 virtual bool isMinOrMax()
const override {
return true; }
1958 virtual bool canHandleMultiVector()
const override {
return true; }
1961 class OpSumProduct :
public SumOfProduct
1964 virtual std::string GetBottom()
override {
return "0"; }
1965 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1967 return lhs +
"*" + rhs;
1969 virtual std::string BinFuncName()
const override {
return "fsop"; }
1972 template<
class Base>
1973 void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
1975 if (!dynamic_cast<OpAverage*>(
mpCodeGen.get()))
1977 std::string
name = Base::GetName();
1978 ss <<
"__kernel void " << name;
1979 ss <<
"_reduction(__global double* A, "
1980 "__global double *result,int arrayLength,int windowSize){\n";
1981 ss <<
" double tmp, current_result =" <<
1984 ss <<
" int writePos = get_group_id(1);\n";
1985 ss <<
" int lidx = get_local_id(0);\n";
1986 ss <<
" __local double shm_buf[256];\n";
1988 ss <<
" int offset = 0;\n";
1990 ss <<
" int offset = get_group_id(1);\n";
1992 ss <<
" int end = windowSize;\n";
1994 ss <<
" int end = offset + windowSize;\n";
1996 ss <<
" int end = windowSize + get_group_id(1);\n";
1998 ss <<
" int end = windowSize;\n";
1999 ss <<
" end = min(end, arrayLength);\n";
2001 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2002 ss <<
" int loop = arrayLength/512 + 1;\n";
2003 ss <<
" for (int l=0; l<loop; l++){\n";
2004 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
2005 ss <<
" int loopOffset = l*512;\n";
2006 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
2007 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
2008 "A[loopOffset + lidx + offset]",
"tmp") <<
", tmp);\n";
2009 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
2010 "A[loopOffset + lidx + offset + 256]",
"tmp") <<
", tmp);\n";
2011 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
2012 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
2013 "A[loopOffset + lidx + offset]",
"tmp") <<
", tmp);\n";
2014 ss <<
" shm_buf[lidx] = tmp;\n";
2015 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2016 ss <<
" for (int i = 128; i >0; i/=2) {\n";
2017 ss <<
" if (lidx < i)\n";
2018 ss <<
" shm_buf[lidx] = ";
2020 if (dynamic_cast<OpCount*>(
mpCodeGen.get()))
2021 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
2023 ss <<
mpCodeGen->Gen2(
"shm_buf[lidx]",
"shm_buf[lidx + i]") <<
";\n";
2024 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2026 ss <<
" if (lidx == 0)\n";
2027 ss <<
" current_result =";
2028 if (dynamic_cast<OpCount*>(
mpCodeGen.get()))
2029 ss <<
"current_result + shm_buf[0]";
2031 ss <<
mpCodeGen->Gen2(
"current_result",
"shm_buf[0]");
2033 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2035 ss <<
" if (lidx == 0)\n";
2036 ss <<
" result[writePos] = current_result;\n";
2041 std::string name = Base::GetName();
2043 ss <<
"__kernel void " << name <<
"_sum";
2044 ss <<
"_reduction(__global double* A, "
2045 "__global double *result,int arrayLength,int windowSize){\n";
2046 ss <<
" double tmp, current_result =" <<
2049 ss <<
" int writePos = get_group_id(1);\n";
2050 ss <<
" int lidx = get_local_id(0);\n";
2051 ss <<
" __local double shm_buf[256];\n";
2053 ss <<
" int offset = 0;\n";
2055 ss <<
" int offset = get_group_id(1);\n";
2057 ss <<
" int end = windowSize;\n";
2059 ss <<
" int end = offset + windowSize;\n";
2061 ss <<
" int end = windowSize + get_group_id(1);\n";
2063 ss <<
" int end = windowSize;\n";
2064 ss <<
" end = min(end, arrayLength);\n";
2065 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2066 ss <<
" int loop = arrayLength/512 + 1;\n";
2067 ss <<
" for (int l=0; l<loop; l++){\n";
2068 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
2069 ss <<
" int loopOffset = l*512;\n";
2070 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
2071 ss <<
" tmp = legalize(";
2072 ss <<
"(A[loopOffset + lidx + offset]+ tmp)";
2074 ss <<
" tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
2076 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
2077 ss <<
" tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
2079 ss <<
" shm_buf[lidx] = tmp;\n";
2080 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2081 ss <<
" for (int i = 128; i >0; i/=2) {\n";
2082 ss <<
" if (lidx < i)\n";
2083 ss <<
" shm_buf[lidx] = ";
2084 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
2085 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2087 ss <<
" if (lidx == 0)\n";
2088 ss <<
" current_result =";
2089 ss <<
"current_result + shm_buf[0]";
2091 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2093 ss <<
" if (lidx == 0)\n";
2094 ss <<
" result[writePos] = current_result;\n";
2097 ss <<
"__kernel void " << name <<
"_count";
2098 ss <<
"_reduction(__global double* A, "
2099 "__global double *result,int arrayLength,int windowSize){\n";
2100 ss <<
" double tmp, current_result =" <<
2103 ss <<
" int writePos = get_group_id(1);\n";
2104 ss <<
" int lidx = get_local_id(0);\n";
2105 ss <<
" __local double shm_buf[256];\n";
2107 ss <<
" int offset = 0;\n";
2109 ss <<
" int offset = get_group_id(1);\n";
2111 ss <<
" int end = windowSize;\n";
2113 ss <<
" int end = offset + windowSize;\n";
2115 ss <<
" int end = windowSize + get_group_id(1);\n";
2117 ss <<
" int end = windowSize;\n";
2118 ss <<
" end = min(end, arrayLength);\n";
2119 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2120 ss <<
" int loop = arrayLength/512 + 1;\n";
2121 ss <<
" for (int l=0; l<loop; l++){\n";
2122 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
2123 ss <<
" int loopOffset = l*512;\n";
2124 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
2125 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2127 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
2129 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
2130 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2132 ss <<
" shm_buf[lidx] = tmp;\n";
2133 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2134 ss <<
" for (int i = 128; i >0; i/=2) {\n";
2135 ss <<
" if (lidx < i)\n";
2136 ss <<
" shm_buf[lidx] = ";
2137 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
2138 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2140 ss <<
" if (lidx == 0)\n";
2141 ss <<
" current_result =";
2142 ss <<
"current_result + shm_buf[0];";
2144 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2146 ss <<
" if (lidx == 0)\n";
2147 ss <<
" result[writePos] = current_result;\n";
2153 template<
class Base>
2154 size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
2155 std::stringstream& ss,
int nResultSize,
bool& needBody )
2159 std::string temp = Base::GetName() +
"[gid0]";
2162 if (dynamic_cast<OpAverage*>(
mpCodeGen.get()))
2164 ss <<
mpCodeGen->Gen2(temp,
"tmp") <<
";\n";
2165 ss <<
"nCount = nCount-1;\n";
2166 ss <<
"nCount = nCount +";
2167 ss << Base::GetName() <<
"[gid0+" << nResultSize <<
"]" <<
";\n";
2169 else if (dynamic_cast<OpCount*>(
mpCodeGen.get()))
2170 ss << temp <<
"+ tmp";
2175 return nCurWindowSize;
2178 template<
class Base>
2179 size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k,
int argno,
int w, cl_program
mpProgram )
2181 assert(Base::mpClmem ==
nullptr);
2191 throw Unhandled(__FILE__, __LINE__);
2192 double* pHostBuffer =
const_cast<double*
>(
2194 size_t szHostBuffer = nInput *
sizeof(double);
2195 Base::mpClmem = clCreateBuffer(kEnv.
mpkContext,
2196 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
2199 SAL_INFO(
"sc.opencl",
"Created buffer " << Base::mpClmem <<
" size " << nInput <<
"*" <<
sizeof(
double) <<
"=" << szHostBuffer <<
" using host buffer " << pHostBuffer);
2202 CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
2203 sizeof(
double) * w,
nullptr,
nullptr);
2204 if (CL_SUCCESS != err)
2205 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2206 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " <<
sizeof(
double) <<
"*" << w <<
"=" << (
sizeof(
double)*w));
2209 std::string kernelName;
2210 if (!dynamic_cast<OpAverage*>(
mpCodeGen.get()))
2211 kernelName = Base::GetName() +
"_reduction";
2213 kernelName = Base::GetName() +
"_sum_reduction";
2214 cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2215 if (err != CL_SUCCESS)
2216 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2217 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << mpProgram);
2221 cl_mem buf = Base::GetCLBuffer();
2222 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 0 <<
": cl_mem: " << buf);
2223 err = clSetKernelArg(redKernel, 0,
sizeof(cl_mem),
2224 static_cast<void*>(&buf));
2225 if (CL_SUCCESS != err)
2226 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2228 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 1 <<
": cl_mem: " <<
mpClmem2);
2229 err = clSetKernelArg(redKernel, 1,
sizeof(cl_mem), &
mpClmem2);
2230 if (CL_SUCCESS != err)
2231 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2233 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 2 <<
": cl_int: " << nInput);
2234 err = clSetKernelArg(redKernel, 2,
sizeof(cl_int), static_cast<void*>(&nInput));
2235 if (CL_SUCCESS != err)
2236 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2238 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 3 <<
": cl_int: " << nCurWindowSize);
2239 err = clSetKernelArg(redKernel, 3,
sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2240 if (CL_SUCCESS != err)
2241 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2244 size_t global_work_size[] = { 256,
static_cast<size_t>(w) };
2245 size_t const local_work_size[] = { 256, 1 };
2246 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2247 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2248 global_work_size, local_work_size, 0,
nullptr,
nullptr);
2249 if (CL_SUCCESS != err)
2250 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2252 if (CL_SUCCESS != err)
2253 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
2254 if (dynamic_cast<OpAverage*>(
mpCodeGen.get()))
2257 std::unique_ptr<double[]> pAllBuffer(
new double[2 * w]);
2258 double* resbuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
2260 CL_TRUE, CL_MAP_READ, 0,
2261 sizeof(
double) * w, 0,
nullptr,
nullptr,
2263 if (err != CL_SUCCESS)
2264 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
2266 for (
int i = 0; i < w; i++)
2267 pAllBuffer[i] = resbuf[i];
2269 if (err != CL_SUCCESS)
2270 throw OpenCLError(
"clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
2272 kernelName = Base::GetName() +
"_count_reduction";
2273 redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2274 if (err != CL_SUCCESS)
2275 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2276 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << mpProgram);
2279 buf = Base::GetCLBuffer();
2280 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 0 <<
": cl_mem: " << buf);
2281 err = clSetKernelArg(redKernel, 0,
sizeof(cl_mem),
2282 static_cast<void*>(&buf));
2283 if (CL_SUCCESS != err)
2284 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2286 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 1 <<
": cl_mem: " <<
mpClmem2);
2287 err = clSetKernelArg(redKernel, 1,
sizeof(cl_mem), &
mpClmem2);
2288 if (CL_SUCCESS != err)
2289 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2291 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 2 <<
": cl_int: " << nInput);
2292 err = clSetKernelArg(redKernel, 2,
sizeof(cl_int), static_cast<void*>(&nInput));
2293 if (CL_SUCCESS != err)
2294 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2296 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 3 <<
": cl_int: " << nCurWindowSize);
2297 err = clSetKernelArg(redKernel, 3,
sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2298 if (CL_SUCCESS != err)
2299 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2302 size_t global_work_size1[] = { 256,
static_cast<size_t>(w) };
2303 size_t const local_work_size1[] = { 256, 1 };
2304 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2305 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2306 global_work_size1, local_work_size1, 0,
nullptr,
nullptr);
2307 if (CL_SUCCESS != err)
2308 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2310 if (CL_SUCCESS != err)
2311 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
2312 resbuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
2314 CL_TRUE, CL_MAP_READ, 0,
2315 sizeof(
double) * w, 0,
nullptr,
nullptr,
2317 if (err != CL_SUCCESS)
2318 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
2319 for (
int i = 0; i < w; i++)
2320 pAllBuffer[i + w] = resbuf[i];
2323 if (CL_SUCCESS != err)
2327 err = clReleaseMemObject(
mpClmem2);
2332 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
2333 w *
sizeof(
double) * 2, pAllBuffer.get(), &err);
2334 if (CL_SUCCESS != err)
2335 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2336 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " << w <<
"*" <<
sizeof(
double) <<
"=" << (w*
sizeof(
double)) <<
" copying host buffer " << pAllBuffer.get());
2339 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " <<
mpClmem2);
2340 err = clSetKernelArg(k, argno,
sizeof(cl_mem), &
mpClmem2);
2341 if (CL_SUCCESS != err)
2342 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2348 explicit SumIfsArgs(cl_mem x) :
mCLMem(x),
mConst(0.0) { }
2349 explicit SumIfsArgs(
double x) :
mCLMem(nullptr),
mConst(x) { }
2355 class DynamicKernelSoPArguments :
public DynamicKernelArgument
2358 typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
2362 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize );
2365 virtual size_t Marshal( cl_kernel k,
int argno,
int nVectorWidth, cl_program pProgram )
override
2371 i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
2373 if (dynamic_cast<OpGeoMean*>(
mpCodeGen.get()))
2380 std::vector<cl_mem> vclmem;
2381 for (
const auto& rxSubArgument : mvSubArguments)
2383 if (VectorRef*
VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2384 vclmem.push_back(
VR->GetCLBuffer());
2386 vclmem.push_back(
nullptr);
2388 pClmem2 = clCreateBuffer(kEnv.
mpkContext, CL_MEM_READ_WRITE,
2389 sizeof(
double) * nVectorWidth,
nullptr, &err);
2390 if (CL_SUCCESS != err)
2391 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2392 SAL_INFO(
"sc.opencl",
"Created buffer " << pClmem2 <<
" size " <<
sizeof(
double) <<
"*" << nVectorWidth <<
"=" << (
sizeof(
double)*nVectorWidth));
2394 std::string kernelName =
"GeoMean_reduction";
2395 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2396 if (err != CL_SUCCESS)
2397 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2398 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << pProgram);
2401 for (
size_t j = 0; j < vclmem.size(); j++)
2403 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": " << (vclmem[j] ?
"cl_mem" :
"double") <<
": " << vclmem[j]);
2404 err = clSetKernelArg(redKernel, j,
2405 vclmem[j] ?
sizeof(cl_mem) :
sizeof(
double),
2406 static_cast<void*>(&vclmem[j]));
2407 if (CL_SUCCESS != err)
2408 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2410 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << vclmem.size() <<
": cl_mem: " << pClmem2);
2411 err = clSetKernelArg(redKernel, vclmem.size(),
sizeof(cl_mem), static_cast<void*>(&pClmem2));
2412 if (CL_SUCCESS != err)
2413 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2416 size_t global_work_size[] = { 256,
static_cast<size_t>(nVectorWidth) };
2417 size_t const local_work_size[] = { 256, 1 };
2418 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2419 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2420 global_work_size, local_work_size, 0,
nullptr,
nullptr);
2421 if (CL_SUCCESS != err)
2422 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2424 if (CL_SUCCESS != err)
2425 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
2428 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << pClmem2);
2429 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&pClmem2));
2430 if (CL_SUCCESS != err)
2431 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2433 if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(
mpCodeGen.get()))
2438 DynamicKernelArgument*
Arg = mvSubArguments[0].get();
2439 DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2440 static_cast<DynamicKernelSlidingArgument<VectorRef>*
>(Arg);
2443 if (OpSumCodeGen->NeedReductionKernel())
2445 size_t nInput = slidingArgPtr->GetArrayLength();
2446 size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
2447 std::vector<SumIfsArgs> vclmem;
2449 for (
const auto& rxSubArgument : mvSubArguments)
2451 if (VectorRef*
VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2452 vclmem.emplace_back(
VR->GetCLBuffer());
2453 else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
2454 vclmem.emplace_back(CA->GetDouble());
2456 vclmem.emplace_back(
nullptr);
2458 mpClmem2 = clCreateBuffer(kEnv.
mpkContext, CL_MEM_READ_WRITE,
2459 sizeof(
double) * nVectorWidth,
nullptr, &err);
2460 if (CL_SUCCESS != err)
2461 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2462 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem2 <<
" size " <<
sizeof(
double) <<
"*" << nVectorWidth <<
"=" << (
sizeof(
double)*nVectorWidth));
2464 std::string kernelName = mvSubArguments[0]->GetName() +
"_SumIfs_reduction";
2465 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2466 if (err != CL_SUCCESS)
2467 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2468 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << pProgram);
2471 for (
size_t j = 0; j < vclmem.size(); j++)
2474 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": cl_mem: " << vclmem[j].mCLMem);
2476 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": double: " << vclmem[j].
mConst);
2477 err = clSetKernelArg(redKernel, j,
2478 vclmem[j].mCLMem ?
sizeof(cl_mem) :
sizeof(
double),
2479 vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
2480 static_cast<void*>(&vclmem[j].mConst));
2481 if (CL_SUCCESS != err)
2482 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2484 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << vclmem.size() <<
": cl_mem: " <<
mpClmem2);
2485 err = clSetKernelArg(redKernel, vclmem.size(),
sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2486 if (CL_SUCCESS != err)
2487 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2489 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << (vclmem.size() + 1) <<
": cl_int: " << nInput);
2490 err = clSetKernelArg(redKernel, vclmem.size() + 1,
sizeof(cl_int), static_cast<void*>(&nInput));
2491 if (CL_SUCCESS != err)
2492 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2494 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << (vclmem.size() + 2) <<
": cl_int: " << nCurWindowSize);
2495 err = clSetKernelArg(redKernel, vclmem.size() + 2,
sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2496 if (CL_SUCCESS != err)
2497 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2499 size_t global_work_size[] = { 256,
static_cast<size_t>(nVectorWidth) };
2500 size_t const local_work_size[] = { 256, 1 };
2501 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2502 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2503 global_work_size, local_work_size, 0,
nullptr,
nullptr);
2504 if (CL_SUCCESS != err)
2505 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2508 if (CL_SUCCESS != err)
2509 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
2511 SAL_INFO(
"sc.opencl",
"Releasing kernel " << redKernel);
2512 err = clReleaseKernel(redKernel);
2516 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << mpClmem2);
2517 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2518 if (CL_SUCCESS != err)
2519 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2525 virtual void GenSlidingWindowFunction( std::stringstream& ss )
override
2528 rArg->GenSlidingWindowFunction(ss);
2529 mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2531 virtual void GenDeclRef( std::stringstream& ss )
const override
2533 for (
size_t i = 0; i < mvSubArguments.size(); i++)
2537 mvSubArguments[i]->GenDeclRef(ss);
2540 virtual void GenDecl( std::stringstream& ss )
const override
2542 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2545 if (it != mvSubArguments.begin())
2551 virtual size_t GetWindowSize()
const override
2553 size_t nCurWindowSize = 0;
2554 for (
const auto & rSubArgument : mvSubArguments)
2556 size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
2557 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2558 nCurChildWindowSize : nCurWindowSize;
2560 return nCurWindowSize;
2564 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
2566 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2569 if (it != mvSubArguments.begin())
2571 (*it)->GenSlidingWindowDecl(ss);
2576 virtual std::string GenSlidingWindowDeclRef(
bool nested =
false )
const override
2578 std::stringstream ss;
2581 ss << mSymName <<
"_" <<
mpCodeGen->BinFuncName() <<
"(";
2582 for (
size_t i = 0; i < mvSubArguments.size(); i++)
2586 mvSubArguments[i]->GenDeclRef(ss);
2592 if (mvSubArguments.size() != 2)
2593 throw Unhandled(__FILE__, __LINE__);
2594 bool bArgument1_NeedNested =
2595 mvSubArguments[0]->GetFormulaToken()->GetType()
2597 bool bArgument2_NeedNested =
2598 mvSubArguments[1]->GetFormulaToken()->GetType()
2602 Gen2(mvSubArguments[0]
2603 ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2605 ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2610 virtual std::string DumpOpName()
const override
2612 std::string t =
"_" +
mpCodeGen->BinFuncName();
2613 for (
const auto & rSubArgument : mvSubArguments)
2614 t += rSubArgument->DumpOpName();
2617 virtual void DumpInlineFun( std::set<std::string>& decls,
2618 std::set<std::string>& funs )
const override
2621 for (
const auto & rSubArgument : mvSubArguments)
2622 rSubArgument->DumpInlineFun(decls, funs);
2624 virtual bool IsEmpty()
const override
2626 for (
const auto & rSubArgument : mvSubArguments)
2627 if( !rSubArgument->IsEmpty())
2631 virtual ~DynamicKernelSoPArguments()
override
2636 err = clReleaseMemObject(
mpClmem2);
2644 std::shared_ptr<SlidingFunctionBase>
mpCodeGen;
2651 const std::string& ts,
const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
2654 return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
2657 template<
class Base>
2660 std::shared_ptr<SlidingFunctionBase>& pCodeGen,
2665 if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2669 return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
2670 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2679 else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2681 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2684 else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2686 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2689 else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2691 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2696 ft->GetFormulaToken());
2699 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2701 return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
2703 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2706 DynamicKernelSoPArguments::DynamicKernelSoPArguments(
const ScCalcConfig& config,
2707 const std::string& s,
const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize ) :
2710 size_t nChildren = ft->Children.size();
2712 for (
size_t i = 0; i < nChildren; i++)
2716 throw Unhandled(__FILE__, __LINE__);
2719 throw Unhandled(__FILE__, __LINE__);
2721 std::stringstream tmpname;
2722 tmpname << s <<
"_" << i;
2723 std::string ts = tmpname.str();
2741 if( !pCodeGen->canHandleMultiVector())
2742 throw UnhandledToken((
"Function '" + pCodeGen->BinFuncName()
2743 +
"' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
2745 SAL_INFO(
"sc.opencl",
"multi-column DoubleRef");
2760 throw UnhandledToken((
"Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->
GetArrays().size()) +
")").c_str(), __FILE__, __LINE__);
2762 for (
size_t j = 0; j < pDVR->
GetArrays().size(); ++j)
2764 SAL_INFO(
"sc.opencl",
"i=" << i <<
" j=" << j <<
2765 " mpNumericArray=" << pDVR->
GetArrays()[j].mpNumericArray <<
2766 " mpStringArray=" << pDVR->
GetArrays()[j].mpStringArray <<
2767 " allStringsAreNull=" << (AllStringsAreNull(pDVR->
GetArrays()[j].mpStringArray, pDVR->
GetArrayLength())?
"YES":
"NO") <<
2768 " takeNumeric=" << (pCodeGen->takeNumeric()?
"YES":
"NO") <<
2769 " takeString=" << (pCodeGen->takeString()?
"YES":
"NO"));
2771 if (pDVR->
GetArrays()[j].mpNumericArray &&
2772 pCodeGen->takeNumeric() &&
2774 pCodeGen->takeString())
2777 SAL_INFO(
"sc.opencl",
"Numbers and strings");
2779 std::make_shared<DynamicKernelMixedSlidingArgument>(
mCalcConfig,
2782 else if (pDVR->
GetArrays()[j].mpNumericArray &&
2783 pCodeGen->takeNumeric() &&
2789 SAL_INFO(
"sc.opencl",
"Numbers (no strings or strings treated as zero)");
2794 else if (pDVR->
GetArrays()[j].mpNumericArray ==
nullptr &&
2795 pCodeGen->takeNumeric() &&
2801 SAL_INFO(
"sc.opencl",
"Only strings even if want numbers but should be treated as zero");
2806 else if (pDVR->
GetArrays()[j].mpStringArray &&
2807 pCodeGen->takeString())
2810 SAL_INFO(
"sc.opencl",
"Strings only");
2817 pDVR->
GetArrays()[j].mpNumericArray ==
nullptr)
2821 SAL_INFO(
"sc.opencl",
"Only empty cells");
2828 SAL_INFO(
"sc.opencl",
"Unhandled case, rejecting for OpenCL");
2829 throw UnhandledToken((
"Unhandled numbers/strings combination for '"
2830 + pCodeGen->BinFuncName() +
"'").c_str(), __FILE__, __LINE__);
2843 " takeNumeric=" << (pCodeGen->takeNumeric()?
"YES":
"NO") <<
2844 " takeString=" << (pCodeGen->takeString()?
"YES":
"NO"));
2847 pCodeGen->takeNumeric() &&
2849 pCodeGen->takeString())
2852 SAL_INFO(
"sc.opencl",
"Numbers and strings");
2854 std::make_shared<DynamicKernelMixedArgument>(
mCalcConfig,
2855 ts, ft->Children[i]));
2858 pCodeGen->takeNumeric() &&
2864 SAL_INFO(
"sc.opencl",
"Numbers (no strings or strings treated as zero)");
2870 pCodeGen->takeNumeric() &&
2876 SAL_INFO(
"sc.opencl",
"Only strings even if want numbers but should be treated as zero");
2882 pCodeGen->takeString())
2885 SAL_INFO(
"sc.opencl",
"Strings only");
2887 std::make_shared<DynamicKernelStringArgument>(
mCalcConfig,
2888 ts, ft->Children[i]));
2895 SAL_INFO(
"sc.opencl",
"Only empty cells");
2902 SAL_INFO(
"sc.opencl",
"Unhandled case, rejecting for OpenCL");
2903 throw UnhandledToken((
"Unhandled numbers/strings combination for '"
2904 + pCodeGen->BinFuncName() +
"'").c_str(), __FILE__, __LINE__);
2909 SAL_INFO(
"sc.opencl",
"Constant number case");
2911 std::make_shared<DynamicKernelConstantArgument>(
mCalcConfig, ts,
2915 && pCodeGen->takeString())
2917 SAL_INFO(
"sc.opencl",
"Constant string case");
2919 std::make_shared<ConstStringArgument>(
mCalcConfig, ts,
2924 SAL_INFO(
"sc.opencl",
"Unhandled operand, rejecting for OpenCL");
2925 throw UnhandledToken((
"unhandled operand " +
StackVarEnumToString(pChild->
GetType()) +
" for ocPush").c_str(), __FILE__, __LINE__);
3036 ft->Children[i], std::make_shared<OpPDuration>(), nResultSize));
3040 ft->Children[i], std::make_shared<OpSinh>(), nResultSize));
3044 ft->Children[i], std::make_shared<OpAbs>(), nResultSize));
3048 ft->Children[i], std::make_shared<OpPV>(), nResultSize));
3052 ft->Children[i], std::make_shared<OpSin>(), nResultSize));
3056 ft->Children[i], std::make_shared<OpTan>(), nResultSize));
3060 ft->Children[i], std::make_shared<OpTanH>(), nResultSize));
3064 ft->Children[i], std::make_shared<OpStandard>(), nResultSize));
3068 ft->Children[i], std::make_shared<OpWeibull>(), nResultSize));
3076 ft->Children[i], std::make_shared<OpDDB>(), nResultSize));
3080 ft->Children[i], std::make_shared<OpFV>(), nResultSize));
3084 ft->Children[i], std::make_shared<OpSumIfs>(), nResultSize));
3092 ft->Children[i], std::make_shared<OpKurt>(), nResultSize));
3100 ft->Children[i], std::make_shared<OpNormdist>(), nResultSize));
3104 ft->Children[i], std::make_shared<OpArcCos>(), nResultSize));
3108 ft->Children[i], std::make_shared<OpSqrt>(), nResultSize));
3112 ft->Children[i], std::make_shared<OpArcCosHyp>(), nResultSize));
3116 ft->Children[i], std::make_shared<OpNPV>(), nResultSize));
3120 ft->Children[i], std::make_shared<OpNormsdist>(), nResultSize));
3124 ft->Children[i], std::make_shared<OpNorminv>(), nResultSize));
3128 ft->Children[i], std::make_shared<OpNormsinv>(), nResultSize));
3132 ft->Children[i], std::make_shared<OpPermut>(), nResultSize));
3136 ft->Children[i], std::make_shared<OpPermutationA>(), nResultSize));
3140 ft->Children[i], std::make_shared<OpPhi>(), nResultSize));
3144 ft->Children[i], std::make_shared<OpIPMT>(), nResultSize));
3148 ft->Children[i], std::make_shared<OpConfidence>(), nResultSize));
3152 ft->Children[i], std::make_shared<OpIntercept>(), nResultSize));
3156 std::make_shared<OpDB>(), nResultSize));
3160 ft->Children[i], std::make_shared<OpLogInv>(), nResultSize));
3164 ft->Children[i], std::make_shared<OpArcCot>(), nResultSize));
3168 ft->Children[i], std::make_shared<OpCosh>(), nResultSize));
3172 ft->Children[i], std::make_shared<OpCritBinom>(), nResultSize));
3176 ft->Children[i], std::make_shared<OpArcCotHyp>(), nResultSize));
3180 ft->Children[i], std::make_shared<OpArcSin>(), nResultSize));
3184 ft->Children[i], std::make_shared<OpArcSinHyp>(), nResultSize));
3188 ft->Children[i], std::make_shared<OpArcTan>(), nResultSize));
3192 ft->Children[i], std::make_shared<OpArcTanH>(), nResultSize));
3196 ft->Children[i], std::make_shared<OpBitAnd>(), nResultSize));
3200 ft->Children[i], std::make_shared<OpForecast>(), nResultSize));
3204 ft->Children[i], std::make_shared<OpLogNormDist>(), nResultSize));
3212 ft->Children[i], std::make_shared<OpLn>(), nResultSize));
3216 ft->Children[i], std::make_shared<OpRound>(), nResultSize));
3220 ft->Children[i], std::make_shared<OpCot>(), nResultSize));
3224 ft->Children[i], std::make_shared<OpCoth>(), nResultSize));
3228 ft->Children[i], std::make_shared<OpFdist>(), nResultSize));
3232 ft->Children[i], std::make_shared<OpVar>(), nResultSize));
3241 ft->Children[i], std::make_shared<OpPower>(), nResultSize));
3245 ft->Children[i], std::make_shared<OpOdd>(), nResultSize));
3261 ft->Children[i], std::make_shared<OpFloor>(), nResultSize));
3269 ft->Children[i], std::make_shared<OpFTest>(), nResultSize));
3273 ft->Children[i], std::make_shared<OpB>(), nResultSize));
3277 ft->Children[i], std::make_shared<OpBetaDist>(), nResultSize));
3281 ft->Children[i], std::make_shared<OpCscH>(), nResultSize));
3285 ft->Children[i], std::make_shared<OpExp>(), nResultSize));
3289 ft->Children[i], std::make_shared<OpLog10>(), nResultSize));
3293 ft->Children[i], std::make_shared<OpExponDist>(), nResultSize));
3297 ft->Children[i], std::make_shared<OpAverageIfs>(), nResultSize));
3301 ft->Children[i], std::make_shared<OpCountIfs>(), nResultSize));
3305 ft->Children[i], std::make_shared<OpCombinA>(), nResultSize));
3309 ft->Children[i], std::make_shared<OpEven>(), nResultSize));
3313 ft->Children[i], std::make_shared<OpLog>(), nResultSize));
3317 ft->Children[i], std::make_shared<OpMod>(), nResultSize));
3321 ft->Children[i], std::make_shared<OpTrunc>(), nResultSize));
3325 ft->Children[i], std::make_shared<OpSkew>(), nResultSize));
3329 ft->Children[i], std::make_shared<OpArcTan2>(), nResultSize));
3333 ft->Children[i], std::make_shared<OpBitOr>(), nResultSize));
3337 ft->Children[i], std::make_shared<OpBitLshift>(), nResultSize));
3341 ft->Children[i], std::make_shared<OpBitRshift>(), nResultSize));
3345 ft->Children[i], std::make_shared<OpBitXor>(), nResultSize));
3353 ft->Children[i], std::make_shared<OpPoisson>(), nResultSize));
3357 ft->Children[i], std::make_shared<OpSumSQ>(), nResultSize));
3361 ft->Children[i], std::make_shared<OpSkewp>(), nResultSize));
3365 ft->Children[i], std::make_shared<OpBinomdist>(), nResultSize));
3369 ft->Children[i], std::make_shared<OpVarP>(), nResultSize));
3373 ft->Children[i], std::make_shared<OpCeil>(), nResultSize));
3377 ft->Children[i], std::make_shared<OpCombin>(), nResultSize));
3381 ft->Children[i], std::make_shared<OpDevSq>(), nResultSize));
3385 ft->Children[i], std::make_shared<OpStDev>(), nResultSize));
3389 ft->Children[i], std::make_shared<OpSlope>(), nResultSize));
3393 ft->Children[i], std::make_shared<OpSTEYX>(), nResultSize));
3397 ft->Children[i], std::make_shared<OpZTest>(), nResultSize));
3401 std::make_shared<DynamicKernelPiArgument>(
mCalcConfig, ts,
3406 std::make_shared<DynamicKernelRandomArgument>(
mCalcConfig, ts,
3411 ft->Children[i], std::make_shared<OpProduct>(), nResultSize));
3419 ft->Children[i], std::make_shared<OpSumX2MY2>(), nResultSize));
3423 ft->Children[i], std::make_shared<OpSumX2PY2>(), nResultSize));
3431 ft->Children[i], std::make_shared<OpTTest>(), nResultSize));
3435 ft->Children[i], std::make_shared<OpTDist>(), nResultSize));
3443 ft->Children[i], std::make_shared<OpSumXMY2>(), nResultSize));
3447 ft->Children[i], std::make_shared<OpStDevP>(), nResultSize));
3451 ft->Children[i], std::make_shared<OpCovar>(), nResultSize));
3455 ft->Children[i], std::make_shared<OpAnd>(), nResultSize));
3459 ft->Children[i], std::make_shared<OpVLookup>(), nResultSize));
3463 ft->Children[i], std::make_shared<OpOr>(), nResultSize));
3467 ft->Children[i], std::make_shared<OpNot>(), nResultSize));
3471 ft->Children[i], std::make_shared<OpXor>(), nResultSize));
3475 ft->Children[i], std::make_shared<OpDmax>(), nResultSize));
3479 ft->Children[i], std::make_shared<OpDmin>(), nResultSize));
3483 ft->Children[i], std::make_shared<OpDproduct>(), nResultSize));
3487 ft->Children[i], std::make_shared<OpDaverage>(), nResultSize));
3491 ft->Children[i], std::make_shared<OpDstdev>(), nResultSize));
3495 ft->Children[i], std::make_shared<OpDstdevp>(), nResultSize));
3499 ft->Children[i], std::make_shared<OpDsum>(), nResultSize));
3503 ft->Children[i], std::make_shared<OpDvar>(), nResultSize));
3507 ft->Children[i], std::make_shared<OpDvarp>(), nResultSize));
3511 ft->Children[i], std::make_shared<OpAverageIf>(), nResultSize));
3515 ft->Children[i], std::make_shared<OpDcount>(), nResultSize));
3519 ft->Children[i], std::make_shared<OpDcount2>(), nResultSize));
3523 ft->Children[i], std::make_shared<OpDeg>(), nResultSize));
3527 ft->Children[i], std::make_shared<OpRoundUp>(), nResultSize));
3531 ft->Children[i], std::make_shared<OpRoundDown>(), nResultSize));
3535 ft->Children[i], std::make_shared<OpInt>(), nResultSize));
3539 ft->Children[i], std::make_shared<OpRadians>(), nResultSize));
3543 ft->Children[i], std::make_shared<OpCountIf>(), nResultSize));
3547 ft->Children[i], std::make_shared<OpIsEven>(), nResultSize));
3551 ft->Children[i], std::make_shared<OpIsOdd>(), nResultSize));
3555 ft->Children[i], std::make_shared<OpFact>(), nResultSize));
3559 ft->Children[i], std::make_shared<OpMinA>(), nResultSize));
3563 ft->Children[i], std::make_shared<OpCountA>(), nResultSize));
3567 ft->Children[i], std::make_shared<OpMaxA>(), nResultSize));
3571 ft->Children[i], std::make_shared<OpAverageA>(), nResultSize));
3575 ft->Children[i], std::make_shared<OpVarA>(), nResultSize));
3579 ft->Children[i], std::make_shared<OpVarPA>(), nResultSize));
3583 ft->Children[i], std::make_shared<OpStDevA>(), nResultSize));
3587 ft->Children[i], std::make_shared<OpStDevPA>(), nResultSize));
3591 ft->Children[i], std::make_shared<OpSec>(), nResultSize));
3595 ft->Children[i], std::make_shared<OpSecH>(), nResultSize));
3599 ft->Children[i], std::make_shared<OpSumIf>(), nResultSize));
3603 ft->Children[i], std::make_shared<OpNegSub>(), nResultSize));
3607 ft->Children[i], std::make_shared<OpAveDev>(), nResultSize));
3611 ft->Children[i], std::make_shared<OpIf>(), nResultSize));
3614 if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getEffect")
3618 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCumipmt")
3622 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getNominal")
3626 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCumprinc")
3630 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getXnpv")
3634 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getPricemat")
3638 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getReceived")
3642 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getTbilleq")
3646 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getTbillprice")
3650 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getTbillyield")
3654 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getFvschedule")
3662 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getYielddisc")
3666 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getYieldmat")
3670 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getAccrintm")
3674 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupdaybs")
3678 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getDollarde")
3682 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getDollarfr")
3686 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupdays")
3690 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupdaysnc")
3694 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getDisc")
3698 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getIntrate")
3702 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getPrice")
3705 ft->Children[i], std::make_shared<OpPrice>(), nResultSize));
3707 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupnum")
3710 std::make_shared<OpCoupnum>(), nResultSize));
3722 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getAmorlinc")
3725 std::make_shared<OpAmorlinc>(), nResultSize));
3727 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getMduration")
3730 std::make_shared<OpMDuration>(), nResultSize));
3737 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getOddlprice")
3740 ft->Children[i], std::make_shared<OpOddlprice>(), nResultSize));
3742 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getOddlyield")
3745 std::make_shared<OpOddlyield>(), nResultSize));
3747 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getPricedisc")
3750 ft->Children[i], std::make_shared<OpPriceDisc>(), nResultSize));
3752 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCouppcd")
3755 std::make_shared<OpCouppcd>(), nResultSize));
3757 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupncd")
3760 std::make_shared<OpCoupncd>(), nResultSize));
3762 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getAccrint")
3765 std::make_shared<OpAccrint>(), nResultSize));
3767 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getSqrtpi")
3770 std::make_shared<OpSqrtPi>(), nResultSize));
3772 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getConvert")
3775 std::make_shared<OpConvert>(), nResultSize));
3777 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getIseven")
3780 std::make_shared<OpIsEven>(), nResultSize));
3782 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getIsodd")
3785 std::make_shared<OpIsOdd>(), nResultSize));
3787 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getMround")
3790 std::make_shared<OpMROUND>(), nResultSize));
3792 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getQuotient")
3795 std::make_shared<OpQuotient>(), nResultSize));
3797 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getSeriessum")
3800 std::make_shared<OpSeriesSum>(), nResultSize));
3802 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getBesselj")
3805 std::make_shared<OpBesselj>(), nResultSize));
3807 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getGestep")
3810 std::make_shared<OpGestep>(), nResultSize));
3813 throw UnhandledToken(OUString(
"unhandled external " + pChild->
GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
3817 throw UnhandledToken(OUString(
"unhandled opcode "
3819 +
"(" + OUString::number(opc) +
")").toUtf8().getStr(), __FILE__, __LINE__);
3826 class DynamicKernel :
public CompiledFormula
3830 virtual ~DynamicKernel()
override;
3838 std::string
const & GetMD5();
3843 void CreateKernel();
3847 void Launch(
size_t nr );
3849 cl_mem GetResultBuffer()
const {
return mpResClmem; }
3874 mnResultSize(nResultSize) {}
3876 DynamicKernel::~DynamicKernel()
3893 void DynamicKernel::CodeGen()
3898 std::stringstream decl;
3901 decl <<
"#if __OPENCL_VERSION__ < 120\n";
3902 decl <<
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3907 decl <<
"#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3921 mSyms.DumpSlidingWindowFunctions(decl);
3924 decl <<
"(__global double *result";
3928 DK->GenSlidingWindowDecl(decl);
3930 decl <<
") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3931 DK->GenSlidingWindowDeclRef() <<
";\n}\n";
3935 (mKernelSignature[0] ==
'_'
3940 std::string
const & DynamicKernel::GetMD5()
3944 std::stringstream md5s;
3950 RTL_DIGEST_LENGTH_MD5);
3953 md5s << std::hex << static_cast<int>(i);
3961 void DynamicKernel::CreateKernel()
3975 static std::string lastOneKernelHash;
3976 static std::string lastSecondKernelHash;
3977 static cl_program lastOneProgram =
nullptr;
3978 static cl_program lastSecondProgram =
nullptr;
3979 std::string KernelHash = mKernelSignature + GetMD5();
3980 if (lastOneKernelHash == KernelHash && lastOneProgram)
3982 mpProgram = lastOneProgram;
3984 else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3986 mpProgram = lastSecondProgram;
3991 if (lastSecondProgram)
3993 SAL_INFO(
"sc.opencl",
"Releasing program " << lastSecondProgram);
3994 err = clReleaseProgram(lastSecondProgram);
3996 lastSecondProgram =
nullptr;
4006 mpProgram = clCreateProgramWithSource(kEnv.
mpkContext, 1,
4007 &src,
nullptr, &err);
4008 if (err != CL_SUCCESS)
4009 throw OpenCLError(
"clCreateProgramWithSource", err, __FILE__, __LINE__);
4010 SAL_INFO(
"sc.opencl",
"Created program " << mpProgram);
4012 err = clBuildProgram(mpProgram, 1,
4014 if (err != CL_SUCCESS)
4016 #if OSL_DEBUG_LEVEL > 0
4017 if (err == CL_BUILD_PROGRAM_FAILURE)
4019 cl_build_status stat;
4020 cl_int e = clGetProgramBuildInfo(
4022 CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status),
4025 e != CL_SUCCESS,
"sc.opencl",
4026 "after CL_BUILD_PROGRAM_FAILURE,"
4027 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
4029 if (e == CL_SUCCESS)
4032 e = clGetProgramBuildInfo(
4034 CL_PROGRAM_BUILD_LOG, 0,
nullptr, &n);
4036 e != CL_SUCCESS || n == 0,
"sc.opencl",
4037 "after CL_BUILD_PROGRAM_FAILURE,"
4038 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
4040 if (e == CL_SUCCESS && n != 0)
4042 std::vector<char>
log(n);
4043 e = clGetProgramBuildInfo(
4045 CL_PROGRAM_BUILD_LOG, n,
log.data(),
nullptr);
4047 e != CL_SUCCESS || n == 0,
"sc.opencl",
4048 "after CL_BUILD_PROGRAM_FAILURE,"
4049 " clGetProgramBuildInfo("
4051 if (e == CL_SUCCESS)
4054 "CL_BUILD_PROGRAM_FAILURE, status " << stat
4055 <<
", log \"" <<
log.data() <<
"\"");
4061 SAL_WARN(
"sc.opencl",
"Program failed to build, aborting.");
4064 throw OpenCLError(
"clBuildProgram", err, __FILE__, __LINE__);
4067 SAL_INFO(
"sc.opencl",
"Built program " << mpProgram);
4071 (mKernelSignature + GetMD5()).c_str());
4073 lastSecondKernelHash = lastOneKernelHash;
4074 lastSecondProgram = lastOneProgram;
4075 lastOneKernelHash = KernelHash;
4078 mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
4079 if (err != CL_SUCCESS)
4080 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
4081 SAL_INFO(
"sc.opencl",
"Created kernel " <<
mpKernel <<
" with name " << kname <<
" in program " << mpProgram);
4084 void DynamicKernel::Launch(
size_t nr )
4092 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
4093 nr *
sizeof(
double),
nullptr, &err);
4094 if (CL_SUCCESS != err)
4095 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
4096 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpResClmem <<
" size " << nr <<
"*" <<
sizeof(
double) <<
"=" << (nr*
sizeof(
double)));
4100 if (CL_SUCCESS != err)
4101 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
4104 size_t global_work_size[] = { nr };
4107 global_work_size,
nullptr, 0,
nullptr,
nullptr);
4108 if (CL_SUCCESS != err)
4109 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
4111 if (CL_SUCCESS != err)
4112 throw OpenCLError(
"clFlush", err, __FILE__, __LINE__);
4118 template <
typename T>
4119 const DynamicKernelArgument* SymbolTable::DeclRefArg(
const ScCalcConfig& config,
4121 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize)
4124 ArgumentMap::iterator it =
mSymbols.find(ref);
4128 std::stringstream ss;
4133 return new_arg.get();
4137 return it->second.get();
4141 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() {}
4143 FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
4150 std::shared_ptr<DynamicKernel> DynamicKernel::create(
const ScCalcConfig& rConfig,
const ScTokenArray& rCode,
int nResultSize )
4154 std::vector<FormulaToken*> aTokenVector;
4155 std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
4157 while ((pCur = const_cast<FormulaToken*>(aCode.Next())) !=
nullptr)
4166 if( aTokenVector.empty())
4169 aTokenVector.pop_back();
4172 if (aHashMap.find(pTempFormula) == aHashMap.end())
4174 pCurNode->Children.push_back(aHashMap[pTempFormula]);
4179 std::make_shared<FormulaTreeNode>(pTempFormula);
4180 pCurNode->Children.push_back(pChildTreeNode);
4183 std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4184 aHashMap[pCur] = pCurNode;
4186 aTokenVector.push_back(pCur);
4190 Root->Children.push_back(aHashMap[aTokenVector.back()]);
4192 auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
4197 pDynamicKernel->CodeGen();
4198 pDynamicKernel->CreateKernel();
4200 catch (
const UnhandledToken& ut)
4202 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: UnhandledToken: " << ut.mMessage <<
" at " << ut.mFile <<
":" << ut.mLineNumber);
4205 catch (
const InvalidParameterCount& ipc)
4207 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
4208 <<
" at " << ipc.mFile <<
":" << ipc.mLineNumber);
4211 catch (
const OpenCLError& oce)
4215 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: OpenCLError from " << oce.mFunction <<
": " <<
openclwrapper::errorString(oce.mError) <<
" at " << oce.mFile <<
":" << oce.mLineNumber);
4222 catch (
const Unhandled& uh)
4224 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: Unhandled at " << uh.mFile <<
":" << uh.mLineNumber);
4234 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: unexpected exception");
4238 return pDynamicKernel;
4243 class CLInterpreterResult
4253 CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4254 CLInterpreterResult( DynamicKernel* pKernel,
SCROW nGroupLength ) :
4255 mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4257 bool isValid()
const {
return mpKernel !=
nullptr; }
4259 void fetchResultFromKernel()
4267 mpCLResBuf = mpKernel->GetResultBuffer();
4273 mpResBuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
4275 CL_TRUE, CL_MAP_READ, 0,
4276 mnGroupLength *
sizeof(
double), 0,
nullptr,
nullptr,
4279 if (err != CL_SUCCESS)
4285 SAL_INFO(
"sc.opencl",
"Kernel results: cl_mem: " << mpResBuf <<
" (" << DebugPeekDoubles(mpResBuf, mnGroupLength) <<
")");
4301 err = clEnqueueUnmapMemObject(kEnv.
mpkCmdQueue, mpCLResBuf, mpResBuf, 0,
nullptr,
nullptr);
4303 if (err != CL_SUCCESS)
4313 class CLInterpreterContext
4321 explicit CLInterpreterContext(
SCROW nGroupLength)
4323 , mnGroupLength(nGroupLength) {}
4325 bool isValid()
const
4327 return mpKernel !=
nullptr;
4330 void setManagedKernel( std::shared_ptr<DynamicKernel> pKernel )
4332 mpKernelStore = std::move(pKernel);
4333 mpKernel = mpKernelStore.get();
4336 CLInterpreterResult launchKernel()
4339 return CLInterpreterResult();
4344 mpKernel->Launch(mnGroupLength);
4346 catch (
const UnhandledToken& ut)
4348 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: UnhandledToken: " << ut.mMessage <<
" at " << ut.mFile <<
":" << ut.mLineNumber);
4350 return CLInterpreterResult();
4352 catch (
const OpenCLError& oce)
4354 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: OpenCLError from " << oce.mFunction <<
": " <<
openclwrapper::errorString(oce.mError) <<
" at " << oce.mFile <<
":" << oce.mLineNumber);
4356 return CLInterpreterResult();
4358 catch (
const Unhandled& uh)
4360 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: Unhandled at " << uh.mFile <<
":" << uh.mLineNumber);
4362 return CLInterpreterResult();
4366 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: unexpected exception");
4368 return CLInterpreterResult();
4371 return CLInterpreterResult(mpKernel, mnGroupLength);
4376 CLInterpreterContext createCLInterpreterContext(
const ScCalcConfig& rConfig,
4379 CLInterpreterContext aCxt(xGroup->mnLength);
4381 aCxt.setManagedKernel(DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
4391 aComp.CompileTokenArray();
4394 bool waitForResults()
4401 if (err != CL_SUCCESS)
4404 return err == CL_SUCCESS;
4413 MergeCalcConfig(rDoc);
4415 genRPNTokens(rDoc, rTopPos, rCode);
4420 CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
4421 if (!aCxt.isValid())
4424 CLInterpreterResult aRes = aCxt.launchKernel();
4425 if (!aRes.isValid())
4428 if (!waitForResults())
4431 aRes.fetchResultFromKernel();
4433 return aRes.pushResultToDocument(rDoc, rTopPos);
Matrix data type that can store values of mixed types.
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
sal_uInt64 kernelFailures
OUString getString() const
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells. ...
cl_program mpArryPrograms[MAX_CLFILE_NUM]
void setKernelEnv(KernelEnv *envInfo)
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
(Partially) abstract base class for an operand
::boost::intrusive_ptr< ScMatrix > ScMatrixRef
StringConversion meStringConversion
Reference< deployment::XPackageRegistry > create(Reference< deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, Reference< XComponentContext > const &xComponentContext)
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
Arguments that are actually compile-time constant string Currently, only the hash is passed...
::boost::spirit::classic::rule< ScannerT > argument
exports com.sun.star.chart2. data
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
Configuration options for formula interpreter.
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
#define SAL_WARN_IF(condition, area, stream)
#define SAL_INFO(area, stream)
cl_command_queue mpkCmdQueue
int uniform_int_distribution(int a, int b)
#define SAL_WARN(area, stream)
const char * errorString(cl_int nError)