12 #include <document.hxx>
14 #include <tokenarray.hxx>
15 #include <compiler.hxx>
20 #include <rtl/math.hxx>
34 #include <com/sun/star/sheet/FormulaLanguage.hpp>
41 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
42 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
46 "#define IllegalArgument 502\n"
47 "#define IllegalFPOperation 503 // #NUM!\n"
48 "#define NoValue 519 // #VALUE!\n"
49 "#define NoConvergence 523\n"
50 "#define DivisionByZero 532 // #DIV/0!\n"
51 "#define NOTAVAILABLE 0x7fff // #N/A\n"
53 "double CreateDoubleError(ulong nErr)\n"
59 " return as_double(0x7FF8000000000000+nErr);\n"
63 "uint GetDoubleErrorValue(double fVal)\n"
65 " if (isfinite(fVal))\n"
68 " return IllegalFPOperation; // normal INF\n"
69 " if (as_ulong(fVal) & 0XFFFF0000u)\n"
70 " return NoValue; // just a normal NAN\n"
71 " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
74 "double fsum_count(double a, double b, __private int *p) {\n"
75 " bool t = isnan(a);\n"
79 "double fmin_count(double a, double b, __private int *p) {\n"
80 " double result = fmin(a, b);\n"
81 " bool t = isnan(result);\n"
85 "double fmax_count(double a, double b, __private int *p) {\n"
86 " double result = fmax(a, b);\n"
87 " bool t = isnan(result);\n"
91 "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
92 "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
93 "double fsub(double a, double b) { return a-b; }\n"
94 "double fdiv(double a, double b) { return a/b; }\n"
95 "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
// Kernel-side helper text: checks a double against the bound
// kMaxInt = 2^53 - 1 and against its own round trip through a long.
// NOTE(review): in OpenCL C the as_double()/as_long() operators reinterpret
// the bit pattern rather than convert the numeric value. A value conversion
// such as (double)kMaxInt and (long)a looks intended here - confirm.
96 "int is_representable_integer(double a) {\n"
97 " long kMaxInt = (1L << 53) - 1;\n"
98 " if (a <= as_double(kMaxInt))\n"
100 " long nInt = as_long(a);\n"
102 " return (nInt <= kMaxInt &&\n"
103 " (!((fInt = as_double(nInt)) < a) && !(fInt > a)));\n"
// Kernel-side helper text: approximate equality test for two doubles.
// e48 evaluates to 2^-48 (16777216 is 2^24) and e44 to 2^-44; both are used
// below as relative tolerances scaled by fabs(a) and fabs(b).
107 "int approx_equal(double a, double b) {\n"
108 " double e48 = 1.0 / (16777216.0 * 16777216.0);\n"
109 " double e44 = e48 * 16.0;\n"
112 " if (a == 0.0 || b == 0.0)\n"
// d is the absolute difference; a non-finite difference is never equal.
114 " double d = fabs(a - b);\n"
115 " if (!isfinite(d))\n"
116 " return 0; // Nan or Inf involved\n"
// Note that a and b are overwritten with their absolute values here.
117 " if (d > ((a = fabs(a)) * e44) || d > ((b = fabs(b)) * e44))\n"
119 " if (is_representable_integer(d) && is_representable_integer(a) && is_representable_integer(b))\n"
120 " return 0; // special case for representable integers.\n"
// Final verdict uses the tighter 2^-48 relative tolerance on both operands.
121 " return (d < a * e48 && d < b * e48);\n"
123 "double fsum_approx(double a, double b) {\n"
124 " if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0))\n"
125 " && approx_equal( a, -b ) )\n"
129 "double fsub_approx(double a, double b) {\n"
130 " if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approx_equal( a, b ) )\n"
141 #include <rtl/digest.h>
// Builds, in the stream ss, a copy of s where each line is preceded by a
// block-comment marker holding a running line number (field width 4).
151 std::string linenumberify(
const std::string& s)
153 std::stringstream ss;
// Each newline-terminated line is copied including its newline character.
157 while ((newline = s.find(
'\n', start)) != std::string::npos)
159 ss <<
"/*" << std::setw(4) << linenumber++ <<
"*/ " << s.substr(start, newline-start+1);
// Remaining text after the last newline is numbered as one more line.
162 if (start < s.size())
163 ss <<
"/*" << std::setw(4) << linenumber++ <<
"*/ " << s.substr(start, std::string::npos);
// Inspects an array of nLength rtl_uString pointers for non-null entries.
// A null array pointer is handled separately before the scan.
167 bool AllStringsAreNull(
const rtl_uString*
const* pStringArray,
size_t nLength)
169 if (pStringArray ==
nullptr)
// Linear scan over the entries, testing each pointer against nullptr.
172 for (
size_t i = 0;
i < nLength;
i++)
173 if (pStringArray[
i] !=
nullptr)
// Returns str wrapped in double quotes for debug output. Strings of 20 or
// more characters are cut to their first 20 characters and followed by an
// ellipsis after the closing quote.
179 OUString LimitedString(
const OUString& str )
// Short strings are quoted in full.
181 if( str.getLength() < 20 )
182 return "\"" + str +
"\"";
// Longer strings: quote only the leading 20 characters and mark truncation.
184 return OUString::Concat(
"\"") + str.subView( 0, 20 ) +
"\"...";
// Produces a short textual preview of the data behind a formula token, for
// use in debug logging. At most the first four elements of a referenced
// array are listed. doubleRefIndex selects which array of a double
// reference is previewed.
188 OUString DebugPeekData(
const FormulaToken* ref,
int doubleRefIndex = 0)
// Single vector reference: preview up to four leading elements.
194 OUStringBuffer buf =
"SingleRef {";
195 for(
size_t i = 0; i < std::min< size_t >( 4, pSVR->
GetArrayLength()); ++
i )
205 buf.append(
",..." );
207 return buf.makeStringAndClear();
// Double vector reference: preview the array chosen by doubleRefIndex.
213 OUStringBuffer buf =
"DoubleRef {";
214 for(
size_t i = 0; i < std::min< size_t >( 4, pDVR->
GetArrayLength()); ++
i )
// Numeric data takes precedence; string data is shown via LimitedString.
218 if( pDVR->
GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
219 buf.append( pDVR->
GetArrays()[doubleRefIndex].mpNumericArray[
i ] );
220 else if( pDVR->
GetArrays()[doubleRefIndex].mpStringArray != nullptr )
221 buf.append( LimitedString( OUString( pDVR->
GetArrays()[doubleRefIndex].mpStringArray[
i ] )));
224 buf.append(
",..." );
226 return buf.makeStringAndClear();
// Plain numeric token: print its double value.
234 return OUString::number(ref->
GetDouble());
// Produces a brace-delimited debug preview of a double array, listing at
// most the first four of its size elements.
243 OUString DebugPeekDoubles(
const double*
data,
int size)
245 OUStringBuffer buf =
"{";
// Only the leading min(4, size) values are appended.
246 for(
int i = 0;
i < std::min( 4, size ); ++
i )
250 buf.append( data[
i ] );
253 buf.append(
",..." );
255 return buf.makeStringAndClear();
// Creates the OpenCL buffer object mpClmem for this vector reference and
// binds it as kernel argument argno of kernel k. Every failing OpenCL call
// that is checked here is reported by throwing OpenCLError.
261 size_t VectorRef::Marshal( cl_kernel k,
int argno,
int, cl_program )
// Host-side data pointer and its size in bytes; filled in below.
265 double* pHostBuffer =
nullptr;
266 size_t szHostBuffer = 0;
284 pHostBuffer =
const_cast<double*
>(
299 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
302 if (CL_SUCCESS != err)
303 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
304 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer <<
" using host buffer " << pHostBuffer);
// Alternative path: allocate a buffer of at least one double and fill it
// with NaN values through a mapped pointer.
308 if (szHostBuffer == 0)
309 szHostBuffer =
sizeof(double);
312 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
313 szHostBuffer,
nullptr, &err);
314 if (CL_SUCCESS != err)
315 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
316 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer);
// Blocking map for writing (CL_TRUE), covering the whole buffer.
318 double* pNanBuffer =
static_cast<double*
>(clEnqueueMapBuffer(
319 kEnv.
mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
320 szHostBuffer, 0,
nullptr,
nullptr, &err));
321 if (CL_SUCCESS != err)
322 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
// Every double slot of the buffer is set to NaN.
324 for (
size_t i = 0;
i < szHostBuffer /
sizeof(double);
i++)
325 rtl::math::setNan(&pNanBuffer[
i]);
326 err = clEnqueueUnmapMemObject(kEnv.
mpkCmdQueue, mpClmem,
327 pNanBuffer, 0,
nullptr,
nullptr);
329 if (CL_SUCCESS != err)
// Finally pass the cl_mem handle itself as the kernel argument.
333 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << mpClmem <<
" (" << DebugPeekData(ref,
mnIndex) <<
")");
334 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&mpClmem));
335 if (CL_SUCCESS != err)
336 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Constructor and kernel-source generators of the constant string argument.
355 ConstStringArgument(
const ScCalcConfig& config,
const std::string& s,
// The argument is declared in kernel source as an unsigned value.
359 virtual void GenDecl( std::stringstream& ss )
const override
361 ss <<
"unsigned " << mSymName;
// A reference to the argument is whatever GenSlidingWindowDeclRef yields.
363 virtual void GenDeclRef( std::stringstream& ss )
const override
365 ss << GenSlidingWindowDeclRef();
367 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
// May throw Unhandled.
371 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
373 std::stringstream ss;
375 throw Unhandled(__FILE__, __LINE__);
380 virtual size_t GetWindowSize()
const override
// Passes the string to the kernel as a single cl_uint hash code, set as
// kernel argument argno. May throw Unhandled; throws OpenCLError when
// clSetKernelArg fails.
385 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
389 cl_uint hashCode = 0;
392 throw Unhandled(__FILE__, __LINE__);
// The kernel only ever sees the hash of the string, not its characters.
396 hashCode = s.hashCode();
399 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_uint: " << hashCode <<
"(" << DebugPeekData(ref) <<
")" );
400 cl_int
err = clSetKernelArg(k, argno,
sizeof(cl_uint), static_cast<void*>(&hashCode));
401 if (CL_SUCCESS != err)
402 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Kernel argument representing a single double value that is passed to
// the kernel by value.
408 class DynamicKernelConstantArgument :
public DynamicKernelArgument
411 DynamicKernelConstantArgument(
const ScCalcConfig& config,
const std::string& s,
413 DynamicKernelArgument(config, s, ft) { }
// Declared in kernel source as a double parameter named mSymName.
415 virtual void GenDecl( std::stringstream& ss )
const override
417 ss <<
"double " << mSymName;
419 virtual void GenDeclRef( std::stringstream& ss )
const override
423 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
// May throw Unhandled.
427 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
430 throw Unhandled(__FILE__, __LINE__);
433 virtual size_t GetWindowSize()
const override
// Yields the double value of this argument; may throw Unhandled.
437 double GetDouble()
const
441 throw Unhandled(__FILE__, __LINE__);
// Sets the value obtained from GetDouble as kernel argument argno.
// Throws OpenCLError when clSetKernelArg fails.
445 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
448 double tmp = GetDouble();
450 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": double: " << tmp);
451 cl_int err = clSetKernelArg(k, argno,
sizeof(
double), static_cast<void*>(&tmp));
452 if (CL_SUCCESS != err)
453 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Kernel argument standing for the constant pi.
458 class DynamicKernelPiArgument :
public DynamicKernelArgument
461 DynamicKernelPiArgument(
const ScCalcConfig& config,
const std::string& s,
463 DynamicKernelArgument(config, s, ft) { }
// Declared in kernel source as a double parameter named mSymName.
465 virtual void GenDecl( std::stringstream& ss )
const override
467 ss <<
"double " << mSymName;
// References are emitted as the literal decimal text of pi rather than as
// the parameter name.
469 virtual void GenDeclRef( std::stringstream& ss )
const override
471 ss <<
"3.14159265358979";
473 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
477 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
481 virtual size_t GetWindowSize()
const override
// Sets the double tmp as kernel argument argno and logs it tagged as PI.
// Throws OpenCLError when clSetKernelArg fails.
486 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
491 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": double: " << tmp <<
" (PI)");
492 cl_int err = clSetKernelArg(k, argno,
sizeof(
double), static_cast<void*>(&tmp));
493 if (CL_SUCCESS != err)
494 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Kernel argument for random numbers: the kernel receives an int and
// references to the argument call a generated per-symbol _Random function.
499 class DynamicKernelRandomArgument :
public DynamicKernelArgument
502 DynamicKernelRandomArgument(
const ScCalcConfig& config,
const std::string& s,
504 DynamicKernelArgument(config, s, ft) { }
506 virtual void GenDecl( std::stringstream& ss )
const override
508 ss <<
"double " << mSymName;
510 virtual void GenDeclRef( std::stringstream& ss )
const override
// In the sliding-window function signature the argument is an int.
514 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
516 ss <<
"int " << mSymName;
// A reference expands to a call of the form NAME_Random(NAME).
518 virtual std::string GenSlidingWindowDeclRef(
bool =
false )
const override
520 return mSymName +
"_Random(" + mSymName +
")";
522 virtual void GenSlidingWindowFunction( std::stringstream& ss )
override
532 #ifndef DEFINED_RANDOM123_STUFF\n\
533 #define DEFINED_RANDOM123_STUFF\n\
536 Copyright 2010-2011, D. E. Shaw Research.\n\
537 All rights reserved.\n\
539 Redistribution and use in source and binary forms, with or without\n\
540 modification, are permitted provided that the following conditions are\n\
543 * Redistributions of source code must retain the above copyright\n\
544 notice, this list of conditions, and the following disclaimer.\n\
546 * Redistributions in binary form must reproduce the above copyright\n\
547 notice, this list of conditions, and the following disclaimer in the\n\
548 documentation and/or other materials provided with the distribution.\n\
550 * Neither the name of D. E. Shaw Research nor the names of its\n\
551 contributors may be used to endorse or promote products derived from\n\
552 this software without specific prior written permission.\n\
554 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
555 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
556 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
557 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
558 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
559 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
560 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
561 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
562 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
563 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
564 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
567 typedef uint uint32_t;\n\
568 struct r123array2x32\n\
572 enum r123_enum_threefry32x2\n\
583 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
584 __attribute__ ((always_inline));\n\
586 RotL_32 (uint32_t x, unsigned int N)\n\
588 return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
591 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
592 typedef struct r123array2x32 threefry2x32_key_t;\n\
593 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
594 inline threefry2x32_key_t\n\
595 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
600 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
601 threefry2x32_ctr_t in,\n\
602 threefry2x32_key_t k)\n\
603 __attribute__ ((always_inline));\n\
604 inline threefry2x32_ctr_t\n\
605 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
606 threefry2x32_key_t k)\n\
608 threefry2x32_ctr_t X;\n\
609 uint32_t ks[2 + 1];\n\
611 ks[2] = 0x1BD11BDA;\n\
612 for (i = 0; i < 2; i++) {\n\
619 if (Nrounds > 0) {\n\
621 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
624 if (Nrounds > 1) {\n\
626 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
629 if (Nrounds > 2) {\n\
631 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
634 if (Nrounds > 3) {\n\
636 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
639 if (Nrounds > 3) {\n\
644 if (Nrounds > 4) {\n\
646 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
649 if (Nrounds > 5) {\n\
651 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
654 if (Nrounds > 6) {\n\
656 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
659 if (Nrounds > 7) {\n\
661 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
664 if (Nrounds > 7) {\n\
669 if (Nrounds > 8) {\n\
671 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
674 if (Nrounds > 9) {\n\
676 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
679 if (Nrounds > 10) {\n\
681 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
684 if (Nrounds > 11) {\n\
686 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
689 if (Nrounds > 11) {\n\
694 if (Nrounds > 12) {\n\
696 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
699 if (Nrounds > 13) {\n\
701 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
704 if (Nrounds > 14) {\n\
706 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
709 if (Nrounds > 15) {\n\
711 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
714 if (Nrounds > 15) {\n\
719 if (Nrounds > 16) {\n\
721 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
724 if (Nrounds > 17) {\n\
726 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
729 if (Nrounds > 18) {\n\
731 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
734 if (Nrounds > 19) {\n\
736 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
739 if (Nrounds > 19) {\n\
744 if (Nrounds > 20) {\n\
746 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
749 if (Nrounds > 21) {\n\
751 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
754 if (Nrounds > 22) {\n\
756 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
759 if (Nrounds > 23) {\n\
761 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
764 if (Nrounds > 23) {\n\
769 if (Nrounds > 24) {\n\
771 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
774 if (Nrounds > 25) {\n\
776 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
779 if (Nrounds > 26) {\n\
781 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
784 if (Nrounds > 27) {\n\
786 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
789 if (Nrounds > 27) {\n\
794 if (Nrounds > 28) {\n\
796 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
799 if (Nrounds > 29) {\n\
801 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
804 if (Nrounds > 30) {\n\
806 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
809 if (Nrounds > 31) {\n\
811 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
814 if (Nrounds > 31) {\n\
822 enum r123_enum_threefry2x32\n\
823 { threefry2x32_rounds = 20 };\n\
824 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
825 threefry2x32_key_t k)\n\
826 __attribute__ ((always_inline));\n\
827 inline threefry2x32_ctr_t\n\
828 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
830 return threefry2x32_R (threefry2x32_rounds, in, k);\n\
835 ss <<
"double " << mSymName <<
"_Random (int seed)\n\
837 unsigned tid = get_global_id(0);\n\
838 threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
839 threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
840 c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
842 const double halffactor = 0.5*factor;\n\
843 return c.v[0] * factor + halffactor;\n\
847 virtual size_t GetWindowSize()
const override
// Sets the cl_int seed as kernel argument argno and logs it tagged as
// RANDOM. Throws OpenCLError when clSetKernelArg fails.
852 virtual size_t Marshal( cl_kernel k,
int argno,
int, cl_program )
override
857 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_int: " << seed <<
"(RANDOM)");
858 cl_int err = clSetKernelArg(k, argno,
sizeof(cl_int), static_cast<void*>(&seed));
859 if (CL_SUCCESS != err)
860 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Vector reference whose kernel-side representation is a global array of
// unsigned int values.
866 class DynamicKernelStringArgument :
public VectorRef
869 DynamicKernelStringArgument(
const ScCalcConfig& config,
const std::string& s,
871 VectorRef(config, s, ft,
index) { }
// No helper function is generated for this argument type.
873 virtual void GenSlidingWindowFunction( std::stringstream& )
override { }
875 virtual void GenDecl( std::stringstream& ss )
const override
877 ss <<
"__global unsigned int *" << mSymName;
// The sliding-window declaration is the same as the plain declaration.
879 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
881 DynamicKernelStringArgument::GenDecl(ss);
// Defined out of line.
883 virtual size_t Marshal( cl_kernel,
int,
int, cl_program )
override;
// Creates the OpenCL buffer mpClmem holding one hash code per string and
// binds it as kernel argument argno of kernel k. Failing OpenCL calls are
// reported by throwing OpenCLError.
889 size_t DynamicKernelStringArgument::Marshal( cl_kernel k,
int argno,
int, cl_program )
// One 32-bit slot per string.
913 size_t szHostBuffer = nStrings *
sizeof(cl_int);
914 cl_uint* pHashBuffer =
nullptr;
920 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
921 szHostBuffer,
nullptr, &err);
922 if (CL_SUCCESS != err)
923 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
924 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer);
// Blocking map for writing so the hash codes can be stored directly.
926 pHashBuffer =
static_cast<cl_uint*
>(clEnqueueMapBuffer(
927 kEnv.
mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
928 szHostBuffer, 0,
nullptr,
nullptr, &err));
929 if (CL_SUCCESS != err)
930 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
// Store the hash code of each string in its slot.
932 for (
size_t i = 0;
i < nStrings;
i++)
937 pHashBuffer[
i] = tmp.hashCode();
// Alternative path: a buffer with room for a single cl_int.
948 szHostBuffer =
sizeof(cl_int);
951 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
952 szHostBuffer,
nullptr, &err);
953 if (CL_SUCCESS != err)
954 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
955 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem <<
" size " << szHostBuffer);
957 pHashBuffer =
static_cast<cl_uint*
>(clEnqueueMapBuffer(
958 kEnv.
mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
959 szHostBuffer, 0,
nullptr,
nullptr, &err));
960 if (CL_SUCCESS != err)
961 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
963 for (
size_t i = 0;
i < szHostBuffer /
sizeof(cl_int);
i++)
// Release the mapping before the buffer is handed to the kernel.
966 err = clEnqueueUnmapMemObject(kEnv.
mpkCmdQueue, mpClmem,
967 pHashBuffer, 0,
nullptr,
nullptr);
968 if (CL_SUCCESS != err)
969 throw OpenCLError(
"clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
971 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << mpClmem <<
" (" << DebugPeekData(ref,mnIndex) <<
")");
972 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&mpClmem));
973 if (CL_SUCCESS != err)
974 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Vector reference that builds on the numeric VectorRef generators and
// additionally offers separate double and string reference generators.
981 class DynamicKernelMixedArgument :
public VectorRef
984 DynamicKernelMixedArgument(
const ScCalcConfig& config,
const std::string& s,
// The numeric part of each declaration is delegated to VectorRef.
987 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
989 VectorRef::GenSlidingWindowDecl(ss);
993 virtual void GenSlidingWindowFunction( std::stringstream& )
override { }
995 virtual void GenDecl( std::stringstream& ss )
const override
997 VectorRef::GenDecl(ss);
1001 virtual void GenDeclRef( std::stringstream& ss )
const override
1003 VectorRef::GenDeclRef(ss);
// Emits a conditional expression that tests the numeric reference with
// isnan and uses the numeric value when it is not NaN.
1007 virtual std::string GenSlidingWindowDeclRef(
bool nested )
const override
1009 std::stringstream ss;
1010 ss <<
"(!isnan(" << VectorRef::GenSlidingWindowDeclRef();
1011 ss <<
")?" << VectorRef::GenSlidingWindowDeclRef();
// Reference to the numeric data only.
1016 virtual std::string GenDoubleSlidingWindowDeclRef(
bool =
false )
const override
1018 std::stringstream ss;
1019 ss << VectorRef::GenSlidingWindowDeclRef();
1022 virtual std::string GenStringSlidingWindowDeclRef(
bool =
false )
const override
1024 std::stringstream ss;
// Marshalling starts with the numeric VectorRef part.
1028 virtual size_t Marshal( cl_kernel k,
int argno,
int vw, cl_program p )
override
1030 int i = VectorRef::Marshal(k, argno, vw, p);
// Argument wrapper, parameterised on its base argument class, that
// generates index expressions relative to the loop variable i and the
// work-item id gid0 in the emitted kernel source.
1042 template<
class Base>
1043 class DynamicKernelSlidingArgument :
public Base
1048 const std::shared_ptr<SlidingFunctionBase>& CodeGen,
int index)
1049 :
Base(config, s, ft, index)
1054 throw Unhandled(__FILE__, __LINE__);
// True when the window has more than 100 elements and its start and end
// are either both fixed or both not fixed.
1061 virtual bool NeedParallelReduction()
const
1064 return GetWindowSize() > 100 &&
1065 ((GetStartFixed() && GetEndFixed()) ||
1066 (!GetStartFixed() && !GetEndFixed()));
1069 virtual void GenSlidingWindowFunction( std::stringstream& ) { }
// Emits a bounds-checked element access: the index, either i + gid0 or i,
// is compared against nArrayLength before the array is read.
1071 std::string GenSlidingWindowDeclRef(
bool nested =
false )
const
1074 std::stringstream ss;
1078 ss <<
"((i+gid0) <" << nArrayLength <<
"?";
1079 ss << Base::GetName() <<
"[i + gid0]";
1086 ss <<
"(i <" << nArrayLength <<
"?";
1087 ss << Base::GetName() <<
"[i]";
// Writes the kernel-source loop header that iterates over this argument
// and returns the window size nCurWindowSize. Two variants emit a plain
// for loop over i; two further variants emit a manually unrolled form: an
// outer loop over blocks of outLoopSize iterations followed by the
// remaining iterations written out one by one.
1094 size_t GenReductionLoopHeader(
1095 std::stringstream& ss,
bool& needBody )
// Plain loop bounded by the window size.
1103 ss <<
"for (int i = ";
1105 ss <<
" && i < " << nCurWindowSize <<
"; i++){\n\t\t";
1107 return nCurWindowSize;
// Plain loop whose bound is offset by gid0.
1111 ss <<
"for (int i = ";
1113 ss <<
" && i < gid0+" << nCurWindowSize <<
"; i++){\n\t\t";
1115 return nCurWindowSize;
// First unrolled variant.
1119 ss <<
"tmpBottom = " <<
mpCodeGen->GetBottom() <<
";\n\t";
1120 ss <<
"{int i;\n\t";
1121 std::stringstream temp1, temp2;
// Full blocks of outLoopSize iterations exist only if the quotient is
// non-zero.
1123 if (nCurWindowSize / outLoopSize != 0)
1125 ss <<
"for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1126 for (
int count = 0;
count < outLoopSize;
count++)
1128 ss <<
"i = outLoop*" << outLoopSize <<
"+" <<
count <<
";\n\t";
// temp1 holds the per-iteration body text of the unrolled block.
1132 temp1 <<
"){\n\t\t";
1133 temp1 <<
"tmp = legalize(";
1134 temp1 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1135 temp1 <<
", tmp);\n\t\t\t";
// Remainder iterations that do not fill a whole block.
1143 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1145 ss <<
"i = " << count <<
";\n\t";
1146 if (count == nCurWindowSize / outLoopSize * outLoopSize)
// temp2 holds the body text used for the remainder iterations.
1149 temp2 <<
"){\n\t\t";
1150 temp2 <<
"tmp = legalize(";
1151 temp2 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1152 temp2 <<
", tmp);\n\t\t\t";
1159 return nCurWindowSize;
// Second unrolled variant; it emits the same text pieces as the first.
1165 ss <<
"tmpBottom = " <<
mpCodeGen->GetBottom() <<
";\n\t";
1166 ss <<
"{int i;\n\t";
1167 std::stringstream temp1, temp2;
1169 if (nCurWindowSize / outLoopSize != 0)
1171 ss <<
"for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1172 for (
int count = 0; count < outLoopSize; count++)
1174 ss <<
"i = outLoop*" << outLoopSize <<
"+" << count <<
";\n\t";
1178 temp1 <<
"){\n\t\t";
1179 temp1 <<
"tmp = legalize(";
1180 temp1 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1181 temp1 <<
", tmp);\n\t\t\t";
1189 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1191 ss <<
"i = " << count <<
";\n\t";
1192 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1195 temp2 <<
"){\n\t\t";
1196 temp2 <<
"tmp = legalize(";
1197 temp2 <<
mpCodeGen->Gen2(GenSlidingWindowDeclRef(),
"tmp");
1198 temp2 <<
", tmp);\n\t\t\t";
1205 return nCurWindowSize;
// Vector reference that owns a sliding string argument (mStringArgument)
// and offers separate double and string reference generators in addition
// to the combined one.
1226 class DynamicKernelMixedSlidingArgument :
public VectorRef
1229 DynamicKernelMixedSlidingArgument(
const ScCalcConfig& config,
const std::string& s,
1232 VectorRef(config, s, ft),
1235 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
// No helper function is generated for this argument type.
1241 virtual void GenSlidingWindowFunction( std::stringstream& )
override { }
1243 virtual void GenDecl( std::stringstream& ss )
const override
1249 virtual void GenDeclRef( std::stringstream& ss )
const override
1255 virtual std::string GenSlidingWindowDeclRef(
bool nested )
const override
1257 std::stringstream ss;
1264 virtual std::string GenDoubleSlidingWindowDeclRef(
bool =
false )
const override
1266 std::stringstream ss;
1270 virtual std::string GenStringSlidingWindowDeclRef(
bool =
false )
const override
1272 std::stringstream ss;
1276 virtual size_t Marshal( cl_kernel k,
int argno,
int vw, cl_program p )
override
// String half of the argument, wrapped as a sliding argument.
1285 DynamicKernelSlidingArgument<DynamicKernelStringArgument>
mStringArgument;
// Maps formula tokens to the kernel argument objects created for them.
1292 typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
// The id counter mCurId starts at zero.
1294 SymbolTable() :
mCurId(0) { }
1297 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize);
// Lets every argument in mParams write its helper function to ss.
1299 void DumpSlidingWindowFunctions( std::stringstream& ss )
1301 for (
auto const& argument :
mParams)
1303 argument->GenSlidingWindowFunction(ss);
// Defined out of line.
1309 void Marshal( cl_kernel,
int, cl_program );
// Marshals the arguments onto kernel k. The running argument index i is
// advanced by the value each argument returns from its own Marshal call.
1319 void SymbolTable::Marshal( cl_kernel k,
int nVectorWidth, cl_program pProgram )
1324 i +=
argument->Marshal(k, i, nVectorWidth, pProgram);
// Argument wrapper, parameterised on its base argument class, used for the
// parallel reduction code path. It keeps the code generator in mpCodeGen
// and releases the additional buffer mpClmem2 in its destructor.
1332 template<
class Base>
1333 class ParallelReductionVectorRef :
public Base
1336 ParallelReductionVectorRef(
const ScCalcConfig& config,
const std::string& s,
1338 const std::shared_ptr<SlidingFunctionBase>& CodeGen,
int index)
1339 :
Base(config, s, ft, index)
1345 throw Unhandled(__FILE__, __LINE__);
1352 virtual void GenSlidingWindowFunction( std::stringstream& ss );
// Emits an unchecked element access indexed by either i + gid0 or i.
1354 virtual std::string GenSlidingWindowDeclRef(
bool )
const
1356 std::stringstream ss;
1358 ss << Base::GetName() <<
"[i + gid0]";
1360 ss << Base::GetName() <<
"[i]";
1365 size_t GenReductionLoopHeader(
1366 std::stringstream& ss,
int nResultSize,
bool& needBody );
1368 virtual size_t Marshal( cl_kernel k,
int argno,
int w, cl_program
mpProgram );
// The destructor releases the second memory object.
1370 ~ParallelReductionVectorRef()
1375 err = clReleaseMemObject(
mpClmem2);
1393 std::shared_ptr<SlidingFunctionBase>
mpCodeGen;
// Base class for functions that fold all their arguments into one double
// accumulator named tmp in the generated kernel source. Subclasses provide
// the start value (GetBottom), the combining expression (Gen2) and the
// function name suffix (BinFuncName).
1398 class Reduction :
public SlidingFunctionBase
1402 explicit Reduction(
int nResultSize) : mnResultSize(nResultSize) {}
// Argument kinds that receive a dedicated loop header below.
1404 typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1405 typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1406 typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
// Hook for subclasses to treat a NaN argument specially.
1408 virtual bool HandleNaNArgument( std::stringstream&,
unsigned, SubArguments& )
const
// Writes the complete reduction function for sSymName to ss.
1413 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1414 const std::string& sSymName, SubArguments& vSubArguments )
override
// Function head: a double function named SYMBOL_BINFUNCNAME whose
// parameters are the declarations of all sub-arguments.
1416 ss <<
"\ndouble " << sSymName;
1417 ss <<
"_" << BinFuncName() <<
"(";
1418 for (
size_t i = 0; i < vSubArguments.size(); i++)
1422 vSubArguments[i]->GenSlidingWindowDecl(ss);
// Local state of the generated function. nCount is only declared for
// average and min/max style reductions.
1425 ss <<
"double tmp = " << GetBottom() <<
";\n";
1426 ss <<
"int gid0 = get_global_id(0);\n";
1427 if (isAverage() || isMinOrMax())
1428 ss <<
"int nCount = 0;\n";
1429 ss <<
"double tmpBottom;\n";
1430 unsigned i = vSubArguments.size();
// Range arguments get a loop header matching their dynamic type.
1433 if (NumericRange* NR =
1434 dynamic_cast<NumericRange*>(vSubArguments[i].
get()))
1437 NR->GenReductionLoopHeader(ss, needBody);
1441 else if (ParallelNumericRange* PNR =
1442 dynamic_cast<ParallelNumericRange*>(vSubArguments[i].
get()))
1445 bool bNeedBody =
false;
1446 PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1450 else if (StringRange* SR =
1451 dynamic_cast<StringRange*>(vSubArguments[i].
get()))
1455 SR->GenReductionLoopHeader(ss, needBody);
1461 FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
// Arguments whose token opcode is ocPush first go through the NaN hook.
1471 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1473 bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1475 ss <<
"tmpBottom = " << GetBottom() <<
";\n";
1480 ss << vSubArguments[i]->GenSlidingWindowDeclRef();
// Reductions that report ZeroReturnZero emit an early return of 0.
1482 if (ZeroReturnZero())
1483 ss <<
" return 0;\n";
1487 ss << Gen2(
"tmpBottom",
"tmp") <<
";\n";
1493 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(),
"tmp");
1501 ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(),
"tmp");
// Result handling includes a division-by-zero error return and a division
// by nCount that is written as a multiplication by pow(nCount, -1).
1508 " return CreateDoubleError(DivisionByZero);\n";
1509 else if (isMinOrMax())
1515 ss <<
"*pow((double)nCount,-1.0)";
// Defaults: not an average, not min/max, numeric input only.
1518 virtual bool isAverage()
const {
return false; }
1519 virtual bool isMinOrMax()
const {
return false; }
1520 virtual bool takeString()
const override {
return false; }
1521 virtual bool takeNumeric()
const override {
return true; }
// Base class for functions of exactly two arguments. The generated kernel
// function evaluates Gen2 on both argument references and returns it.
1525 class Binary :
public SlidingFunctionBase
1528 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1529 const std::string& sSymName, SubArguments& vSubArguments )
override
// Function head: a double function named SYMBOL_BINFUNCNAME.
1531 ss <<
"\ndouble " << sSymName;
1532 ss <<
"_" << BinFuncName() <<
"(";
// Exactly two sub-arguments are required.
1533 assert(vSubArguments.size() == 2);
1534 for (
size_t i = 0; i < vSubArguments.size(); i++)
1538 vSubArguments[i]->GenSlidingWindowDecl(ss);
// Body: compute the binary expression into tmp and return it.
1541 ss <<
"int gid0 = get_global_id(0), i = 0;\n\t";
1542 ss <<
"double tmp = ";
1543 ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(),
1544 vSubArguments[1]->GenSlidingWindowDeclRef()) <<
";\n\t";
1545 ss <<
"return tmp;\n}";
// Binary functions accept both string and numeric input.
1547 virtual bool takeString()
const override {
return true; }
1548 virtual bool takeNumeric()
const override {
return true; }
// Generates a kernel function that accumulates into tmp via fsum, using a
// manually unrolled loop: an outer loop over blocks of outLoopSize
// iterations followed by the remaining iterations written out one by one.
// The loop length is the largest window size among the sub-arguments.
1551 class SumOfProduct :
public SlidingFunctionBase
1554 virtual void GenSlidingWindowFunction( std::stringstream& ss,
1555 const std::string& sSymName, SubArguments& vSubArguments )
override
1557 size_t nCurWindowSize = 0;
// Function head: a double function named SYMBOL_BINFUNCNAME.
1560 ss <<
"\ndouble " << sSymName;
1561 ss <<
"_" << BinFuncName() <<
"(";
// Emit the parameter list and track the maximum window size.
1562 for (
size_t i = 0; i < vSubArguments.size(); i++)
1566 vSubArguments[i]->GenSlidingWindowDecl(ss);
1567 size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1568 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1569 nCurChildWindowSize : nCurWindowSize;
1570 tmpCur = vSubArguments[i]->GetFormulaToken();
1576 throw Unhandled(__FILE__, __LINE__);
// Local state of the generated function, including one currentCountN
// variable per sub-argument.
1580 ss <<
" double tmp = 0.0;\n";
1581 ss <<
" int gid0 = get_global_id(0);\n";
1583 ss <<
"\tint i;\n\t";
1584 ss <<
"int currentCount0;\n";
1585 for (
size_t i = 0; i < vSubArguments.size() - 1; i++)
1586 ss <<
"int currentCount" << i + 1 <<
";\n";
// temp3 collects the body text for full blocks, temp4 for the remainder.
1587 std::stringstream temp3, temp4;
// Full blocks of outLoopSize iterations exist only if the quotient is
// non-zero.
1589 if (nCurWindowSize / outLoopSize != 0)
1591 ss <<
"for(int outLoop=0; outLoop<" <<
1592 nCurWindowSize / outLoopSize <<
"; outLoop++){\n\t";
1593 for (
int count = 0; count < outLoopSize; count++)
1595 ss <<
"i = outLoop*" << outLoopSize <<
"+" << count <<
";\n";
// Per-argument element counts: either i+gid0+1 or i+1.
1598 for (
size_t i = 0; i < vSubArguments.size(); i++)
1600 tmpCur = vSubArguments[i]->GetFormulaToken();
1606 temp3 <<
" currentCount";
1608 temp3 <<
" =i+gid0+1;\n";
1612 temp3 <<
" currentCount";
1614 temp3 <<
" =i+1;\n";
// Accumulation statement; each factor is guarded by a count check and an
// isnan check on its reference.
1619 temp3 <<
"tmp = fsum(";
1620 for (
size_t i = 0; i < vSubArguments.size(); i++)
1624 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1627 temp3 <<
"(currentCount";
1630 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1635 (vSubArguments[i]->GetFormulaToken());
1637 temp3 <<
")||isnan(" << vSubArguments[i]
1638 ->GenSlidingWindowDeclRef();
1640 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1643 else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1648 (vSubArguments[i]->GetFormulaToken());
1650 temp3 <<
")||isnan(" << vSubArguments[i]
1651 ->GenSlidingWindowDeclRef(
true);
1653 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(
true);
1659 temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(
true);
1661 temp3 <<
", tmp);\n\t";
// Remainder iterations that do not fill a whole block; the body text is
// assembled in temp4 in the same way as temp3 above.
1668 for (
size_t count = nCurWindowSize / outLoopSize * outLoopSize;
1669 count < nCurWindowSize; count++)
1671 ss <<
"i =" << count <<
";\n";
1672 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1674 for (
size_t i = 0; i < vSubArguments.size(); i++)
1676 tmpCur = vSubArguments[i]->GetFormulaToken();
1682 temp4 <<
" currentCount";
1684 temp4 <<
" =i+gid0+1;\n";
1688 temp4 <<
" currentCount";
1690 temp4 <<
" =i+1;\n";
1695 temp4 <<
"tmp = fsum(";
1696 for (
size_t i = 0; i < vSubArguments.size(); i++)
1700 if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1703 temp4 <<
"(currentCount";
1706 if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1711 (vSubArguments[i]->GetFormulaToken());
1713 temp4 <<
")||isnan(" << vSubArguments[i]
1714 ->GenSlidingWindowDeclRef();
1716 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1719 else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1724 (vSubArguments[i]->GetFormulaToken());
1726 temp4 <<
")||isnan(" << vSubArguments[i]
1727 ->GenSlidingWindowDeclRef(
true);
1729 temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(
true);
1736 temp4 << vSubArguments[i]
1737 ->GenSlidingWindowDeclRef(
true);
1740 temp4 <<
", tmp);\n\t";
1744 ss <<
"return tmp;\n";
// Numeric input only.
1747 virtual bool takeString()
const override {
return false; }
1748 virtual bool takeNumeric()
const override {
return true; }
// Reduction named nop with start value 0. Its Gen2 leaves the second
// operand unnamed, so only lhs can contribute to the generated expression.
1752 class OpNop :
public Reduction
1755 explicit OpNop(
int nResultSize) : Reduction(nResultSize) {}
1757 virtual std::string GetBottom()
override {
return "0"; }
1758 virtual std::string Gen2(
const std::string& lhs,
const std::string& )
const override
1762 virtual std::string BinFuncName()
const override {
return "nop"; }
// Reduction code generator identified as "fcount": counts operands that are not NaN.
1765 class OpCount :
public Reduction
// Forwards the result size to the Reduction base class.
1768 explicit OpCount(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1770 virtual std::string GetBottom()
override {
return "0"; }
// Builds the OpenCL expression "(isnan(lhs)?rhs:rhs+1.0)": rhs is the running count,
// which is incremented only when lhs is not NaN.
// (The statement returning the stream contents is not part of the visible lines.)
1771 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1773 std::stringstream ss;
1774 ss <<
"(isnan(" << lhs <<
")?" << rhs <<
":" << rhs <<
"+1.0)";
// Short identifier of this operation, used when composing generated function names.
1777 virtual std::string BinFuncName()
const override {
return "fcount"; }
// Declares that this generator accepts multi-column references.
1778 virtual bool canHandleMultiVector()
const override {
return true; }
// Binary code generator identified as "eq".
1781 class OpEqual :
public Binary
// Bottom value as OpenCL source text.
1784 virtual std::string GetBottom()
override {
return "0"; }
// Builds the OpenCL call "strequal(lhs,rhs)"; strequal is defined in the kernel preamble
// and yields 1.0 when its two unsigned arguments are equal, 0 otherwise.
// (The statement returning the stream contents is not part of the visible lines.)
1785 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1787 std::stringstream ss;
1788 ss <<
"strequal(" << lhs <<
"," << rhs <<
")";
// Short identifier of this operation, used when composing generated function names.
1791 virtual std::string BinFuncName()
const override {
return "eq"; }
// Binary code generator identified as "leq".
1794 class OpLessEqual :
public Binary
// Bottom value as OpenCL source text.
1797 virtual std::string GetBottom()
override {
return "0"; }
// Builds the parenthesized OpenCL comparison "(lhs<=rhs)".
// (The statement returning the stream contents is not part of the visible lines.)
1798 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1800 std::stringstream ss;
1801 ss <<
"(" << lhs <<
"<=" << rhs <<
")";
// Short identifier of this operation, used when composing generated function names.
1804 virtual std::string BinFuncName()
const override {
return "leq"; }
// Binary code generator identified as "less".
1807 class OpLess :
public Binary
// Bottom value as OpenCL source text.
1810 virtual std::string GetBottom()
override {
return "0"; }
// Builds the parenthesized OpenCL comparison "(lhs<rhs)".
// (The statement returning the stream contents is not part of the visible lines.)
1811 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1813 std::stringstream ss;
1814 ss <<
"(" << lhs <<
"<" << rhs <<
")";
// Short identifier of this operation, used when composing generated function names.
1817 virtual std::string BinFuncName()
const override {
return "less"; }
// Binary code generator identified as "gt".
1820 class OpGreater :
public Binary
// Bottom value as OpenCL source text.
1823 virtual std::string GetBottom()
override {
return "0"; }
// Builds the parenthesized OpenCL comparison "(lhs>rhs)".
// (The statement returning the stream contents is not part of the visible lines.)
1824 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1826 std::stringstream ss;
1827 ss <<
"(" << lhs <<
">" << rhs <<
")";
// Short identifier of this operation, used when composing generated function names.
1830 virtual std::string BinFuncName()
const override {
return "gt"; }
// Reduction code generator identified as "fsum".
1833 class OpSum :
public Reduction
// Forwards the result size to the Reduction base class.
1836 explicit OpSum(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1838 virtual std::string GetBottom()
override {
return "0"; }
// Builds the OpenCL call "fsum_approx((lhs),(rhs))"; both operands are parenthesized.
// Note the emitted helper is fsum_approx although BinFuncName() reports "fsum".
// (The statement returning the stream contents is not part of the visible lines.)
1839 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1841 std::stringstream ss;
1842 ss <<
"fsum_approx((" << lhs <<
"),(" << rhs <<
"))";
// Short identifier of this operation, used when composing generated function names.
1845 virtual std::string BinFuncName()
const override {
return "fsum"; }
// Declares that this generator accepts multi-column references.
1847 virtual bool canHandleMultiVector()
const override {
return true; }
// Reduction code generator identified as "average".
1850 class OpAverage :
public Reduction
// Forwards the result size to the Reduction base class.
1853 explicit OpAverage(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1855 virtual std::string GetBottom()
override {
return "0"; }
// Builds the OpenCL call "fsum_count(lhs,rhs, &nCount)"; the generated code must therefore
// have a variable named nCount in scope (fsum_count takes a __private int* third argument).
// (The statement returning the stream contents is not part of the visible lines.)
1856 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1858 std::stringstream ss;
1859 ss <<
"fsum_count(" << lhs <<
"," << rhs <<
", &nCount)";
// Short identifier of this operation, used when composing generated function names.
1862 virtual std::string BinFuncName()
const override {
return "average"; }
// Marks this generator as the averaging reduction.
1863 virtual bool isAverage()
const override {
return true; }
// Declares that this generator accepts multi-column references.
1864 virtual bool canHandleMultiVector()
const override {
return true; }
// Reduction code generator identified as "fsub".
1867 class OpSub :
public Reduction
// Forwards the result size to the Reduction base class.
1870 explicit OpSub(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1872 virtual std::string GetBottom()
override {
return "0"; }
// Returns the OpenCL call "fsub_approx(lhs,rhs)".
// Note the emitted helper is fsub_approx although BinFuncName() reports "fsub".
1873 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1875 return "fsub_approx(" + lhs +
"," + rhs +
")";
// Short identifier of this operation, used when composing generated function names.
1877 virtual std::string BinFuncName()
const override {
return "fsub"; }
// Reduction code generator identified as "fmul"; its bottom value is the multiplicative identity "1".
1880 class OpMul :
public Reduction
// Forwards the result size to the Reduction base class.
1883 explicit OpMul(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1885 virtual std::string GetBottom()
override {
return "1"; }
// Returns the OpenCL expression "lhs*rhs".
// NOTE(review): neither operand nor the product is parenthesized, so an operand such as
// "a+b" would bind incorrectly — confirm callers always pass atomic/parenthesized text.
1886 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1888 return lhs +
"*" + rhs;
// Short identifier of this operation, used when composing generated function names.
1890 virtual std::string BinFuncName()
const override {
return "fmul"; }
// Always true for multiplication.
// NOTE(review): presumably tells the generator a zero operand forces a zero result — confirm.
1891 virtual bool ZeroReturnZero()
override {
return true; }
// Reduction code generator identified as "fdiv".
1895 class OpDiv :
public Reduction
// Forwards the result size to the Reduction base class.
1898 explicit OpDiv(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1900 virtual std::string GetBottom()
override {
return "1.0"; }
// Returns "(rhs==0 ? CreateDoubleError(DivisionByZero) : (lhs/rhs) )": the generated code
// yields the DivisionByZero error value instead of dividing when the divisor compares equal to 0.
1901 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1903 return "(" + rhs +
"==0 ? CreateDoubleError(DivisionByZero) : (" + lhs +
"/" + rhs +
") )";
// Short identifier of this operation, used when composing generated function names.
1905 virtual std::string BinFuncName()
const override {
return "fdiv"; }
// Emits NaN guards for argument number argno into ss.
// Only fragments of the body are visible: one branch emits
// "if (isnan(arg)) { return CreateDoubleError(DivisionByZero);" for the given argument
// (its selecting condition is not shown — presumably argno == 1, TODO confirm);
// the argno == 0 branch emits a test that argument 0 is NaN while argument 1 is
// neither NaN nor equal to 0. What follows that test and the return value are not visible.
1907 virtual bool HandleNaNArgument( std::stringstream& ss,
unsigned argno, SubArguments& vSubArguments )
const override
1912 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() <<
")) {\n"
1913 " return CreateDoubleError(DivisionByZero);\n"
1917 else if (argno == 0)
1920 "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() <<
") &&\n"
1921 " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() <<
") || " << vSubArguments[1]->GenSlidingWindowDeclRef() <<
" == 0)) {\n"
// Reduction code generator identified as "min"; the accumulator starts as NAN.
1930 class OpMin :
public Reduction
// Forwards the result size to the Reduction base class.
1933 explicit OpMin(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1935 virtual std::string GetBottom()
override {
return "NAN"; }
// Returns the OpenCL call "fmin_count(lhs,rhs, &nCount)"; the generated code must have a
// variable named nCount in scope (fmin_count takes a __private int* third argument).
1936 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1938 return "fmin_count(" + lhs +
"," + rhs +
", &nCount)";
// Short identifier of this operation, used when composing generated function names.
1940 virtual std::string BinFuncName()
const override {
return "min"; }
// Marks this generator as a min/max reduction.
1941 virtual bool isMinOrMax()
const override {
return true; }
// Declares that this generator accepts multi-column references.
1942 virtual bool canHandleMultiVector()
const override {
return true; }
// Reduction code generator identified as "max"; the accumulator starts as NAN.
1945 class OpMax :
public Reduction
// Forwards the result size to the Reduction base class.
1948 explicit OpMax(
int nResultSize) : Reduction(nResultSize) {}
// Text used to initialise the accumulator in generated OpenCL code.
1950 virtual std::string GetBottom()
override {
return "NAN"; }
// Returns the OpenCL call "fmax_count(lhs,rhs, &nCount)"; the generated code must have a
// variable named nCount in scope (fmax_count takes a __private int* third argument).
1951 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1953 return "fmax_count(" + lhs +
"," + rhs +
", &nCount)";
// Short identifier of this operation, used when composing generated function names.
1955 virtual std::string BinFuncName()
const override {
return "max"; }
// Marks this generator as a min/max reduction.
1956 virtual bool isMinOrMax()
const override {
return true; }
// Declares that this generator accepts multi-column references.
1957 virtual bool canHandleMultiVector()
const override {
return true; }
// Code generator identified as "fsop", derived from SumOfProduct.
1960 class OpSumProduct :
public SumOfProduct
// Bottom value as OpenCL source text.
1963 virtual std::string GetBottom()
override {
return "0"; }
// Returns the OpenCL expression "lhs*rhs" (operands are not parenthesized here).
1964 virtual std::string Gen2(
const std::string& lhs,
const std::string& rhs )
const override
1966 return lhs +
"*" + rhs;
// Short identifier of this operation, used when composing generated function names.
1968 virtual std::string BinFuncName()
const override {
return "fsop"; }
// Writes the OpenCL source of the reduction kernel(s) for this vector reference into ss.
//
// For every code generator except OpAverage a single kernel "<name>_reduction" is emitted.
// For OpAverage two kernels are emitted instead: "<name>_sum_reduction" and
// "<name>_count_reduction". All kernels share the signature
//   (__global double* A, __global double *result, int arrayLength, int windowSize)
// and the same shape: each work-item folds up to two elements per 512-element chunk
// (index lidx and lidx + 256), stores its partial value in a 256-entry __local buffer,
// a halving loop (i = 128, 64, ...) combines the buffer, and work-item 0 writes
// result[get_group_id(1)].
//
// NOTE(review): many lines of this function (braces, the conditions that select between the
// alternative "offset"/"end" declarations, and several string terminators) are not part of
// the visible text, so the exact emitted source cannot be reconstructed from here.
1971 template<
class Base>
1972 void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
// Non-average case: one generic reduction kernel driven by mpCodeGen->Gen2().
1974 if (!dynamic_cast<OpAverage*>(
mpCodeGen.get()))
1976 std::string
name = Base::GetName();
1977 ss <<
"__kernel void " << name;
1978 ss <<
"_reduction(__global double* A, "
1979 "__global double *result,int arrayLength,int windowSize){\n";
1980 ss <<
" double tmp, current_result =" <<
// One result slot per work-group along dimension 1; lidx indexes within the group.
1983 ss <<
" int writePos = get_group_id(1);\n";
1984 ss <<
" int lidx = get_local_id(0);\n";
1985 ss <<
" __local double shm_buf[256];\n";
// Alternative declarations of "offset" and "end"; which ones are emitted depends on
// conditions that are not in the visible lines.
1987 ss <<
" int offset = 0;\n";
1989 ss <<
" int offset = get_group_id(1);\n";
1991 ss <<
" int end = windowSize;\n";
1993 ss <<
" int end = offset + windowSize;\n";
1995 ss <<
" int end = windowSize + get_group_id(1);\n";
1997 ss <<
" int end = windowSize;\n";
// The window end is clamped to the array length in the generated code.
1998 ss <<
" end = min(end, arrayLength);\n";
2000 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
// Chunked loop: every iteration covers 512 input elements (2 per work-item).
2001 ss <<
" int loop = arrayLength/512 + 1;\n";
2002 ss <<
" for (int l=0; l<loop; l++){\n";
// tmp restarts from the operation's bottom value for each chunk.
2003 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
2004 ss <<
" int loopOffset = l*512;\n";
2005 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
// legalize(a, b) (kernel preamble) yields b when a is NaN, so a NaN combination keeps tmp.
2006 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
2007 "A[loopOffset + lidx + offset]",
"tmp") <<
", tmp);\n";
2008 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
2009 "A[loopOffset + lidx + offset + 256]",
"tmp") <<
", tmp);\n";
2010 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
2011 ss <<
" tmp = legalize(" <<
mpCodeGen->Gen2(
2012 "A[loopOffset + lidx + offset]",
"tmp") <<
", tmp);\n";
// Publish the per-work-item partial value, then combine pairwise in local memory.
2013 ss <<
" shm_buf[lidx] = tmp;\n";
2014 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2015 ss <<
" for (int i = 128; i >0; i/=2) {\n";
2016 ss <<
" if (lidx < i)\n";
2017 ss <<
" shm_buf[lidx] = ";
// Partial counts are combined by plain addition; every other operation reuses Gen2().
2019 if (dynamic_cast<OpCount*>(
mpCodeGen.get()))
2020 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
2022 ss <<
mpCodeGen->Gen2(
"shm_buf[lidx]",
"shm_buf[lidx + i]") <<
";\n";
2023 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
// Work-item 0 folds the chunk result into the running result.
2025 ss <<
" if (lidx == 0)\n";
2026 ss <<
" current_result =";
2027 if (dynamic_cast<OpCount*>(
mpCodeGen.get()))
2028 ss <<
"current_result + shm_buf[0]";
2030 ss <<
mpCodeGen->Gen2(
"current_result",
"shm_buf[0]");
2032 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2034 ss <<
" if (lidx == 0)\n";
2035 ss <<
" result[writePos] = current_result;\n";
// Average case, first kernel: "<name>_sum_reduction" adds the elements with a literal '+'.
2040 std::string name = Base::GetName();
2042 ss <<
"__kernel void " << name <<
"_sum";
2043 ss <<
"_reduction(__global double* A, "
2044 "__global double *result,int arrayLength,int windowSize){\n";
2045 ss <<
" double tmp, current_result =" <<
2048 ss <<
" int writePos = get_group_id(1);\n";
2049 ss <<
" int lidx = get_local_id(0);\n";
2050 ss <<
" __local double shm_buf[256];\n";
// Same alternative "offset"/"end" declarations as above; selecting conditions not visible.
2052 ss <<
" int offset = 0;\n";
2054 ss <<
" int offset = get_group_id(1);\n";
2056 ss <<
" int end = windowSize;\n";
2058 ss <<
" int end = offset + windowSize;\n";
2060 ss <<
" int end = windowSize + get_group_id(1);\n";
2062 ss <<
" int end = windowSize;\n";
2063 ss <<
" end = min(end, arrayLength);\n";
2064 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2065 ss <<
" int loop = arrayLength/512 + 1;\n";
2066 ss <<
" for (int l=0; l<loop; l++){\n";
2067 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
2068 ss <<
" int loopOffset = l*512;\n";
2069 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
// The closing ", tmp);" of each legalize(...) call is emitted on lines not visible here.
2070 ss <<
" tmp = legalize(";
2071 ss <<
"(A[loopOffset + lidx + offset]+ tmp)";
2073 ss <<
" tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
2075 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
2076 ss <<
" tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
2078 ss <<
" shm_buf[lidx] = tmp;\n";
2079 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2080 ss <<
" for (int i = 128; i >0; i/=2) {\n";
2081 ss <<
" if (lidx < i)\n";
2082 ss <<
" shm_buf[lidx] = ";
2083 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
2084 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2086 ss <<
" if (lidx == 0)\n";
2087 ss <<
" current_result =";
2088 ss <<
"current_result + shm_buf[0]";
2090 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2092 ss <<
" if (lidx == 0)\n";
2093 ss <<
" result[writePos] = current_result;\n";
// Average case, second kernel: "<name>_count_reduction" counts the non-NaN elements.
2096 ss <<
"__kernel void " << name <<
"_count";
2097 ss <<
"_reduction(__global double* A, "
2098 "__global double *result,int arrayLength,int windowSize){\n";
2099 ss <<
" double tmp, current_result =" <<
2102 ss <<
" int writePos = get_group_id(1);\n";
2103 ss <<
" int lidx = get_local_id(0);\n";
2104 ss <<
" __local double shm_buf[256];\n";
// Same alternative "offset"/"end" declarations as above; selecting conditions not visible.
2106 ss <<
" int offset = 0;\n";
2108 ss <<
" int offset = get_group_id(1);\n";
2110 ss <<
" int end = windowSize;\n";
2112 ss <<
" int end = offset + windowSize;\n";
2114 ss <<
" int end = windowSize + get_group_id(1);\n";
2116 ss <<
" int end = windowSize;\n";
2117 ss <<
" end = min(end, arrayLength);\n";
2118 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2119 ss <<
" int loop = arrayLength/512 + 1;\n";
2120 ss <<
" for (int l=0; l<loop; l++){\n";
2121 ss <<
" tmp = " <<
mpCodeGen->GetBottom() <<
";\n";
2122 ss <<
" int loopOffset = l*512;\n";
2123 ss <<
" if((loopOffset + lidx + offset + 256) < end) {\n";
// tmp is incremented by 1.0 for each element that is not NaN.
2124 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2126 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
2128 ss <<
" } else if ((loopOffset + lidx + offset) < end)\n";
2129 ss <<
" tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2131 ss <<
" shm_buf[lidx] = tmp;\n";
2132 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2133 ss <<
" for (int i = 128; i >0; i/=2) {\n";
2134 ss <<
" if (lidx < i)\n";
2135 ss <<
" shm_buf[lidx] = ";
2136 ss <<
"shm_buf[lidx] + shm_buf[lidx + i];\n";
2137 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2139 ss <<
" if (lidx == 0)\n";
2140 ss <<
" current_result =";
2141 ss <<
"current_result + shm_buf[0];";
2143 ss <<
" barrier(CLK_LOCAL_MEM_FENCE);\n";
2145 ss <<
" if (lidx == 0)\n";
2146 ss <<
" result[writePos] = current_result;\n";
// Emits into ss the code that consumes the pre-reduced value stored at "<name>[gid0]".
//
// - OpAverage: emits Gen2("<name>[gid0]", "tmp") followed by ";", then adjusts nCount:
//   "nCount = nCount-1;" and "nCount = nCount + <name>[gid0+nResultSize];" — i.e. the
//   count is read from the second half of the buffer, nResultSize entries after the sum.
// - OpCount: emits "<name>[gid0]+ tmp".
// - The handling of other code generators and of needBody is not in the visible lines.
//
// Returns nCurWindowSize.
2152 template<
class Base>
2153 size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
2154 std::stringstream& ss,
int nResultSize,
bool& needBody )
// Reference to this work-item's slot of the reduced buffer.
2158 std::string temp = Base::GetName() +
"[gid0]";
2161 if (dynamic_cast<OpAverage*>(
mpCodeGen.get()))
2163 ss <<
mpCodeGen->Gen2(temp,
"tmp") <<
";\n";
// NOTE(review): the decrement presumably compensates for the increment performed inside the
// fsum_count call emitted just above — confirm.
2164 ss <<
"nCount = nCount-1;\n";
2165 ss <<
"nCount = nCount +";
2166 ss << Base::GetName() <<
"[gid0+" << nResultSize <<
"]" <<
";\n";
2168 else if (dynamic_cast<OpCount*>(
mpCodeGen.get()))
2169 ss << temp <<
"+ tmp";
2174 return nCurWindowSize;
// Uploads the input column, runs the reduction kernel(s) generated for this argument and
// binds the buffer holding the reduced results as argument number argno of kernel k.
//
// Visible steps:
//   1. Base::mpClmem is created over the host buffer (CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR).
//   2. mpClmem2 is created with room for w doubles to receive the per-row results.
//   3. "<name>_reduction" (or "<name>_sum_reduction" for OpAverage) is created, given the
//      arguments (input buffer, result buffer, nInput, nCurWindowSize) and enqueued with a
//      global size of {256, w} and a local size of {256, 1}.
//   4. For OpAverage only: the sums are copied to the first w entries of a host array, the
//      "<name>_count_reduction" kernel is run the same way, the counts are copied to the
//      next w entries, and mpClmem2 is released and re-created as a 2*w-double buffer
//      initialised from that host array.
//   5. mpClmem2 is set as argument argno of k.
//
// Throws OpenCLError when an OpenCL call reports failure, and Unhandled in a case whose
// condition is not visible. The return value is not part of the visible lines.
2177 template<
class Base>
2178 size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k,
int argno,
int w, cl_program
mpProgram )
// The input buffer must not have been created yet.
2180 assert(Base::mpClmem ==
nullptr);
2190 throw Unhandled(__FILE__, __LINE__);
2191 double* pHostBuffer =
const_cast<double*
>(
2193 size_t szHostBuffer = nInput *
sizeof(double);
// Input buffer wraps the host memory directly (CL_MEM_USE_HOST_PTR) rather than copying it.
2194 Base::mpClmem = clCreateBuffer(kEnv.
mpkContext,
2195 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
2198 SAL_INFO(
"sc.opencl",
"Created buffer " << Base::mpClmem <<
" size " << nInput <<
"*" <<
sizeof(
double) <<
"=" << szHostBuffer <<
" using host buffer " << pHostBuffer);
// Result buffer: one double per output row (w rows).
2201 CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
2202 sizeof(
double) * w,
nullptr,
nullptr);
2203 if (CL_SUCCESS != err)
2204 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2205 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " <<
sizeof(
double) <<
"*" << w <<
"=" << (
sizeof(
double)*w));
// Kernel names must match those emitted by GenSlidingWindowFunction.
2208 std::string kernelName;
2209 if (!dynamic_cast<OpAverage*>(
mpCodeGen.get()))
2210 kernelName = Base::GetName() +
"_reduction";
2212 kernelName = Base::GetName() +
"_sum_reduction";
2213 cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2214 if (err != CL_SUCCESS)
2215 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2216 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << mpProgram);
// Argument 0: input buffer.
2220 cl_mem buf = Base::GetCLBuffer();
2221 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 0 <<
": cl_mem: " << buf);
2222 err = clSetKernelArg(redKernel, 0,
sizeof(cl_mem),
2223 static_cast<void*>(&buf));
2224 if (CL_SUCCESS != err)
2225 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Argument 1: result buffer.
2227 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 1 <<
": cl_mem: " <<
mpClmem2);
2228 err = clSetKernelArg(redKernel, 1,
sizeof(cl_mem), &
mpClmem2);
2229 if (CL_SUCCESS != err)
2230 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Arguments 2 and 3: array length and window size, passed with size sizeof(cl_int).
// NOTE(review): the declarations of nInput and nCurWindowSize are not visible here; if they
// are wider than cl_int, only their first sizeof(cl_int) bytes reach the kernel, which is
// byte-order dependent — confirm the declared types.
2232 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 2 <<
": cl_int: " << nInput);
2233 err = clSetKernelArg(redKernel, 2,
sizeof(cl_int), static_cast<void*>(&nInput));
2234 if (CL_SUCCESS != err)
2235 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2237 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 3 <<
": cl_int: " << nCurWindowSize);
2238 err = clSetKernelArg(redKernel, 3,
sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2239 if (CL_SUCCESS != err)
2240 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// 256 work-items per group along dimension 0, one group per output row along dimension 1;
// this matches the 256-entry local buffer declared in the generated kernel.
2243 size_t global_work_size[] = { 256,
static_cast<size_t>(w) };
2244 size_t const local_work_size[] = { 256, 1 };
2245 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2246 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2247 global_work_size, local_work_size, 0,
nullptr,
nullptr);
2248 if (CL_SUCCESS != err)
2249 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2251 if (CL_SUCCESS != err)
2252 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
// Averages need a second pass that produces the element counts.
2253 if (dynamic_cast<OpAverage*>(
mpCodeGen.get()))
// Host staging array: sums in [0, w), counts in [w, 2*w).
2256 std::unique_ptr<double[]> pAllBuffer(
new double[2 * w]);
2257 double* resbuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
2259 CL_TRUE, CL_MAP_READ, 0,
2260 sizeof(
double) * w, 0,
nullptr,
nullptr,
2262 if (err != CL_SUCCESS)
2263 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
2265 for (
int i = 0; i < w; i++)
2266 pAllBuffer[i] = resbuf[i];
2268 if (err != CL_SUCCESS)
2269 throw OpenCLError(
"clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
// redKernel is re-pointed at the count kernel.
// NOTE(review): no release of the previous kernel object is visible before this
// reassignment; lines are missing from view, so confirm whether it is released.
2271 kernelName = Base::GetName() +
"_count_reduction";
2272 redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2273 if (err != CL_SUCCESS)
2274 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2275 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << mpProgram);
// Same four arguments as for the sum kernel.
2278 buf = Base::GetCLBuffer();
2279 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 0 <<
": cl_mem: " << buf);
2280 err = clSetKernelArg(redKernel, 0,
sizeof(cl_mem),
2281 static_cast<void*>(&buf));
2282 if (CL_SUCCESS != err)
2283 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2285 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 1 <<
": cl_mem: " <<
mpClmem2);
2286 err = clSetKernelArg(redKernel, 1,
sizeof(cl_mem), &
mpClmem2);
2287 if (CL_SUCCESS != err)
2288 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2290 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 2 <<
": cl_int: " << nInput);
2291 err = clSetKernelArg(redKernel, 2,
sizeof(cl_int), static_cast<void*>(&nInput));
2292 if (CL_SUCCESS != err)
2293 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2295 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << 3 <<
": cl_int: " << nCurWindowSize);
2296 err = clSetKernelArg(redKernel, 3,
sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2297 if (CL_SUCCESS != err)
2298 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2301 size_t global_work_size1[] = { 256,
static_cast<size_t>(w) };
2302 size_t const local_work_size1[] = { 256, 1 };
2303 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2304 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2305 global_work_size1, local_work_size1, 0,
nullptr,
nullptr);
2306 if (CL_SUCCESS != err)
2307 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2309 if (CL_SUCCESS != err)
2310 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
// Read the counts back into the second half of the staging array.
2311 resbuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
2313 CL_TRUE, CL_MAP_READ, 0,
2314 sizeof(
double) * w, 0,
nullptr,
nullptr,
2316 if (err != CL_SUCCESS)
2317 throw OpenCLError(
"clEnqueueMapBuffer", err, __FILE__, __LINE__);
2318 for (
int i = 0; i < w; i++)
2319 pAllBuffer[i + w] = resbuf[i];
2322 if (CL_SUCCESS != err)
// The w-double result buffer is replaced by a 2*w-double buffer holding sums then counts.
2326 err = clReleaseMemObject(
mpClmem2);
2331 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
2332 w *
sizeof(
double) * 2, pAllBuffer.get(), &err);
2333 if (CL_SUCCESS != err)
2334 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
// NOTE(review): the log below reports w*sizeof(double) although the buffer created above
// is twice that size.
2335 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpClmem2 <<
" size " << w <<
"*" <<
sizeof(
double) <<
"=" << (w*
sizeof(
double)) <<
" copying host buffer " << pAllBuffer.get());
// Finally hand the reduced results to the main kernel.
2338 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " <<
mpClmem2);
2339 err = clSetKernelArg(k, argno,
sizeof(cl_mem), &
mpClmem2);
2340 if (CL_SUCCESS != err)
2341 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Wraps a device buffer argument; the constant member is zeroed.
2347 explicit SumIfsArgs(cl_mem x) :
mCLMem(x),
mConst(0.0) { }
// Wraps a constant double argument; the buffer member is null, which is how users of this
// type distinguish a constant from a buffer (they test mCLMem).
2348 explicit SumIfsArgs(
double x) :
mCLMem(nullptr),
mConst(x) { }
// Kernel argument that aggregates a list of sub-arguments (mvSubArguments) together with
// the code generator (mpCodeGen) that combines them.
2354 class DynamicKernelSoPArguments :
public DynamicKernelArgument
// Container type of the child arguments.
2357 typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
// Tail of the constructor declaration; its leading parameters are not in the visible lines.
2361 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize );
// Binds this argument to kernel k starting at argument index argno.
//
// Visible steps:
//   - each sub-argument marshals itself at argno + i, with i advanced by what it returns;
//   - for an OpGeoMean code generator, the "GeoMean_reduction" kernel is run over the
//     sub-argument buffers and its result buffer (pClmem2) becomes argument argno of k;
//   - for an OpSumIfs code generator whose NeedReductionKernel() is true, the
//     "<first sub-argument name>_SumIfs_reduction" kernel is run and its result buffer
//     (mpClmem2) becomes argument argno of k.
//
// Throws OpenCLError when an OpenCL call reports failure. The return value and the
// ordering between the branches are not part of the visible lines.
2364 virtual size_t Marshal( cl_kernel k,
int argno,
int nVectorWidth, cl_program pProgram )
override
2370 i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
2372 if (dynamic_cast<OpGeoMean*>(
mpCodeGen.get()))
// Collect one entry per sub-argument: its device buffer, or null when it is not a VectorRef.
2379 std::vector<cl_mem> vclmem;
2380 for (
const auto& rxSubArgument : mvSubArguments)
2382 if (VectorRef*
VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2383 vclmem.push_back(
VR->GetCLBuffer());
2385 vclmem.push_back(
nullptr);
// Result buffer: one double per output row.
2387 pClmem2 = clCreateBuffer(kEnv.
mpkContext, CL_MEM_READ_WRITE,
2388 sizeof(
double) * nVectorWidth,
nullptr, &err);
2389 if (CL_SUCCESS != err)
2390 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2391 SAL_INFO(
"sc.opencl",
"Created buffer " << pClmem2 <<
" size " <<
sizeof(
double) <<
"*" << nVectorWidth <<
"=" << (
sizeof(
double)*nVectorWidth));
2393 std::string kernelName =
"GeoMean_reduction";
2394 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2395 if (err != CL_SUCCESS)
2396 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2397 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << pProgram);
// NOTE(review): for a null entry the argument size is sizeof(double) but the pointer still
// refers to the (null) cl_mem element of vclmem, so the bytes of a null handle are passed
// as the "double" value — and sizeof(double) bytes are read from a cl_mem-sized object.
// Confirm this is intended.
2400 for (
size_t j = 0; j < vclmem.size(); j++)
2402 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": " << (vclmem[j] ?
"cl_mem" :
"double") <<
": " << vclmem[j]);
2403 err = clSetKernelArg(redKernel, j,
2404 vclmem[j] ?
sizeof(cl_mem) :
sizeof(
double),
2405 static_cast<void*>(&vclmem[j]));
2406 if (CL_SUCCESS != err)
2407 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// The result buffer follows the inputs in the reduction kernel's argument list.
2409 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << vclmem.size() <<
": cl_mem: " << pClmem2);
2410 err = clSetKernelArg(redKernel, vclmem.size(),
sizeof(cl_mem), static_cast<void*>(&pClmem2));
2411 if (CL_SUCCESS != err)
2412 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// 256 work-items per group, one group per output row.
2415 size_t global_work_size[] = { 256,
static_cast<size_t>(nVectorWidth) };
2416 size_t const local_work_size[] = { 256, 1 };
2417 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2418 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2419 global_work_size, local_work_size, 0,
nullptr,
nullptr);
2420 if (CL_SUCCESS != err)
2421 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2423 if (CL_SUCCESS != err)
2424 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
// Hand the reduced GeoMean results to the main kernel.
2427 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << pClmem2);
2428 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&pClmem2));
2429 if (CL_SUCCESS != err)
2430 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2432 if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(
mpCodeGen.get()))
// NOTE(review): the first sub-argument is downcast with static_cast and no visible check;
// presumably guaranteed by how SumIfs arguments are constructed — confirm.
2437 DynamicKernelArgument*
Arg = mvSubArguments[0].get();
2438 DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2439 static_cast<DynamicKernelSlidingArgument<VectorRef>*
>(Arg);
2442 if (OpSumCodeGen->NeedReductionKernel())
2444 size_t nInput = slidingArgPtr->GetArrayLength();
2445 size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
// One entry per sub-argument: device buffer, constant double, or a null buffer otherwise.
2446 std::vector<SumIfsArgs> vclmem;
2448 for (
const auto& rxSubArgument : mvSubArguments)
2450 if (VectorRef*
VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2451 vclmem.emplace_back(
VR->GetCLBuffer());
2452 else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
2453 vclmem.emplace_back(CA->GetDouble());
2455 vclmem.emplace_back(
nullptr);
// Result buffer: one double per output row.
2457 mpClmem2 = clCreateBuffer(kEnv.
mpkContext, CL_MEM_READ_WRITE,
2458 sizeof(
double) * nVectorWidth,
nullptr, &err);
2459 if (CL_SUCCESS != err)
2460 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
2461 SAL_INFO(
"sc.opencl",
"Created buffer " << mpClmem2 <<
" size " <<
sizeof(
double) <<
"*" << nVectorWidth <<
"=" << (
sizeof(
double)*nVectorWidth));
2463 std::string kernelName = mvSubArguments[0]->GetName() +
"_SumIfs_reduction";
2464 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2465 if (err != CL_SUCCESS)
2466 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
2467 SAL_INFO(
"sc.opencl",
"Created kernel " << redKernel <<
" with name " << kernelName <<
" in program " << pProgram);
// Each entry is passed either as a cl_mem handle or as its constant double value.
2470 for (
size_t j = 0; j < vclmem.size(); j++)
2473 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": cl_mem: " << vclmem[j].mCLMem);
2475 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << j <<
": double: " << vclmem[j].
mConst);
2476 err = clSetKernelArg(redKernel, j,
2477 vclmem[j].mCLMem ?
sizeof(cl_mem) :
sizeof(
double),
2478 vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
2479 static_cast<void*>(&vclmem[j].mConst));
2480 if (CL_SUCCESS != err)
2481 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2483 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << vclmem.size() <<
": cl_mem: " <<
mpClmem2);
2484 err = clSetKernelArg(redKernel, vclmem.size(),
sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2485 if (CL_SUCCESS != err)
2486 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// NOTE(review): nInput and nCurWindowSize are declared size_t above but are passed with an
// argument size of sizeof(cl_int); only the first sizeof(cl_int) bytes of each object are
// read, which yields the intended value only on little-endian hosts (and for values that
// fit in cl_int). A cl_int temporary would make this portable.
2488 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << (vclmem.size() + 1) <<
": cl_int: " << nInput);
2489 err = clSetKernelArg(redKernel, vclmem.size() + 1,
sizeof(cl_int), static_cast<void*>(&nInput));
2490 if (CL_SUCCESS != err)
2491 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
2493 SAL_INFO(
"sc.opencl",
"Kernel " << redKernel <<
" arg " << (vclmem.size() + 2) <<
": cl_int: " << nCurWindowSize);
2494 err = clSetKernelArg(redKernel, vclmem.size() + 2,
sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2495 if (CL_SUCCESS != err)
2496 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// 256 work-items per group, one group per output row.
2498 size_t global_work_size[] = { 256,
static_cast<size_t>(nVectorWidth) };
2499 size_t const local_work_size[] = { 256, 1 };
2500 SAL_INFO(
"sc.opencl",
"Enqueuing kernel " << redKernel);
2501 err = clEnqueueNDRangeKernel(kEnv.
mpkCmdQueue, redKernel, 2,
nullptr,
2502 global_work_size, local_work_size, 0,
nullptr,
nullptr);
2503 if (CL_SUCCESS != err)
2504 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2507 if (CL_SUCCESS != err)
2508 throw OpenCLError(
"clFinish", err, __FILE__, __LINE__);
// The reduction kernel object is released once it has finished.
2510 SAL_INFO(
"sc.opencl",
"Releasing kernel " << redKernel);
2511 err = clReleaseKernel(redKernel);
// Hand the reduced SumIfs results to the main kernel.
2515 SAL_INFO(
"sc.opencl",
"Kernel " << k <<
" arg " << argno <<
": cl_mem: " << mpClmem2);
2516 err = clSetKernelArg(k, argno,
sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2517 if (CL_SUCCESS != err)
2518 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// Emits the helper functions of the sub-arguments (rArg; the loop header is not in the
// visible lines) and then this node's own function through the code generator, passing
// the symbol name and the sub-argument list.
2524 virtual void GenSlidingWindowFunction( std::stringstream& ss )
override
2527 rArg->GenSlidingWindowFunction(ss);
2528 mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
// Writes the declaration reference of every sub-argument, in order, into ss.
// (Any separator emitted between them is on lines not visible here.)
2530 virtual void GenDeclRef( std::stringstream& ss )
const override
2532 for (
size_t i = 0; i < mvSubArguments.size(); i++)
2536 mvSubArguments[i]->GenDeclRef(ss);
// Iterates over the sub-arguments; every element except the first gets special handling
// (the loop increment and both branch bodies are not in the visible lines —
// presumably a separator followed by each child's declaration, TODO confirm).
2539 virtual void GenDecl( std::stringstream& ss )
const override
2541 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2544 if (it != mvSubArguments.begin())
// Returns the largest window size reported by any sub-argument, or 0 when there are none.
2550 virtual size_t GetWindowSize()
const override
2552 size_t nCurWindowSize = 0;
2553 for (
const auto & rSubArgument : mvSubArguments)
2555 size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
// Running maximum.
2556 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2557 nCurChildWindowSize : nCurWindowSize;
2559 return nCurWindowSize;
// Writes the sliding-window declaration of every sub-argument into ss; elements after the
// first are preceded by something emitted on a line not visible here (presumably a separator).
2563 virtual void GenSlidingWindowDecl( std::stringstream& ss )
const override
2565 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2568 if (it != mvSubArguments.begin())
2570 (*it)->GenSlidingWindowDecl(ss);
// Produces the OpenCL expression that refers to this node's value.
//
// Two forms are visible; the condition choosing between them is not:
//   - a call "<mSymName>_<BinFuncName>(" followed by the declaration references of all
//     sub-arguments;
//   - an inline expression built by Gen2 from the two sub-arguments' own references. This
//     form requires exactly two sub-arguments and throws Unhandled otherwise. Each child
//     is asked for a nested reference according to a test on its formula token type whose
//     right-hand side is not visible.
2575 virtual std::string GenSlidingWindowDeclRef(
bool nested =
false )
const override
2577 std::stringstream ss;
2580 ss << mSymName <<
"_" <<
mpCodeGen->BinFuncName() <<
"(";
2581 for (
size_t i = 0; i < mvSubArguments.size(); i++)
2585 mvSubArguments[i]->GenDeclRef(ss);
2591 if (mvSubArguments.size() != 2)
2592 throw Unhandled(__FILE__, __LINE__);
2593 bool bArgument1_NeedNested =
2594 mvSubArguments[0]->GetFormulaToken()->GetType()
2596 bool bArgument2_NeedNested =
2597 mvSubArguments[1]->GetFormulaToken()->GetType()
2601 Gen2(mvSubArguments[0]
2602 ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2604 ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
// Builds a name fragment: "_" + BinFuncName(), followed by the DumpOpName() of every
// sub-argument in order. (The return statement is not in the visible lines.)
2609 virtual std::string DumpOpName()
const override
2611 std::string t =
"_" +
mpCodeGen->BinFuncName();
2612 for (
const auto & rSubArgument : mvSubArguments)
2613 t += rSubArgument->DumpOpName();
// Lets every sub-argument add its inline helper declarations and definitions to the two
// sets. (Any contribution made by this node itself is on lines not visible here.)
2616 virtual void DumpInlineFun( std::set<std::string>& decls,
2617 std::set<std::string>& funs )
const override
2620 for (
const auto & rSubArgument : mvSubArguments)
2621 rSubArgument->DumpInlineFun(decls, funs);
// Checks the sub-arguments for one that is not empty; the values returned in either case
// are on lines not visible here (presumably false as soon as one is non-empty, true otherwise).
2623 virtual bool IsEmpty()
const override
2625 for (
const auto & rSubArgument : mvSubArguments)
2626 if( !rSubArgument->IsEmpty())
// Releases the mpClmem2 device buffer; the guard around the release and the handling of
// err are not in the visible lines.
2630 virtual ~DynamicKernelSoPArguments()
override
2635 err = clReleaseMemObject(
mpClmem2);
// Code generator that defines how the sub-arguments are combined (shared ownership).
2643 std::shared_ptr<SlidingFunctionBase>
mpCodeGen;
// Factory fragment (the function's name and first parameter are not in the visible lines):
// constructs a shared DynamicKernelSoPArguments from the given config, name, formula tree
// node, code generator (moved into the new object) and result size.
2650 const std::string& ts,
const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
2653 return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
// Template factory fragment (the function's name and most conditions are not in the
// visible lines): picks the argument class for a vector reference from the dynamic type of
// pCodeGen.
//   - OpSumIfs: DynamicKernelSlidingArgument<VectorRef> on one path, <Base> on another;
//   - OpMul, OpSub and any generator that is not a Reduction: DynamicKernelSlidingArgument<Base>;
//   - remaining Reduction generators: either DynamicKernelSlidingArgument<Base> or
//     ParallelReductionVectorRef<Base>, chosen by tests on the formula token that are not shown.
2656 template<
class Base>
2659 std::shared_ptr<SlidingFunctionBase>& pCodeGen,
2664 if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2668 return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
2669 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2678 else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2680 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2683 else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2685 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2688 else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2690 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2695 ft->GetFormulaToken());
2698 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2700 return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
2702 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
// Constructor. Iterates over the children of formula tree node `ft`, derives
// a per-child name of the form "<s>_<i>", and selects an argument object or
// an Op* code generator for each child. Inputs that cannot be handled are
// rejected by throwing Unhandled or UnhandledToken (see the messages below).
// NOTE: the embedded line numbers show that many original lines, including
// the conditions that select each branch and the calls that receive the
// created objects, are not part of this excerpt.
2705 DynamicKernelSoPArguments::DynamicKernelSoPArguments(
const ScCalcConfig& config,
2706 const std::string& s,
const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize ) :
2709 size_t nChildren = ft->Children.size();
2711 for (
size_t i = 0; i < nChildren; i++)
2715 throw Unhandled(__FILE__, __LINE__);
2718 throw Unhandled(__FILE__, __LINE__);
// Name for the i-th child: the parent's name, an underscore, the child index.
2720 std::stringstream tmpname;
2721 tmpname << s <<
"_" << i;
2722 std::string ts = tmpname.str();
// A multi-column DoubleRef is rejected when the code generator reports that
// it cannot handle multi-vector input.
2740 if( !pCodeGen->canHandleMultiVector())
2741 throw UnhandledToken((
"Function '" + pCodeGen->BinFuncName()
2742 +
"' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
2744 SAL_INFO(
"sc.opencl",
"multi-column DoubleRef");
// Rejection for too many kernel parameters; the message reports
// 2 + the number of arrays in the reference.
2759 throw UnhandledToken((
"Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->
GetArrays().size()) +
")").c_str(), __FILE__, __LINE__);
// Per-array handling: log what array j provides (numeric/string data) and
// what the code generator accepts, then choose the argument kind.
2761 for (
size_t j = 0; j < pDVR->
GetArrays().size(); ++j)
2763 SAL_INFO(
"sc.opencl",
"i=" << i <<
" j=" << j <<
2764 " mpNumericArray=" << pDVR->
GetArrays()[j].mpNumericArray <<
2765 " mpStringArray=" << pDVR->
GetArrays()[j].mpStringArray <<
2766 " allStringsAreNull=" << (AllStringsAreNull(pDVR->
GetArrays()[j].mpStringArray, pDVR->
GetArrayLength())?
"YES":
"NO") <<
2767 " takeNumeric=" << (pCodeGen->takeNumeric()?
"YES":
"NO") <<
2768 " takeString=" << (pCodeGen->takeString()?
"YES":
"NO"));
// Case analysis on the presence of mpNumericArray / mpStringArray versus
// takeNumeric() / takeString(); parts of each condition are not shown here.
2770 if (pDVR->
GetArrays()[j].mpNumericArray &&
2771 pCodeGen->takeNumeric() &&
2773 pCodeGen->takeString())
2776 SAL_INFO(
"sc.opencl",
"Numbers and strings");
2778 std::make_shared<DynamicKernelMixedSlidingArgument>(
mCalcConfig,
2781 else if (pDVR->
GetArrays()[j].mpNumericArray &&
2782 pCodeGen->takeNumeric() &&
2788 SAL_INFO(
"sc.opencl",
"Numbers (no strings or strings treated as zero)");
2793 else if (pDVR->
GetArrays()[j].mpNumericArray ==
nullptr &&
2794 pCodeGen->takeNumeric() &&
2800 SAL_INFO(
"sc.opencl",
"Only strings even if want numbers but should be treated as zero");
2805 else if (pDVR->
GetArrays()[j].mpStringArray &&
2806 pCodeGen->takeString())
2809 SAL_INFO(
"sc.opencl",
"Strings only");
2816 pDVR->
GetArrays()[j].mpNumericArray ==
nullptr)
2820 SAL_INFO(
"sc.opencl",
"Only empty cells");
2827 SAL_INFO(
"sc.opencl",
"Unhandled case, rejecting for OpenCL");
2828 throw UnhandledToken((
"Unhandled numbers/strings combination for '"
2829 + pCodeGen->BinFuncName() +
"'").c_str(), __FILE__, __LINE__);
2842 " takeNumeric=" << (pCodeGen->takeNumeric()?
"YES":
"NO") <<
2843 " takeString=" << (pCodeGen->takeString()?
"YES":
"NO"));
// Second numbers/strings case analysis with the same log messages (its
// opening lines are not shown). The visible fragments create
// DynamicKernelMixedArgument and DynamicKernelStringArgument.
2846 pCodeGen->takeNumeric() &&
2848 pCodeGen->takeString())
2851 SAL_INFO(
"sc.opencl",
"Numbers and strings");
2853 std::make_shared<DynamicKernelMixedArgument>(
mCalcConfig,
2854 ts, ft->Children[i]));
2857 pCodeGen->takeNumeric() &&
2863 SAL_INFO(
"sc.opencl",
"Numbers (no strings or strings treated as zero)");
2869 pCodeGen->takeNumeric() &&
2875 SAL_INFO(
"sc.opencl",
"Only strings even if want numbers but should be treated as zero");
2881 pCodeGen->takeString())
2884 SAL_INFO(
"sc.opencl",
"Strings only");
2886 std::make_shared<DynamicKernelStringArgument>(
mCalcConfig,
2887 ts, ft->Children[i]));
2894 SAL_INFO(
"sc.opencl",
"Only empty cells");
2901 SAL_INFO(
"sc.opencl",
"Unhandled case, rejecting for OpenCL");
2902 throw UnhandledToken((
"Unhandled numbers/strings combination for '"
2903 + pCodeGen->BinFuncName() +
"'").c_str(), __FILE__, __LINE__);
// Constant operands: a constant number yields DynamicKernelConstantArgument,
// a constant string (when the generator takes strings) yields
// ConstStringArgument; any other operand type is rejected.
2908 SAL_INFO(
"sc.opencl",
"Constant number case");
2910 std::make_shared<DynamicKernelConstantArgument>(
mCalcConfig, ts,
2914 && pCodeGen->takeString())
2916 SAL_INFO(
"sc.opencl",
"Constant string case");
2918 std::make_shared<ConstStringArgument>(
mCalcConfig, ts,
2923 SAL_INFO(
"sc.opencl",
"Unhandled operand, rejecting for OpenCL");
2924 throw UnhandledToken((
"unhandled operand " +
StackVarEnumToString(pChild->
GetType()) +
" for ocPush").c_str(), __FILE__, __LINE__);
// Operator/function children: each fragment below pairs ft->Children[i] with
// a newly created Op* code generator and nResultSize. The labels that select
// each fragment are not part of this excerpt.
3035 ft->Children[i], std::make_shared<OpPDuration>(), nResultSize));
3039 ft->Children[i], std::make_shared<OpSinh>(), nResultSize));
3043 ft->Children[i], std::make_shared<OpAbs>(), nResultSize));
3047 ft->Children[i], std::make_shared<OpPV>(), nResultSize));
3051 ft->Children[i], std::make_shared<OpSin>(), nResultSize));
3055 ft->Children[i], std::make_shared<OpTan>(), nResultSize));
3059 ft->Children[i], std::make_shared<OpTanH>(), nResultSize));
3063 ft->Children[i], std::make_shared<OpStandard>(), nResultSize));
3067 ft->Children[i], std::make_shared<OpWeibull>(), nResultSize));
3075 ft->Children[i], std::make_shared<OpDDB>(), nResultSize));
3079 ft->Children[i], std::make_shared<OpFV>(), nResultSize));
3083 ft->Children[i], std::make_shared<OpSumIfs>(), nResultSize));
3091 ft->Children[i], std::make_shared<OpKurt>(), nResultSize));
3099 ft->Children[i], std::make_shared<OpNormdist>(), nResultSize));
3103 ft->Children[i], std::make_shared<OpArcCos>(), nResultSize));
3107 ft->Children[i], std::make_shared<OpSqrt>(), nResultSize));
3111 ft->Children[i], std::make_shared<OpArcCosHyp>(), nResultSize));
3115 ft->Children[i], std::make_shared<OpNPV>(), nResultSize));
3119 ft->Children[i], std::make_shared<OpNormsdist>(), nResultSize));
3123 ft->Children[i], std::make_shared<OpNorminv>(), nResultSize));
3127 ft->Children[i], std::make_shared<OpNormsinv>(), nResultSize));
3131 ft->Children[i], std::make_shared<OpPermut>(), nResultSize));
3135 ft->Children[i], std::make_shared<OpPermutationA>(), nResultSize));
3139 ft->Children[i], std::make_shared<OpPhi>(), nResultSize));
3143 ft->Children[i], std::make_shared<OpIPMT>(), nResultSize));
3147 ft->Children[i], std::make_shared<OpConfidence>(), nResultSize));
3151 ft->Children[i], std::make_shared<OpIntercept>(), nResultSize));
3155 std::make_shared<OpDB>(), nResultSize));
3159 ft->Children[i], std::make_shared<OpLogInv>(), nResultSize));
3163 ft->Children[i], std::make_shared<OpArcCot>(), nResultSize));
3167 ft->Children[i], std::make_shared<OpCosh>(), nResultSize));
3171 ft->Children[i], std::make_shared<OpCritBinom>(), nResultSize));
3175 ft->Children[i], std::make_shared<OpArcCotHyp>(), nResultSize));
3179 ft->Children[i], std::make_shared<OpArcSin>(), nResultSize));
3183 ft->Children[i], std::make_shared<OpArcSinHyp>(), nResultSize));
3187 ft->Children[i], std::make_shared<OpArcTan>(), nResultSize));
3191 ft->Children[i], std::make_shared<OpArcTanH>(), nResultSize));
3195 ft->Children[i], std::make_shared<OpBitAnd>(), nResultSize));
3199 ft->Children[i], std::make_shared<OpForecast>(), nResultSize));
3203 ft->Children[i], std::make_shared<OpLogNormDist>(), nResultSize));
3211 ft->Children[i], std::make_shared<OpLn>(), nResultSize));
3215 ft->Children[i], std::make_shared<OpRound>(), nResultSize));
3219 ft->Children[i], std::make_shared<OpCot>(), nResultSize));
3223 ft->Children[i], std::make_shared<OpCoth>(), nResultSize));
3227 ft->Children[i], std::make_shared<OpFdist>(), nResultSize));
3231 ft->Children[i], std::make_shared<OpVar>(), nResultSize));
3240 ft->Children[i], std::make_shared<OpPower>(), nResultSize));
3244 ft->Children[i], std::make_shared<OpOdd>(), nResultSize));
3260 ft->Children[i], std::make_shared<OpFloor>(), nResultSize));
3268 ft->Children[i], std::make_shared<OpFTest>(), nResultSize));
3272 ft->Children[i], std::make_shared<OpB>(), nResultSize));
3276 ft->Children[i], std::make_shared<OpBetaDist>(), nResultSize));
3280 ft->Children[i], std::make_shared<OpCscH>(), nResultSize));
3284 ft->Children[i], std::make_shared<OpExp>(), nResultSize));
3288 ft->Children[i], std::make_shared<OpLog10>(), nResultSize));
3292 ft->Children[i], std::make_shared<OpExponDist>(), nResultSize));
3296 ft->Children[i], std::make_shared<OpAverageIfs>(), nResultSize));
3300 ft->Children[i], std::make_shared<OpCountIfs>(), nResultSize));
3304 ft->Children[i], std::make_shared<OpCombinA>(), nResultSize));
3308 ft->Children[i], std::make_shared<OpEven>(), nResultSize));
3312 ft->Children[i], std::make_shared<OpLog>(), nResultSize));
3316 ft->Children[i], std::make_shared<OpMod>(), nResultSize));
3320 ft->Children[i], std::make_shared<OpTrunc>(), nResultSize));
3324 ft->Children[i], std::make_shared<OpSkew>(), nResultSize));
3328 ft->Children[i], std::make_shared<OpArcTan2>(), nResultSize));
3332 ft->Children[i], std::make_shared<OpBitOr>(), nResultSize));
3336 ft->Children[i], std::make_shared<OpBitLshift>(), nResultSize));
3340 ft->Children[i], std::make_shared<OpBitRshift>(), nResultSize));
3344 ft->Children[i], std::make_shared<OpBitXor>(), nResultSize));
3352 ft->Children[i], std::make_shared<OpPoisson>(), nResultSize));
3356 ft->Children[i], std::make_shared<OpSumSQ>(), nResultSize));
3360 ft->Children[i], std::make_shared<OpSkewp>(), nResultSize));
3364 ft->Children[i], std::make_shared<OpBinomdist>(), nResultSize));
3368 ft->Children[i], std::make_shared<OpVarP>(), nResultSize));
3372 ft->Children[i], std::make_shared<OpCeil>(), nResultSize));
3376 ft->Children[i], std::make_shared<OpCombin>(), nResultSize));
3380 ft->Children[i], std::make_shared<OpDevSq>(), nResultSize));
3384 ft->Children[i], std::make_shared<OpStDev>(), nResultSize));
3388 ft->Children[i], std::make_shared<OpSlope>(), nResultSize));
3392 ft->Children[i], std::make_shared<OpSTEYX>(), nResultSize));
3396 ft->Children[i], std::make_shared<OpZTest>(), nResultSize));
// These two create dedicated argument objects (DynamicKernelPiArgument,
// DynamicKernelRandomArgument) directly instead of an Op* code generator.
3400 std::make_shared<DynamicKernelPiArgument>(
mCalcConfig, ts,
3405 std::make_shared<DynamicKernelRandomArgument>(
mCalcConfig, ts,
3410 ft->Children[i], std::make_shared<OpProduct>(), nResultSize));
3418 ft->Children[i], std::make_shared<OpSumX2MY2>(), nResultSize));
3422 ft->Children[i], std::make_shared<OpSumX2PY2>(), nResultSize));
3430 ft->Children[i], std::make_shared<OpTTest>(), nResultSize));
3434 ft->Children[i], std::make_shared<OpTDist>(), nResultSize));
3442 ft->Children[i], std::make_shared<OpSumXMY2>(), nResultSize));
3446 ft->Children[i], std::make_shared<OpStDevP>(), nResultSize));
3450 ft->Children[i], std::make_shared<OpCovar>(), nResultSize));
3454 ft->Children[i], std::make_shared<OpAnd>(), nResultSize));
3458 ft->Children[i], std::make_shared<OpVLookup>(), nResultSize));
3462 ft->Children[i], std::make_shared<OpOr>(), nResultSize));
3466 ft->Children[i], std::make_shared<OpNot>(), nResultSize));
3470 ft->Children[i], std::make_shared<OpXor>(), nResultSize));
3474 ft->Children[i], std::make_shared<OpDmax>(), nResultSize));
3478 ft->Children[i], std::make_shared<OpDmin>(), nResultSize));
3482 ft->Children[i], std::make_shared<OpDproduct>(), nResultSize));
3486 ft->Children[i], std::make_shared<OpDaverage>(), nResultSize));
3490 ft->Children[i], std::make_shared<OpDstdev>(), nResultSize));
3494 ft->Children[i], std::make_shared<OpDstdevp>(), nResultSize));
3498 ft->Children[i], std::make_shared<OpDsum>(), nResultSize));
3502 ft->Children[i], std::make_shared<OpDvar>(), nResultSize));
3506 ft->Children[i], std::make_shared<OpDvarp>(), nResultSize));
3510 ft->Children[i], std::make_shared<OpAverageIf>(), nResultSize));
3514 ft->Children[i], std::make_shared<OpDcount>(), nResultSize));
3518 ft->Children[i], std::make_shared<OpDcount2>(), nResultSize));
3522 ft->Children[i], std::make_shared<OpDeg>(), nResultSize));
3526 ft->Children[i], std::make_shared<OpRoundUp>(), nResultSize));
3530 ft->Children[i], std::make_shared<OpRoundDown>(), nResultSize));
3534 ft->Children[i], std::make_shared<OpInt>(), nResultSize));
3538 ft->Children[i], std::make_shared<OpRadians>(), nResultSize));
3542 ft->Children[i], std::make_shared<OpCountIf>(), nResultSize));
3546 ft->Children[i], std::make_shared<OpIsEven>(), nResultSize));
3550 ft->Children[i], std::make_shared<OpIsOdd>(), nResultSize));
3554 ft->Children[i], std::make_shared<OpFact>(), nResultSize));
3558 ft->Children[i], std::make_shared<OpMinA>(), nResultSize));
3562 ft->Children[i], std::make_shared<OpCountA>(), nResultSize));
3566 ft->Children[i], std::make_shared<OpMaxA>(), nResultSize));
3570 ft->Children[i], std::make_shared<OpAverageA>(), nResultSize));
3574 ft->Children[i], std::make_shared<OpVarA>(), nResultSize));
3578 ft->Children[i], std::make_shared<OpVarPA>(), nResultSize));
3582 ft->Children[i], std::make_shared<OpStDevA>(), nResultSize));
3586 ft->Children[i], std::make_shared<OpStDevPA>(), nResultSize));
3590 ft->Children[i], std::make_shared<OpSec>(), nResultSize));
3594 ft->Children[i], std::make_shared<OpSecH>(), nResultSize));
3598 ft->Children[i], std::make_shared<OpSumIf>(), nResultSize));
3602 ft->Children[i], std::make_shared<OpNegSub>(), nResultSize));
3606 ft->Children[i], std::make_shared<OpAveDev>(), nResultSize));
3610 ft->Children[i], std::make_shared<OpIf>(), nResultSize));
// External (add-in) functions are matched by exact comparison of
// pChild->GetExternal() against the fully qualified add-in function name.
3613 if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getEffect")
3617 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCumipmt")
3621 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getNominal")
3625 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCumprinc")
3629 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getXnpv")
3633 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getPricemat")
3637 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getReceived")
3641 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getTbilleq")
3645 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getTbillprice")
3649 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getTbillyield")
3653 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getFvschedule")
3661 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getYielddisc")
3665 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getYieldmat")
3669 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getAccrintm")
3673 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupdaybs")
3677 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getDollarde")
3681 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getDollarfr")
3685 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupdays")
3689 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupdaysnc")
3693 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getDisc")
3697 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getIntrate")
3701 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getPrice")
3704 ft->Children[i], std::make_shared<OpPrice>(), nResultSize));
3706 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupnum")
3709 std::make_shared<OpCoupnum>(), nResultSize));
3721 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getAmorlinc")
3724 std::make_shared<OpAmorlinc>(), nResultSize));
3726 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getMduration")
3729 std::make_shared<OpMDuration>(), nResultSize));
3736 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getOddlprice")
3739 ft->Children[i], std::make_shared<OpOddlprice>(), nResultSize));
3741 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getOddlyield")
3744 std::make_shared<OpOddlyield>(), nResultSize));
3746 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getPricedisc")
3749 ft->Children[i], std::make_shared<OpPriceDisc>(), nResultSize));
3751 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCouppcd")
3754 std::make_shared<OpCouppcd>(), nResultSize));
3756 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getCoupncd")
3759 std::make_shared<OpCoupncd>(), nResultSize));
3761 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getAccrint")
3764 std::make_shared<OpAccrint>(), nResultSize));
3766 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getSqrtpi")
3769 std::make_shared<OpSqrtPi>(), nResultSize));
3771 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getConvert")
3774 std::make_shared<OpConvert>(), nResultSize));
3776 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getIseven")
3779 std::make_shared<OpIsEven>(), nResultSize));
3781 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getIsodd")
3784 std::make_shared<OpIsOdd>(), nResultSize));
3786 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getMround")
3789 std::make_shared<OpMROUND>(), nResultSize));
3791 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getQuotient")
3794 std::make_shared<OpQuotient>(), nResultSize));
3796 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getSeriessum")
3799 std::make_shared<OpSeriesSum>(), nResultSize));
3801 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getBesselj")
3804 std::make_shared<OpBesselj>(), nResultSize));
3806 else if (pChild->
GetExternal() ==
"com.sun.star.sheet.addin.Analysis.getGestep")
3809 std::make_shared<OpGestep>(), nResultSize));
// No external name matched: reject, quoting the name in the message.
3812 throw UnhandledToken(OUString(
"unhandled external " + pChild->
GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
// Opcode rejection: the message includes the numeric value of `opc`.
3816 throw UnhandledToken(OUString(
"unhandled opcode "
3818 +
"(" + OUString::number(opc) +
")").toUtf8().getStr(), __FILE__, __LINE__);
// DynamicKernel: a CompiledFormula that exposes an MD5 accessor (GetMD5),
// kernel creation (CreateKernel), launching over `nr` items (Launch) and
// read access to its result buffer handle (mpResClmem).
// Only part of the class body is included in this excerpt.
3825 class DynamicKernel :
public CompiledFormula
3829 virtual ~DynamicKernel()
override;
3837 std::string
const & GetMD5();
3842 void CreateKernel();
3846 void Launch(
size_t nr );
// Returns the cl_mem handle stored in mpResClmem without transferring it.
3848 cl_mem GetResultBuffer()
const {
return mpResClmem; }
3873 mnResultSize(nResultSize) {}
3875 DynamicKernel::~DynamicKernel()
// Assembles the OpenCL source text for this formula in the `decl` stream:
// fp64 extension pragmas, the helper functions dumped from mSyms, and then
// the kernel itself, whose first parameter is `__global double *result` and
// whose body stores the expression produced by DK->GenSlidingWindowDeclRef()
// into result[get_global_id(0)].
3892 void DynamicKernel::CodeGen()
3897 std::stringstream decl;
// The cl_khr_fp64 pragma is emitted inside an
// `#if __OPENCL_VERSION__ < 120` block of the generated source.
3900 decl <<
"#if __OPENCL_VERSION__ < 120\n";
3901 decl <<
"#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3906 decl <<
"#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3920 mSyms.DumpSlidingWindowFunctions(decl);
3923 decl <<
"(__global double *result";
// Remaining kernel parameters are declared by the root argument DK.
3927 DK->GenSlidingWindowDecl(decl);
3929 decl <<
") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3930 DK->GenSlidingWindowDeclRef() <<
";\n}\n";
3934 (mKernelSignature[0] ==
'_'
// Returns a reference to the textual MD5 digest; the text is built in the
// `md5s` stream from a digest of RTL_DIGEST_LENGTH_MD5 bytes.
// NOTE(review): the visible statement streams each byte as
// `std::hex << static_cast<int>(i)` with no width or fill set in that
// statement, so a byte below 0x10 would print as a single hex digit. Confirm
// whether the resulting variable-length string is intended before relying on
// it being a fixed-width hash.
3939 std::string
const & DynamicKernel::GetMD5()
3943 std::stringstream md5s;
3949 RTL_DIGEST_LENGTH_MD5);
3952 md5s << std::hex << static_cast<int>(i);
// Obtains a built cl_program for this formula and creates the kernel in it.
// The two most recently used programs are remembered in function-local
// statics, keyed by KernelHash = mKernelSignature + GetMD5(). A match reuses
// the cached program; otherwise the older cached program is released and a
// new program is created from source and built.
// Throws OpenCLError naming the failing call (clCreateProgramWithSource,
// clBuildProgram, clCreateKernel).
// NOTE(review): the cache statics are read and written with no locking
// visible in this excerpt. Confirm callers serialise access.
3960 void DynamicKernel::CreateKernel()
3974 static std::string lastOneKernelHash;
3975 static std::string lastSecondKernelHash;
3976 static cl_program lastOneProgram =
nullptr;
3977 static cl_program lastSecondProgram =
nullptr;
3978 std::string KernelHash = mKernelSignature + GetMD5();
// Cache lookup: most recent entry first, then the second entry.
3979 if (lastOneKernelHash == KernelHash && lastOneProgram)
3981 mpProgram = lastOneProgram;
3983 else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3985 mpProgram = lastSecondProgram;
// Cache miss: drop the older cached program before building a new one.
3990 if (lastSecondProgram)
3992 SAL_INFO(
"sc.opencl",
"Releasing program " << lastSecondProgram);
3993 err = clReleaseProgram(lastSecondProgram);
3995 lastSecondProgram =
nullptr;
4005 mpProgram = clCreateProgramWithSource(kEnv.
mpkContext, 1,
4006 &src,
nullptr, &err);
4007 if (err != CL_SUCCESS)
4008 throw OpenCLError(
"clCreateProgramWithSource", err, __FILE__, __LINE__);
4009 SAL_INFO(
"sc.opencl",
"Created program " << mpProgram);
4011 err = clBuildProgram(mpProgram, 1,
4013 if (err != CL_SUCCESS)
// Debug builds only: on CL_BUILD_PROGRAM_FAILURE query the build status, then
// the build log size, then the log text, warning if any query fails.
4015 #if OSL_DEBUG_LEVEL > 0
4016 if (err == CL_BUILD_PROGRAM_FAILURE)
4018 cl_build_status stat;
4019 cl_int e = clGetProgramBuildInfo(
4021 CL_PROGRAM_BUILD_STATUS,
sizeof(cl_build_status),
4024 e != CL_SUCCESS,
"sc.opencl",
4025 "after CL_BUILD_PROGRAM_FAILURE,"
4026 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
4028 if (e == CL_SUCCESS)
// First query passes a zero size and null buffer to obtain the log length n.
4031 e = clGetProgramBuildInfo(
4033 CL_PROGRAM_BUILD_LOG, 0,
nullptr, &n);
4035 e != CL_SUCCESS || n == 0,
"sc.opencl",
4036 "after CL_BUILD_PROGRAM_FAILURE,"
4037 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
4039 if (e == CL_SUCCESS && n != 0)
4041 std::vector<char>
log(n);
4042 e = clGetProgramBuildInfo(
4044 CL_PROGRAM_BUILD_LOG, n,
log.data(),
nullptr);
4046 e != CL_SUCCESS || n == 0,
"sc.opencl",
4047 "after CL_BUILD_PROGRAM_FAILURE,"
4048 " clGetProgramBuildInfo("
4050 if (e == CL_SUCCESS)
4053 "CL_BUILD_PROGRAM_FAILURE, status " << stat
4054 <<
", log \"" <<
log.data() <<
"\"");
4060 SAL_WARN(
"sc.opencl",
"Program failed to build, aborting.");
4063 throw OpenCLError(
"clBuildProgram", err, __FILE__, __LINE__);
4066 SAL_INFO(
"sc.opencl",
"Built program " << mpProgram);
4070 (mKernelSignature + GetMD5()).c_str());
// Shift the cache: the previous newest entry becomes the second entry and
// the current hash becomes the newest.
4072 lastSecondKernelHash = lastOneKernelHash;
4073 lastSecondProgram = lastOneProgram;
4074 lastOneKernelHash = KernelHash;
4077 mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
4078 if (err != CL_SUCCESS)
4079 throw OpenCLError(
"clCreateKernel", err, __FILE__, __LINE__);
4080 SAL_INFO(
"sc.opencl",
"Created kernel " <<
mpKernel <<
" with name " << kname <<
" in program " << mpProgram);
// Runs the kernel over `nr` work items.
// Creates the result buffer (nr * sizeof(double) bytes, flags
// CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR), sets kernel arguments, enqueues
// the kernel with global work size { nr } and flushes the queue.
// Each step that reports an error throws OpenCLError naming the OpenCL call.
4083 void DynamicKernel::Launch(
size_t nr )
4091 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
4092 nr *
sizeof(
double),
nullptr, &err);
4093 if (CL_SUCCESS != err)
4094 throw OpenCLError(
"clCreateBuffer", err, __FILE__, __LINE__);
4095 SAL_INFO(
"sc.opencl",
"Created buffer " <<
mpResClmem <<
" size " << nr <<
"*" <<
sizeof(
double) <<
"=" << (nr*
sizeof(
double)));
4099 if (CL_SUCCESS != err)
4100 throw OpenCLError(
"clSetKernelArg", err, __FILE__, __LINE__);
// One global work item per result element; the local size is passed as nullptr.
4103 size_t global_work_size[] = { nr };
4106 global_work_size,
nullptr, 0,
nullptr,
nullptr);
4107 if (CL_SUCCESS != err)
4108 throw OpenCLError(
"clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
4110 if (CL_SUCCESS != err)
4111 throw OpenCLError(
"clFlush", err, __FILE__, __LINE__);
// Looks `ref` up in mSymbols and returns a non-owning pointer to an argument
// object. Two return paths are visible: one returns a newly created argument
// (new_arg.get()), the other the entry already stored in the map
// (it->second.get()). The condition choosing between them and the insertion
// of the new entry are not part of this excerpt.
4117 template <
typename T>
4118 const DynamicKernelArgument* SymbolTable::DeclRefArg(
const ScCalcConfig& config,
4120 std::shared_ptr<SlidingFunctionBase> pCodeGen,
int nResultSize)
4123 ArgumentMap::iterator it =
mSymbols.find(ref);
4127 std::stringstream ss;
4132 return new_arg.get();
4136 return it->second.get();
4140 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
// Builds a formula tree from the tokens of rCode, constructs a DynamicKernel
// for it, then runs CodeGen() and CreateKernel().
// Tree construction keeps the tokens seen so far in aTokenVector and maps
// each processed token to its tree node in aHashMap. Operands are popped from
// aTokenVector and attached as children, and the child list is then reversed.
// UnhandledToken, InvalidParameterCount, OpenCLError and Unhandled are caught
// and logged, as is a final catch-all; what each handler does after logging
// is not part of this excerpt.
4150 std::shared_ptr<DynamicKernel> DynamicKernel::create(
const ScCalcConfig& rConfig,
const ScTokenArray& rCode,
int nResultSize )
4154 std::vector<FormulaToken*> aTokenVector;
4155 std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
4157 while ((pCur = const_cast<FormulaToken*>(aCode.Next())) !=
nullptr)
4166 if( aTokenVector.empty())
4169 aTokenVector.pop_back();
// Reuse the node already built for this operand token if there is one,
// otherwise wrap the token in a fresh FormulaTreeNode.
4172 if (aHashMap.find(pTempFormula) == aHashMap.end())
4174 pCurNode->Children.push_back(aHashMap[pTempFormula]);
4179 std::make_shared<FormulaTreeNode>(pTempFormula);
4180 pCurNode->Children.push_back(pChildTreeNode);
4183 std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4184 aHashMap[pCur] = pCurNode;
4186 aTokenVector.push_back(pCur);
// The node of the last remaining token becomes the single child of Root.
4190 Root->Children.push_back(aHashMap[aTokenVector.back()]);
4192 auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
4197 pDynamicKernel->CodeGen();
4198 pDynamicKernel->CreateKernel();
4200 catch (
const UnhandledToken& ut)
4202 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: UnhandledToken: " << ut.mMessage <<
" at " << ut.mFile <<
":" << ut.mLineNumber);
4205 catch (
const InvalidParameterCount& ipc)
4207 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
4208 <<
" at " << ipc.mFile <<
":" << ipc.mLineNumber);
4211 catch (
const OpenCLError& oce)
4215 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: OpenCLError from " << oce.mFunction <<
": " <<
openclwrapper::errorString(oce.mError) <<
" at " << oce.mFile <<
":" << oce.mLineNumber);
4222 catch (
const Unhandled& uh)
4224 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: Unhandled at " << uh.mFile <<
":" << uh.mLineNumber);
4234 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: unexpected exception");
4238 return pDynamicKernel;
// Holds the outcome of a kernel launch: the kernel (non-owning pointer), the
// number of result rows, the OpenCL result buffer handle and the host pointer
// obtained by mapping that buffer. A default-constructed object has a null
// kernel and is reported as invalid by isValid().
4243 class CLInterpreterResult
4253 CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4254 CLInterpreterResult( DynamicKernel* pKernel,
SCROW nGroupLength ) :
4255 mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
// Valid means a kernel pointer is present; buffer state is not checked.
4257 bool isValid()
const {
return mpKernel !=
nullptr; }
// Fetches the kernel's result buffer handle and maps it for reading
// (CL_TRUE, CL_MAP_READ, offset 0, mnGroupLength * sizeof(double) bytes),
// storing the mapped pointer in mpResBuf.
4259 void fetchResultFromKernel()
4267 mpCLResBuf = mpKernel->GetResultBuffer();
4273 mpResBuf =
static_cast<double*
>(clEnqueueMapBuffer(kEnv.
mpkCmdQueue,
4275 CL_TRUE, CL_MAP_READ, 0,
4276 mnGroupLength *
sizeof(
double), 0,
nullptr,
nullptr,
4279 if (err != CL_SUCCESS)
4285 SAL_INFO(
"sc.opencl",
"Kernel results: cl_mem: " << mpResBuf <<
" (" << DebugPeekDoubles(mpResBuf, mnGroupLength) <<
")");
// Later fragment (its enclosing member's header is not shown): unmaps
// mpResBuf from mpCLResBuf on the command queue and checks the status.
4301 err = clEnqueueUnmapMemObject(kEnv.
mpkCmdQueue, mpCLResBuf, mpResBuf, 0,
nullptr,
nullptr);
4303 if (err != CL_SUCCESS)
// Pairs a DynamicKernel with the group length it is to be launched for.
// mpKernelStore owns the kernel via shared_ptr; mpKernel is the raw pointer
// used for the launch. The context is valid once mpKernel is non-null.
4313 class CLInterpreterContext
4321 explicit CLInterpreterContext(
SCROW nGroupLength)
4323 , mnGroupLength(nGroupLength) {}
4325 bool isValid()
const
4327 return mpKernel !=
nullptr;
// Takes shared ownership of pKernel and caches its raw pointer.
4330 void setManagedKernel( std::shared_ptr<DynamicKernel> pKernel )
4332 mpKernelStore = std::move(pKernel);
4333 mpKernel = mpKernelStore.get();
// Launches the kernel for mnGroupLength items. Every caught exception
// (UnhandledToken, OpenCLError, Unhandled, and the final catch-all) is logged
// and turned into a default, invalid CLInterpreterResult. On success the
// result refers to mpKernel and mnGroupLength.
4336 CLInterpreterResult launchKernel()
4339 return CLInterpreterResult();
4344 mpKernel->Launch(mnGroupLength);
4346 catch (
const UnhandledToken& ut)
4348 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: UnhandledToken: " << ut.mMessage <<
" at " << ut.mFile <<
":" << ut.mLineNumber);
4350 return CLInterpreterResult();
4352 catch (
const OpenCLError& oce)
4354 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: OpenCLError from " << oce.mFunction <<
": " <<
openclwrapper::errorString(oce.mError) <<
" at " << oce.mFile <<
":" << oce.mLineNumber);
4356 return CLInterpreterResult();
4358 catch (
const Unhandled& uh)
4360 SAL_INFO(
"sc.opencl",
"Dynamic formula compiler: Unhandled at " << uh.mFile <<
":" << uh.mLineNumber);
4362 return CLInterpreterResult();
4366 SAL_WARN(
"sc.opencl",
"Dynamic formula compiler: unexpected exception");
4368 return CLInterpreterResult();
4371 return CLInterpreterResult(mpKernel, mnGroupLength);
// Creates a context sized to xGroup->mnLength and gives it a kernel compiled
// from rCode by DynamicKernel::create, using the same length as the result
// size. The return statement is not part of this excerpt.
4376 CLInterpreterContext createCLInterpreterContext(
const ScCalcConfig& rConfig,
4379 CLInterpreterContext aCxt(xGroup->mnLength);
4381 aCxt.setManagedKernel(DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
4391 aComp.CompileTokenArray();
// Returns true when the status held in `err` is CL_SUCCESS. The OpenCL call
// that sets `err` is not part of this excerpt.
4394 bool waitForResults()
4401 if (err != CL_SUCCESS)
4404 return err == CL_SUCCESS;
// Body fragment of the routine that evaluates one formula group (its header
// is not part of this excerpt; presumably the interpreter's entry point,
// TODO confirm). Visible steps, in order: generate RPN tokens, create the
// context, launch the kernel, wait for completion, fetch the results, and
// push them into the document. The context, launch and wait steps are each
// followed by a check; what happens on failure is not shown.
4415 genRPNTokens(rDoc, rTopPos, rCode);
4420 CLInterpreterContext aCxt = createCLInterpreterContext(
maCalcConfig, xGroup, rCode);
4421 if (!aCxt.isValid())
4424 CLInterpreterResult aRes = aCxt.launchKernel();
4425 if (!aRes.isValid())
4428 if (!waitForResults())
4431 aRes.fetchResultFromKernel();
4433 return aRes.pushResultToDocument(rDoc, rTopPos);
Matrix data type that can store values of mixed types.
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
const size_t count(pCandidateA->getBorderLines().size())
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
sal_uInt64 kernelFailures
OUString getString() const
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells. ...
cl_program mpArryPrograms[MAX_CLFILE_NUM]
void setKernelEnv(KernelEnv *envInfo)
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
(Partially) abstract base class for an operand
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
::boost::intrusive_ptr< ScMatrix > ScMatrixRef
StringConversion meStringConversion
Reference< deployment::XPackageRegistry > create(Reference< deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, Reference< XComponentContext > const &xComponentContext)
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
Arguments that are actually compile-time constant string. Currently, only the hash is passed...
::boost::spirit::classic::rule< ScannerT > argument
exports com.sun.star.chart2. data
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
Configuration options for formula interpreter.
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
#define SAL_WARN_IF(condition, area, stream)
#define SAL_INFO(area, stream)
cl_command_queue mpkCmdQueue
int uniform_int_distribution(int a, int b)
#define SAL_WARN(area, stream)
const char * errorString(cl_int nError)