LibreOffice Module sc (master)  1
formulagroupcl.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <formulagroup.hxx>
11 #include <formulagroupcl.hxx>
12 #include <document.hxx>
13 #include <formulacell.hxx>
14 #include <tokenarray.hxx>
15 #include <compiler.hxx>
16 #include <comphelper/random.hxx>
17 #include <formula/vectortoken.hxx>
18 #include <scmatrix.hxx>
19 #include <sal/log.hxx>
20 #include <rtl/math.hxx>
21 
22 #include <opencl/openclwrapper.hxx>
23 #include <opencl/OpenCLZone.hxx>
24 
25 #include "op_financial.hxx"
26 #include "op_database.hxx"
27 #include "op_math.hxx"
28 #include "op_logical.hxx"
29 #include "op_statistical.hxx"
30 #include "op_array.hxx"
31 #include "op_spreadsheet.hxx"
32 #include "op_addin.hxx"
33 
34 #include <com/sun/star/sheet/FormulaLanguage.hpp>
35 
36 // FIXME: The idea that somebody would bother to (now and then? once a year? once a month?) manually
37 // edit a source file and change the value of some #defined constant and run some ill-defined
38 // "correctness test" is of course ludicrous. Either things are checked in normal unit tests, in
39 // every 'make check', or not at all. The below comments are ridiculous.
40 
// Tuning constants for the generated OpenCL code.
// NOTE(review): the code that reads these macros is not visible in this part of the file;
// judging by the names they presumably control when a reduction kernel is used and how far
// generated loops are unrolled -- confirm against their users before changing the values.
41 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
42 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
43 
// OpenCL C helper source kept as one C string: error-code constants, helpers that
// encode/decode an error code in a NaN, NaN-aware accumulation helpers and
// approximate-equality arithmetic.
// NOTE(review): where this string is added to the generated kernel source is not visible
// in this part of the file.
44 const char* const publicFunc =
45  "\n"
46  "#define IllegalArgument 502\n"
47  "#define IllegalFPOperation 503 // #NUM!\n"
48  "#define NoValue 519 // #VALUE!\n"
49  "#define NoConvergence 523\n"
50  "#define DivisionByZero 532 // #DIV/0!\n"
51  "#define NOTAVAILABLE 0x7fff // #N/A\n"
52  "\n"
// CreateDoubleError(): returns the double whose bit pattern is 0x7FF8000000000000 + nErr,
// i.e. a NaN carrying the error code in its low bits.
53  "double CreateDoubleError(ulong nErr)\n"
54  "{\n"
55  // At least nVidia on Linux and Intel on Windows seem to ignore the argument to nan(),
56  // so using that would not propagate the type of error, work that around
57  // by directly constructing the proper IEEE double NaN value
58  // TODO: maybe use a better way to detect such systems?
59  " return as_double(0x7FF8000000000000+nErr);\n"
60 // " return nan(nErr);\n"
61  "}\n"
62  "\n"
// GetDoubleErrorValue(): 0 for finite values, IllegalFPOperation for infinities, NoValue
// for a NaN that has any of bits 16..31 set, otherwise the low 16 bits of the NaN.
63  "uint GetDoubleErrorValue(double fVal)\n"
64  "{\n"
65  " if (isfinite(fVal))\n"
66  " return 0;\n"
67  " if (isinf(fVal))\n"
68  " return IllegalFPOperation; // normal INF\n"
69  " if (as_ulong(fVal) & 0XFFFF0000u)\n"
70  " return NoValue; // just a normal NAN\n"
71  " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
72  "}\n"
73  "\n"
// fsum_count(): returns b when a is NaN, else a+b; *p is incremented only when a is not NaN.
74  "double fsum_count(double a, double b, __private int *p) {\n"
75  " bool t = isnan(a);\n"
76  " (*p) += t?0:1;\n"
77  " return t?b:a+b;\n"
78  "}\n"
// fmin_count()/fmax_count(): return fmin/fmax of a and b; *p is incremented only when
// that result is not NaN.
79  "double fmin_count(double a, double b, __private int *p) {\n"
80  " double result = fmin(a, b);\n"
81  " bool t = isnan(result);\n"
82  " (*p) += t?0:1;\n"
83  " return result;\n"
84  "}\n"
85  "double fmax_count(double a, double b, __private int *p) {\n"
86  " double result = fmax(a, b);\n"
87  " bool t = isnan(result);\n"
88  " (*p) += t?0:1;\n"
89  " return result;\n"
90  "}\n"
// Small arithmetic helpers; fsum() and legalize() fall back to b when a is NaN.
91  "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
92  "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
93  "double fsub(double a, double b) { return a-b; }\n"
94  "double fdiv(double a, double b) { return a/b; }\n"
95  "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
// NOTE(review): in OpenCL C as_double()/as_long() reinterpret the bits of their argument
// rather than converting the value (convert_double()/convert_long() do the latter).
// Confirm that bit reinterpretation is what is_representable_integer() intends.
96  "int is_representable_integer(double a) {\n"
97  " long kMaxInt = (1L << 53) - 1;\n"
98  " if (a <= as_double(kMaxInt))\n"
99  " {\n"
100  " long nInt = as_long(a);\n"
101  " double fInt;\n"
102  " return (nInt <= kMaxInt &&\n"
103  " (!((fInt = as_double(nInt)) < a) && !(fInt > a)));\n"
104  " }\n"
105  " return 0;\n"
106  "}\n"
// approx_equal(): exact equality returns 1; otherwise the difference is compared against
// the operands scaled by e44 (= 16 * e48) and e48 (= 1 / (16777216 * 16777216)).
107  "int approx_equal(double a, double b) {\n"
108  " double e48 = 1.0 / (16777216.0 * 16777216.0);\n"
109  " double e44 = e48 * 16.0;\n"
110  " if (a == b)\n"
111  " return 1;\n"
112  " if (a == 0.0 || b == 0.0)\n"
113  " return 0;\n"
114  " double d = fabs(a - b);\n"
115  " if (!isfinite(d))\n"
116  " return 0; // Nan or Inf involved\n"
117  " if (d > ((a = fabs(a)) * e44) || d > ((b = fabs(b)) * e44))\n"
118  " return 0;\n"
119  " if (is_representable_integer(d) && is_representable_integer(a) && is_representable_integer(b))\n"
120  " return 0; // special case for representable integers.\n"
121  " return (d < a * e48 && d < b * e48);\n"
122  "}\n"
// fsum_approx()/fsub_approx(): return exactly 0.0 when the two operands cancel according
// to approx_equal(), otherwise the plain sum/difference.
123  "double fsum_approx(double a, double b) {\n"
124  " if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0))\n"
125  " && approx_equal( a, -b ) )\n"
126  " return 0.0;\n"
127  " return a + b;\n"
128  "}\n"
129  "double fsub_approx(double a, double b) {\n"
130  " if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approx_equal( a, b ) )\n"
131  " return 0.0;\n"
132  " return a - b;\n"
133  "}\n"
134  ;
135 
136 #include <vector>
137 #include <map>
138 #include <iostream>
139 #include <algorithm>
140 
141 #include <rtl/digest.h>
142 
143 #include <memory>
144 
145 using namespace formula;
146 
147 namespace sc::opencl {
148 
149 namespace {
150 
// Prefixes every line of s with a C-style comment holding its 1-based line number
// (right-aligned in a field of width 4), e.g. "/*   1*/ ". Line breaks are kept as they
// are; a final line without a trailing newline is numbered too.
std::string linenumberify(const std::string& s)
{
    std::stringstream out;
    int lineNo = 0;
    std::size_t pos = 0;

    for (;;)
    {
        const std::size_t eol = s.find('\n', pos);
        if (eol == std::string::npos)
            break;
        // Copy the line including its terminating newline.
        out << "/*" << std::setw(4) << ++lineNo << "*/ " << s.substr(pos, eol - pos + 1);
        pos = eol + 1;
    }

    // Remaining text after the last newline, if any.
    if (pos < s.size())
        out << "/*" << std::setw(4) << ++lineNo << "*/ " << s.substr(pos);

    return out.str();
}
166 
167 bool AllStringsAreNull(const rtl_uString* const* pStringArray, size_t nLength)
168 {
169  if (pStringArray == nullptr)
170  return true;
171 
172  for (size_t i = 0; i < nLength; i++)
173  if (pStringArray[i] != nullptr)
174  return false;
175 
176  return true;
177 }
178 
179 OUString LimitedString( const OUString& str )
180 {
181  if( str.getLength() < 20 )
182  return "\"" + str + "\"";
183  else
184  return OUString::Concat("\"") + str.subView( 0, 20 ) + "\"...";
185 }
186 
187 // Returns formatted contents of the data (possibly shortened), to be used in debug output.
188 OUString DebugPeekData(const FormulaToken* ref, int doubleRefIndex = 0)
189 {
190  if (ref->GetType() == formula::svSingleVectorRef)
191  {
192  const formula::SingleVectorRefToken* pSVR =
193  static_cast<const formula::SingleVectorRefToken*>(ref);
194  OUStringBuffer buf = "SingleRef {";
195  for( size_t i = 0; i < std::min< size_t >( 4, pSVR->GetArrayLength()); ++i )
196  {
197  if( i != 0 )
198  buf.append( "," );
199  if( pSVR->GetArray().mpNumericArray != nullptr )
200  buf.append( pSVR->GetArray().mpNumericArray[ i ] );
201  else if( pSVR->GetArray().mpStringArray != nullptr )
202  buf.append( LimitedString( OUString( pSVR->GetArray().mpStringArray[ i ] )));
203  }
204  if( pSVR->GetArrayLength() > 4 )
205  buf.append( ",..." );
206  buf.append( "}" );
207  return buf.makeStringAndClear();
208  }
209  else if (ref->GetType() == formula::svDoubleVectorRef)
210  {
211  const formula::DoubleVectorRefToken* pDVR =
212  static_cast<const formula::DoubleVectorRefToken*>(ref);
213  OUStringBuffer buf = "DoubleRef {";
214  for( size_t i = 0; i < std::min< size_t >( 4, pDVR->GetArrayLength()); ++i )
215  {
216  if( i != 0 )
217  buf.append( "," );
218  if( pDVR->GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
219  buf.append( pDVR->GetArrays()[doubleRefIndex].mpNumericArray[ i ] );
220  else if( pDVR->GetArrays()[doubleRefIndex].mpStringArray != nullptr )
221  buf.append( LimitedString( OUString( pDVR->GetArrays()[doubleRefIndex].mpStringArray[ i ] )));
222  }
223  if( pDVR->GetArrayLength() > 4 )
224  buf.append( ",..." );
225  buf.append( "}" );
226  return buf.makeStringAndClear();
227  }
228  else if (ref->GetType() == formula::svString)
229  {
230  return "String " + LimitedString( ref->GetString().getString());
231  }
232  else if (ref->GetType() == formula::svDouble)
233  {
234  return OUString::number(ref->GetDouble());
235  }
236  else
237  {
238  return "?";
239  }
240 }
241 
242 // Returns formatted contents of a doubles buffer, to be used in debug output.
243 OUString DebugPeekDoubles(const double* data, int size)
244 {
245  OUStringBuffer buf = "{";
246  for( int i = 0; i < std::min( 4, size ); ++i )
247  {
248  if( i != 0 )
249  buf.append( "," );
250  buf.append( data[ i ] );
251  }
252  if( size > 4 )
253  buf.append( ",..." );
254  buf.append( "}" );
255  return buf.makeStringAndClear();
256 }
257 
258 } // anonymous namespace
259 
261 size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
262 {
263  OpenCLZone zone;
264  FormulaToken* ref = mFormulaTree->GetFormulaToken();
265  double* pHostBuffer = nullptr;
266  size_t szHostBuffer = 0;
267  if (ref->GetType() == formula::svSingleVectorRef)
268  {
269  const formula::SingleVectorRefToken* pSVR =
270  static_cast<const formula::SingleVectorRefToken*>(ref);
271 
272  SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
273 
274  pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
275  szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
276  }
277  else if (ref->GetType() == formula::svDoubleVectorRef)
278  {
279  const formula::DoubleVectorRefToken* pDVR =
280  static_cast<const formula::DoubleVectorRefToken*>(ref);
281 
282  SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
283 
284  pHostBuffer = const_cast<double*>(
285  pDVR->GetArrays()[mnIndex].mpNumericArray);
286  szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
287  }
288  else
289  {
290  throw Unhandled(__FILE__, __LINE__);
291  }
292 
295  cl_int err;
296  if (pHostBuffer)
297  {
298  mpClmem = clCreateBuffer(kEnv.mpkContext,
299  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
300  szHostBuffer,
301  pHostBuffer, &err);
302  if (CL_SUCCESS != err)
303  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
304  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
305  }
306  else
307  {
308  if (szHostBuffer == 0)
309  szHostBuffer = sizeof(double); // a dummy small value
310  // Marshal as a buffer of NANs
311  mpClmem = clCreateBuffer(kEnv.mpkContext,
312  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
313  szHostBuffer, nullptr, &err);
314  if (CL_SUCCESS != err)
315  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
316  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
317 
318  double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
319  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
320  szHostBuffer, 0, nullptr, nullptr, &err));
321  if (CL_SUCCESS != err)
322  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
323 
324  for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
325  rtl::math::setNan(&pNanBuffer[i]);
326  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
327  pNanBuffer, 0, nullptr, nullptr);
328  // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
329  if (CL_SUCCESS != err)
330  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
331  }
332 
333  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref, mnIndex) << ")");
334  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
335  if (CL_SUCCESS != err)
336  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
337  return 1;
338 }
339 
344 
349 
350 namespace {
351 
352 class ConstStringArgument : public DynamicKernelArgument
353 {
354 public:
355  ConstStringArgument( const ScCalcConfig& config, const std::string& s,
356  const FormulaTreeNodeRef& ft ) :
357  DynamicKernelArgument(config, s, ft) { }
359  virtual void GenDecl( std::stringstream& ss ) const override
360  {
361  ss << "unsigned " << mSymName;
362  }
363  virtual void GenDeclRef( std::stringstream& ss ) const override
364  {
365  ss << GenSlidingWindowDeclRef();
366  }
367  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
368  {
369  GenDecl(ss);
370  }
371  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
372  {
373  std::stringstream ss;
374  if (GetFormulaToken()->GetType() != formula::svString)
375  throw Unhandled(__FILE__, __LINE__);
376  FormulaToken* Tok = GetFormulaToken();
377  ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
378  return ss.str();
379  }
380  virtual size_t GetWindowSize() const override
381  {
382  return 1;
383  }
385  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
386  {
387  OpenCLZone zone;
388  FormulaToken* ref = mFormulaTree->GetFormulaToken();
389  cl_uint hashCode = 0;
390  if (ref->GetType() != formula::svString)
391  {
392  throw Unhandled(__FILE__, __LINE__);
393  }
394 
395  const OUString s = ref->GetString().getString().toAsciiUpperCase();
396  hashCode = s.hashCode();
397 
398  // Pass the scalar result back to the rest of the formula kernel
399  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode << "(" << DebugPeekData(ref) << ")" );
400  cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), static_cast<void*>(&hashCode));
401  if (CL_SUCCESS != err)
402  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
403  return 1;
404  }
405 };
406 
408 class DynamicKernelConstantArgument : public DynamicKernelArgument
409 {
410 public:
411  DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
412  const FormulaTreeNodeRef& ft ) :
413  DynamicKernelArgument(config, s, ft) { }
415  virtual void GenDecl( std::stringstream& ss ) const override
416  {
417  ss << "double " << mSymName;
418  }
419  virtual void GenDeclRef( std::stringstream& ss ) const override
420  {
421  ss << mSymName;
422  }
423  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
424  {
425  GenDecl(ss);
426  }
427  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
428  {
429  if (GetFormulaToken()->GetType() != formula::svDouble)
430  throw Unhandled(__FILE__, __LINE__);
431  return mSymName;
432  }
433  virtual size_t GetWindowSize() const override
434  {
435  return 1;
436  }
437  double GetDouble() const
438  {
439  FormulaToken* Tok = GetFormulaToken();
440  if (Tok->GetType() != formula::svDouble)
441  throw Unhandled(__FILE__, __LINE__);
442  return Tok->GetDouble();
443  }
445  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
446  {
447  OpenCLZone zone;
448  double tmp = GetDouble();
449  // Pass the scalar result back to the rest of the formula kernel
450  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
451  cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
452  if (CL_SUCCESS != err)
453  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
454  return 1;
455  }
456 };
457 
458 class DynamicKernelPiArgument : public DynamicKernelArgument
459 {
460 public:
461  DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
462  const FormulaTreeNodeRef& ft ) :
463  DynamicKernelArgument(config, s, ft) { }
465  virtual void GenDecl( std::stringstream& ss ) const override
466  {
467  ss << "double " << mSymName;
468  }
469  virtual void GenDeclRef( std::stringstream& ss ) const override
470  {
471  ss << "3.14159265358979";
472  }
473  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
474  {
475  GenDecl(ss);
476  }
477  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
478  {
479  return mSymName;
480  }
481  virtual size_t GetWindowSize() const override
482  {
483  return 1;
484  }
486  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
487  {
488  OpenCLZone zone;
489  double tmp = 0.0;
490  // Pass the scalar result back to the rest of the formula kernel
491  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp << " (PI)");
492  cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
493  if (CL_SUCCESS != err)
494  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
495  return 1;
496  }
497 };
498 
// Kernel argument that produces random numbers inside the kernel. The host passes one
// integer seed per Marshal() call; the generated OpenCL function <symbol>_Random(seed)
// combines that seed with the work item's global id using the threefry2x32 counter-based
// generator and scales the first 32-bit output word to a double.
499 class DynamicKernelRandomArgument : public DynamicKernelArgument
500 {
501 public:
502  DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
503  const FormulaTreeNodeRef& ft ) :
504  DynamicKernelArgument(config, s, ft) { }
// Declares the argument as "double <symbol>".
// NOTE(review): GenSlidingWindowDecl() below declares it as int and Marshal() passes a
// cl_int -- confirm that the differing type here is harmless.
506  virtual void GenDecl( std::stringstream& ss ) const override
507  {
508  ss << "double " << mSymName;
509  }
510  virtual void GenDeclRef( std::stringstream& ss ) const override
511  {
512  ss << mSymName;
513  }
// Declares the kernel parameter that carries the seed.
514  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
515  {
516  ss << "int " << mSymName;
517  }
// A reference to this argument is a call of the generated <symbol>_Random() function
// with the seed parameter.
518  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
519  {
520  return mSymName + "_Random(" + mSymName + ")";
521  }
// Emits the OpenCL source this argument needs: first the shared Random123/threefry2x32
// code, wrapped in an #ifndef DEFINED_RANDOM123_STUFF guard so that emitting it more than
// once is harmless, then the per-argument <symbol>_Random(int seed) function.
522  virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
523  {
524  // This string is from the pi_opencl_kernel.i file as
525  // generated when building the Random123 examples. Unused
526  // stuff has been removed, and the actual kernel is not the
527  // same as in the totally different use case of that example,
528  // of course. Only the code that calculates the counter-based
529  // random number and what it needs is left.
530  ss << "\
531 \n\
532 #ifndef DEFINED_RANDOM123_STUFF\n\
533 #define DEFINED_RANDOM123_STUFF\n\
534 \n\
535 /*\n\
536 Copyright 2010-2011, D. E. Shaw Research.\n\
537 All rights reserved.\n\
538 \n\
539 Redistribution and use in source and binary forms, with or without\n\
540 modification, are permitted provided that the following conditions are\n\
541 met:\n\
542 \n\
543 * Redistributions of source code must retain the above copyright\n\
544  notice, this list of conditions, and the following disclaimer.\n\
545 \n\
546 * Redistributions in binary form must reproduce the above copyright\n\
547  notice, this list of conditions, and the following disclaimer in the\n\
548  documentation and/or other materials provided with the distribution.\n\
549 \n\
550 * Neither the name of D. E. Shaw Research nor the names of its\n\
551  contributors may be used to endorse or promote products derived from\n\
552  this software without specific prior written permission.\n\
553 \n\
554 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
555 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
556 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
557 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
558 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
559 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
560 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
561 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
562 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
563 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
564 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
565 */\n\
566 \n\
567 typedef uint uint32_t;\n\
568 struct r123array2x32\n\
569 {\n\
570  uint32_t v[2];\n\
571 };\n\
572 enum r123_enum_threefry32x2\n\
573 {\n\
574  R_32x2_0_0 = 13,\n\
575  R_32x2_1_0 = 15,\n\
576  R_32x2_2_0 = 26,\n\
577  R_32x2_3_0 = 6,\n\
578  R_32x2_4_0 = 17,\n\
579  R_32x2_5_0 = 29,\n\
580  R_32x2_6_0 = 16,\n\
581  R_32x2_7_0 = 24\n\
582 };\n\
583 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
584  __attribute__ ((always_inline));\n\
585 inline uint32_t\n\
586 RotL_32 (uint32_t x, unsigned int N)\n\
587 {\n\
588  return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
589 }\n\
590 \n\
591 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
592 typedef struct r123array2x32 threefry2x32_key_t;\n\
593 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
594 inline threefry2x32_key_t\n\
595 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
596 {\n\
597  return uk;\n\
598 }\n\
599 \n\
600 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
601  threefry2x32_ctr_t in,\n\
602  threefry2x32_key_t k)\n\
603  __attribute__ ((always_inline));\n\
604 inline threefry2x32_ctr_t\n\
605 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
606  threefry2x32_key_t k)\n\
607 {\n\
608  threefry2x32_ctr_t X;\n\
609  uint32_t ks[2 + 1];\n\
610  int i;\n\
611  ks[2] = 0x1BD11BDA;\n\
612  for (i = 0; i < 2; i++) {\n\
613  ks[i] = k.v[i];\n\
614  X.v[i] = in.v[i];\n\
615  ks[2] ^= k.v[i];\n\
616  }\n\
617  X.v[0] += ks[0];\n\
618  X.v[1] += ks[1];\n\
619  if (Nrounds > 0) {\n\
620  X.v[0] += X.v[1];\n\
621  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
622  X.v[1] ^= X.v[0];\n\
623  }\n\
624  if (Nrounds > 1) {\n\
625  X.v[0] += X.v[1];\n\
626  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
627  X.v[1] ^= X.v[0];\n\
628  }\n\
629  if (Nrounds > 2) {\n\
630  X.v[0] += X.v[1];\n\
631  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
632  X.v[1] ^= X.v[0];\n\
633  }\n\
634  if (Nrounds > 3) {\n\
635  X.v[0] += X.v[1];\n\
636  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
637  X.v[1] ^= X.v[0];\n\
638  }\n\
639  if (Nrounds > 3) {\n\
640  X.v[0] += ks[1];\n\
641  X.v[1] += ks[2];\n\
642  X.v[1] += 1;\n\
643  }\n\
644  if (Nrounds > 4) {\n\
645  X.v[0] += X.v[1];\n\
646  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
647  X.v[1] ^= X.v[0];\n\
648  }\n\
649  if (Nrounds > 5) {\n\
650  X.v[0] += X.v[1];\n\
651  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
652  X.v[1] ^= X.v[0];\n\
653  }\n\
654  if (Nrounds > 6) {\n\
655  X.v[0] += X.v[1];\n\
656  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
657  X.v[1] ^= X.v[0];\n\
658  }\n\
659  if (Nrounds > 7) {\n\
660  X.v[0] += X.v[1];\n\
661  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
662  X.v[1] ^= X.v[0];\n\
663  }\n\
664  if (Nrounds > 7) {\n\
665  X.v[0] += ks[2];\n\
666  X.v[1] += ks[0];\n\
667  X.v[1] += 2;\n\
668  }\n\
669  if (Nrounds > 8) {\n\
670  X.v[0] += X.v[1];\n\
671  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
672  X.v[1] ^= X.v[0];\n\
673  }\n\
674  if (Nrounds > 9) {\n\
675  X.v[0] += X.v[1];\n\
676  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
677  X.v[1] ^= X.v[0];\n\
678  }\n\
679  if (Nrounds > 10) {\n\
680  X.v[0] += X.v[1];\n\
681  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
682  X.v[1] ^= X.v[0];\n\
683  }\n\
684  if (Nrounds > 11) {\n\
685  X.v[0] += X.v[1];\n\
686  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
687  X.v[1] ^= X.v[0];\n\
688  }\n\
689  if (Nrounds > 11) {\n\
690  X.v[0] += ks[0];\n\
691  X.v[1] += ks[1];\n\
692  X.v[1] += 3;\n\
693  }\n\
694  if (Nrounds > 12) {\n\
695  X.v[0] += X.v[1];\n\
696  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
697  X.v[1] ^= X.v[0];\n\
698  }\n\
699  if (Nrounds > 13) {\n\
700  X.v[0] += X.v[1];\n\
701  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
702  X.v[1] ^= X.v[0];\n\
703  }\n\
704  if (Nrounds > 14) {\n\
705  X.v[0] += X.v[1];\n\
706  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
707  X.v[1] ^= X.v[0];\n\
708  }\n\
709  if (Nrounds > 15) {\n\
710  X.v[0] += X.v[1];\n\
711  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
712  X.v[1] ^= X.v[0];\n\
713  }\n\
714  if (Nrounds > 15) {\n\
715  X.v[0] += ks[1];\n\
716  X.v[1] += ks[2];\n\
717  X.v[1] += 4;\n\
718  }\n\
719  if (Nrounds > 16) {\n\
720  X.v[0] += X.v[1];\n\
721  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
722  X.v[1] ^= X.v[0];\n\
723  }\n\
724  if (Nrounds > 17) {\n\
725  X.v[0] += X.v[1];\n\
726  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
727  X.v[1] ^= X.v[0];\n\
728  }\n\
729  if (Nrounds > 18) {\n\
730  X.v[0] += X.v[1];\n\
731  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
732  X.v[1] ^= X.v[0];\n\
733  }\n\
734  if (Nrounds > 19) {\n\
735  X.v[0] += X.v[1];\n\
736  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
737  X.v[1] ^= X.v[0];\n\
738  }\n\
739  if (Nrounds > 19) {\n\
740  X.v[0] += ks[2];\n\
741  X.v[1] += ks[0];\n\
742  X.v[1] += 5;\n\
743  }\n\
744  if (Nrounds > 20) {\n\
745  X.v[0] += X.v[1];\n\
746  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
747  X.v[1] ^= X.v[0];\n\
748  }\n\
749  if (Nrounds > 21) {\n\
750  X.v[0] += X.v[1];\n\
751  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
752  X.v[1] ^= X.v[0];\n\
753  }\n\
754  if (Nrounds > 22) {\n\
755  X.v[0] += X.v[1];\n\
756  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
757  X.v[1] ^= X.v[0];\n\
758  }\n\
759  if (Nrounds > 23) {\n\
760  X.v[0] += X.v[1];\n\
761  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
762  X.v[1] ^= X.v[0];\n\
763  }\n\
764  if (Nrounds > 23) {\n\
765  X.v[0] += ks[0];\n\
766  X.v[1] += ks[1];\n\
767  X.v[1] += 6;\n\
768  }\n\
769  if (Nrounds > 24) {\n\
770  X.v[0] += X.v[1];\n\
771  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
772  X.v[1] ^= X.v[0];\n\
773  }\n\
774  if (Nrounds > 25) {\n\
775  X.v[0] += X.v[1];\n\
776  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
777  X.v[1] ^= X.v[0];\n\
778  }\n\
779  if (Nrounds > 26) {\n\
780  X.v[0] += X.v[1];\n\
781  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
782  X.v[1] ^= X.v[0];\n\
783  }\n\
784  if (Nrounds > 27) {\n\
785  X.v[0] += X.v[1];\n\
786  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
787  X.v[1] ^= X.v[0];\n\
788  }\n\
789  if (Nrounds > 27) {\n\
790  X.v[0] += ks[1];\n\
791  X.v[1] += ks[2];\n\
792  X.v[1] += 7;\n\
793  }\n\
794  if (Nrounds > 28) {\n\
795  X.v[0] += X.v[1];\n\
796  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
797  X.v[1] ^= X.v[0];\n\
798  }\n\
799  if (Nrounds > 29) {\n\
800  X.v[0] += X.v[1];\n\
801  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
802  X.v[1] ^= X.v[0];\n\
803  }\n\
804  if (Nrounds > 30) {\n\
805  X.v[0] += X.v[1];\n\
806  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
807  X.v[1] ^= X.v[0];\n\
808  }\n\
809  if (Nrounds > 31) {\n\
810  X.v[0] += X.v[1];\n\
811  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
812  X.v[1] ^= X.v[0];\n\
813  }\n\
814  if (Nrounds > 31) {\n\
815  X.v[0] += ks[2];\n\
816  X.v[1] += ks[0];\n\
817  X.v[1] += 8;\n\
818  }\n\
819  return X;\n\
820 }\n\
821 \n\
822 enum r123_enum_threefry2x32\n\
823 { threefry2x32_rounds = 20 };\n\
824 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
825  threefry2x32_key_t k)\n\
826  __attribute__ ((always_inline));\n\
827 inline threefry2x32_ctr_t\n\
828 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
829 {\n\
830  return threefry2x32_R (threefry2x32_rounds, in, k);\n\
831 }\n\
832 #endif\n\
833 \n\
834 ";
// Per-argument generator function: the key is built from the work item's global id, the
// counter from the seed; the first output word is multiplied by 1/(SAL_MAX_UINT32 + 1)
// and offset by half of that factor.
835  ss << "double " << mSymName << "_Random (int seed)\n\
836 {\n\
837  unsigned tid = get_global_id(0);\n\
838  threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
839  threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
840  c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
841  const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
842  const double halffactor = 0.5*factor;\n\
843  return c.v[0] * factor + halffactor;\n\
844 }\n\
845 ";
846  }
847  virtual size_t GetWindowSize() const override
848  {
849  return 1;
850  }
// Draws a fresh seed in [0, SAL_MAX_INT32] on the host and passes it as kernel argument
// argno. Returns the number of kernel arguments set (1); throws OpenCLError when
// clSetKernelArg() fails.
852  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
853  {
854  OpenCLZone zone;
855  cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
856  // Pass the scalar result back to the rest of the formula kernel
857  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed << "(RANDOM)");
858  cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
859  if (CL_SUCCESS != err)
860  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
861  return 1;
862  }
863 };
864 
866 class DynamicKernelStringArgument : public VectorRef
867 {
868 public:
869  DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
870  const FormulaTreeNodeRef& ft, int index = 0 ) :
871  VectorRef(config, s, ft, index) { }
872 
873  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
875  virtual void GenDecl( std::stringstream& ss ) const override
876  {
877  ss << "__global unsigned int *" << mSymName;
878  }
879  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
880  {
881  DynamicKernelStringArgument::GenDecl(ss);
882  }
883  virtual size_t Marshal( cl_kernel, int, int, cl_program ) override;
884 };
885 
886 }
887 
889 size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
890 {
891  OpenCLZone zone;
892  FormulaToken* ref = mFormulaTree->GetFormulaToken();
893 
896  cl_int err;
898  size_t nStrings = 0;
899  if (ref->GetType() == formula::svSingleVectorRef)
900  {
901  const formula::SingleVectorRefToken* pSVR =
902  static_cast<const formula::SingleVectorRefToken*>(ref);
903  nStrings = pSVR->GetArrayLength();
904  vRef = pSVR->GetArray();
905  }
906  else if (ref->GetType() == formula::svDoubleVectorRef)
907  {
908  const formula::DoubleVectorRefToken* pDVR =
909  static_cast<const formula::DoubleVectorRefToken*>(ref);
910  nStrings = pDVR->GetArrayLength();
911  vRef = pDVR->GetArrays()[mnIndex];
912  }
913  size_t szHostBuffer = nStrings * sizeof(cl_int);
914  cl_uint* pHashBuffer = nullptr;
915 
916  if (vRef.mpStringArray != nullptr)
917  {
918  // Marshal strings. Right now we pass hashes of these string
919  mpClmem = clCreateBuffer(kEnv.mpkContext,
920  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
921  szHostBuffer, nullptr, &err);
922  if (CL_SUCCESS != err)
923  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
924  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
925 
926  pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
927  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
928  szHostBuffer, 0, nullptr, nullptr, &err));
929  if (CL_SUCCESS != err)
930  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
931 
932  for (size_t i = 0; i < nStrings; i++)
933  {
934  if (vRef.mpStringArray[i])
935  {
936  const OUString tmp(vRef.mpStringArray[i]);
937  pHashBuffer[i] = tmp.hashCode();
938  }
939  else
940  {
941  pHashBuffer[i] = 0;
942  }
943  }
944  }
945  else
946  {
947  if (nStrings == 0)
948  szHostBuffer = sizeof(cl_int); // a dummy small value
949  // Marshal as a buffer of NANs
950  mpClmem = clCreateBuffer(kEnv.mpkContext,
951  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
952  szHostBuffer, nullptr, &err);
953  if (CL_SUCCESS != err)
954  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
955  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
956 
957  pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
958  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
959  szHostBuffer, 0, nullptr, nullptr, &err));
960  if (CL_SUCCESS != err)
961  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
962 
963  for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
964  pHashBuffer[i] = 0;
965  }
966  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
967  pHashBuffer, 0, nullptr, nullptr);
968  if (CL_SUCCESS != err)
969  throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
970 
971  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref,mnIndex) << ")");
972  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
973  if (CL_SUCCESS != err)
974  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
975  return 1;
976 }
977 
978 namespace {
979 
981 class DynamicKernelMixedArgument : public VectorRef
982 {
983 public:
984  DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
985  const FormulaTreeNodeRef& ft ) :
986  VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
987  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
988  {
989  VectorRef::GenSlidingWindowDecl(ss);
990  ss << ", ";
991  mStringArgument.GenSlidingWindowDecl(ss);
992  }
993  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
995  virtual void GenDecl( std::stringstream& ss ) const override
996  {
997  VectorRef::GenDecl(ss);
998  ss << ", ";
999  mStringArgument.GenDecl(ss);
1000  }
1001  virtual void GenDeclRef( std::stringstream& ss ) const override
1002  {
1003  VectorRef::GenDeclRef(ss);
1004  ss << ",";
1005  mStringArgument.GenDeclRef(ss);
1006  }
1007  virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1008  {
1009  std::stringstream ss;
1010  ss << "(!isnan(" << VectorRef::GenSlidingWindowDeclRef();
1011  ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
1012  ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1013  ss << ")";
1014  return ss.str();
1015  }
1016  virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1017  {
1018  std::stringstream ss;
1019  ss << VectorRef::GenSlidingWindowDeclRef();
1020  return ss.str();
1021  }
1022  virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1023  {
1024  std::stringstream ss;
1025  ss << mStringArgument.GenSlidingWindowDeclRef();
1026  return ss.str();
1027  }
1028  virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1029  {
1030  int i = VectorRef::Marshal(k, argno, vw, p);
1031  i += mStringArgument.Marshal(k, argno + i, vw, p);
1032  return i;
1033  }
1034 
1035 protected:
1036  DynamicKernelStringArgument mStringArgument;
1037 };
1038 
// Kernel argument for a double-vector reference that is read through a window.
// Base provides the argument name (Base::GetName()); this wrapper adds the
// generated text that indexes the window and the loop code that walks it.
// NOTE(review): bIsStartFixed, bIsEndFixed and mpDVR are used below, but neither
// their declarations nor the initialisation of the two flags is visible in this
// listing - confirm against the full file.
1042 template<class Base>
1043 class DynamicKernelSlidingArgument : public Base
1044 {
1045 public:
 // Throws Unhandled unless the node's token is an svDoubleVectorRef.
1046  DynamicKernelSlidingArgument(const ScCalcConfig& config, const std::string& s,
1047  const FormulaTreeNodeRef& ft,
1048  const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
1049  : Base(config, s, ft, index)
1050  , mpCodeGen(CodeGen)
1051  {
1052  FormulaToken* t = ft->GetFormulaToken();
1053  if (t->GetType() != formula::svDoubleVectorRef)
1054  throw Unhandled(__FILE__, __LINE__);
1055  mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1058  }
1059 
1060  // Should only be called by SumIfs. Yikes!
 // True when the window has more than 100 rows and start and end are either
 // both fixed or both not fixed.
1061  virtual bool NeedParallelReduction() const
1062  {
1063  assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
1064  return GetWindowSize() > 100 &&
1065  ((GetStartFixed() && GetEndFixed()) ||
1066  (!GetStartFixed() && !GetEndFixed()));
1067  }
1068 
 // Emits nothing for this argument type.
1069  virtual void GenSlidingWindowFunction( std::stringstream& ) { }
1070 
 // Returns the generated element access: "<name>[i + gid0]" when neither end
 // is fixed, "<name>[i]" otherwise. With nested == true the access is wrapped
 // in a bounds check against the array length that yields NAN when out of range.
1071  std::string GenSlidingWindowDeclRef( bool nested = false ) const
1072  {
1073  size_t nArrayLength = mpDVR->GetArrayLength();
1074  std::stringstream ss;
1075  if (!bIsStartFixed && !bIsEndFixed)
1076  {
1077  if (nested)
1078  ss << "((i+gid0) <" << nArrayLength << "?";
1079  ss << Base::GetName() << "[i + gid0]";
1080  if (nested)
1081  ss << ":NAN)";
1082  }
1083  else
1084  {
1085  if (nested)
1086  ss << "(i <" << nArrayLength << "?";
1087  ss << Base::GetName() << "[i]";
1088  if (nested)
1089  ss << ":NAN)";
1090  }
1091  return ss.str();
1092  }
 // Writes the code that iterates over the window into ss.
 // needBody is set to true when only an open "for (...){" header was written
 // (the two cases where exactly one end is fixed) and to false when complete,
 // unrolled accumulation code was written (both ends fixed, or neither).
 // Always returns mpDVR->GetRefRowSize().
1094  size_t GenReductionLoopHeader(
1095  std::stringstream& ss, bool& needBody )
1096  {
1097  assert(mpDVR);
1098  size_t nCurWindowSize = mpDVR->GetRefRowSize();
1099 
1100  {
 // Start moves with the row, end is fixed: loop from gid0.
1101  if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1102  {
1103  ss << "for (int i = ";
1104  ss << "gid0; i < " << mpDVR->GetArrayLength();
1105  ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
1106  needBody = true;
1107  return nCurWindowSize;
1108  }
 // Start is fixed, end moves with the row: upper bound grows by gid0.
1109  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1110  {
1111  ss << "for (int i = ";
1112  ss << "0; i < " << mpDVR->GetArrayLength();
1113  ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
1114  needBody = true;
1115  return nCurWindowSize;
1116  }
 // Neither end fixed: emit an outer loop unrolled UNROLLING_FACTOR times,
 // each step guarded by "i + gid0 < array length".
1117  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1118  {
1119  ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1120  ss << "{int i;\n\t";
1121  std::stringstream temp1, temp2;
1122  int outLoopSize = UNROLLING_FACTOR;
1123  if (nCurWindowSize / outLoopSize != 0)
1124  {
1125  ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1126  for (int count = 0; count < outLoopSize; count++)
1127  {
1128  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
 // The step text is built once (count == 0) and re-emitted for every step.
1129  if (count == 0)
1130  {
1131  temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1132  temp1 << "){\n\t\t";
1133  temp1 << "tmp = legalize(";
1134  temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1135  temp1 << ", tmp);\n\t\t\t";
1136  temp1 << "}\n\t";
1137  }
1138  ss << temp1.str();
1139  }
1140  ss << "}\n\t";
1141  }
1142  // The residual of mod outLoopSize
1143  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1144  {
1145  ss << "i = " << count << ";\n\t";
 // As above: build the step text on the first residual index only.
1146  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1147  {
1148  temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1149  temp2 << "){\n\t\t";
1150  temp2 << "tmp = legalize(";
1151  temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1152  temp2 << ", tmp);\n\t\t\t";
1153  temp2 << "}\n\t";
1154  }
1155  ss << temp2.str();
1156  }
1157  ss << "}\n";
1158  needBody = false;
1159  return nCurWindowSize;
1160  }
1161  // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
 // Same unrolled shape as the previous case, but the guard is "i < array length".
1162  else
1163  {
1164  ss << "\n\t";
1165  ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1166  ss << "{int i;\n\t";
1167  std::stringstream temp1, temp2;
1168  int outLoopSize = UNROLLING_FACTOR;
1169  if (nCurWindowSize / outLoopSize != 0)
1170  {
1171  ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1172  for (int count = 0; count < outLoopSize; count++)
1173  {
1174  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1175  if (count == 0)
1176  {
1177  temp1 << "if(i < " << mpDVR->GetArrayLength();
1178  temp1 << "){\n\t\t";
1179  temp1 << "tmp = legalize(";
1180  temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1181  temp1 << ", tmp);\n\t\t\t";
1182  temp1 << "}\n\t";
1183  }
1184  ss << temp1.str();
1185  }
1186  ss << "}\n\t";
1187  }
1188  // The residual of mod outLoopSize
1189  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1190  {
1191  ss << "i = " << count << ";\n\t";
1192  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1193  {
1194  temp2 << "if(i < " << mpDVR->GetArrayLength();
1195  temp2 << "){\n\t\t";
1196  temp2 << "tmp = legalize(";
1197  temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1198  temp2 << ", tmp);\n\t\t\t";
1199  temp2 << "}\n\t";
1200  }
1201  ss << temp2.str();
1202  }
1203  ss << "}\n";
1204  needBody = false;
1205  return nCurWindowSize;
1206  }
1207  }
1208  }
1209 
 // Array length as reported by the vector-reference token.
1210  size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1211 
 // Window size: the token's GetRefRowSize().
1212  size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1213 
1214  bool GetStartFixed() const { return bIsStartFixed; }
1215 
1216  bool GetEndFixed() const { return bIsEndFixed; }
1217 
1218 protected:
1221  // from parent nodes
1222  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1223 };
1224 
1226 class DynamicKernelMixedSlidingArgument : public VectorRef
1227 {
1228 public:
1229  DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
1230  const FormulaTreeNodeRef& ft, const std::shared_ptr<SlidingFunctionBase>& CodeGen,
1231  int index ) :
1232  VectorRef(config, s, ft),
1233  mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
1234  mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
1235  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
1236  {
1237  mDoubleArgument.GenSlidingWindowDecl(ss);
1238  ss << ", ";
1239  mStringArgument.GenSlidingWindowDecl(ss);
1240  }
1241  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
1243  virtual void GenDecl( std::stringstream& ss ) const override
1244  {
1245  mDoubleArgument.GenDecl(ss);
1246  ss << ", ";
1247  mStringArgument.GenDecl(ss);
1248  }
1249  virtual void GenDeclRef( std::stringstream& ss ) const override
1250  {
1251  mDoubleArgument.GenDeclRef(ss);
1252  ss << ",";
1253  mStringArgument.GenDeclRef(ss);
1254  }
1255  virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1256  {
1257  std::stringstream ss;
1258  ss << "(!isnan(" << mDoubleArgument.GenSlidingWindowDeclRef();
1259  ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
1260  ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1261  ss << ")";
1262  return ss.str();
1263  }
1264  virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1265  {
1266  std::stringstream ss;
1267  ss << mDoubleArgument.GenSlidingWindowDeclRef();
1268  return ss.str();
1269  }
1270  virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1271  {
1272  std::stringstream ss;
1273  ss << mStringArgument.GenSlidingWindowDeclRef();
1274  return ss.str();
1275  }
1276  virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1277  {
1278  int i = mDoubleArgument.Marshal(k, argno, vw, p);
1279  i += mStringArgument.Marshal(k, argno + i, vw, p);
1280  return i;
1281  }
1282 
1283 protected:
1284  DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
1285  DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
1286 };
1287 
1289 class SymbolTable
1290 {
1291 public:
1292  typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1293  // This avoids instability caused by using pointer as the key type
1294  SymbolTable() : mCurId(0) { }
1295  template <class T>
1296  const DynamicKernelArgument* DeclRefArg(const ScCalcConfig& config, const FormulaTreeNodeRef&,
1297  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize);
1299  void DumpSlidingWindowFunctions( std::stringstream& ss )
1300  {
1301  for (auto const& argument : mParams)
1302  {
1303  argument->GenSlidingWindowFunction(ss);
1304  ss << "\n";
1305  }
1306  }
1309  void Marshal( cl_kernel, int, cl_program );
1310 
1311 private:
1312  unsigned int mCurId;
1313  ArgumentMap mSymbols;
1314  std::vector<DynamicKernelArgumentRef> mParams;
1315 };
1316 
1317 }
1318 
1319 void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
1320 {
1321  int i = 1; //The first argument is reserved for results
1322  for (auto const& argument : mParams)
1323  {
1324  i += argument->Marshal(k, i, nVectorWidth, pProgram);
1325  }
1326 }
1327 
1328 namespace {
1329 
// Double-vector argument variant with its own reduction-related members
// (GenSlidingWindowFunction, GenReductionLoopHeader and Marshal are declared
// here and defined out of line) and a second OpenCL buffer, mpClmem2, which the
// destructor releases.
// NOTE(review): mpDVR is used below and bIsStartFixed/bIsEndFixed are read, but
// the mpDVR declaration and the initialisation of the two flags are not
// visible in this listing - confirm against the full file.
1332 template<class Base>
1333 class ParallelReductionVectorRef : public Base
1334 {
1335 public:
 // Throws Unhandled unless the node's token is an svDoubleVectorRef.
 // mpClmem2 starts out as nullptr.
1336  ParallelReductionVectorRef(const ScCalcConfig& config, const std::string& s,
1337  const FormulaTreeNodeRef& ft,
1338  const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
1339  : Base(config, s, ft, index)
1340  , mpCodeGen(CodeGen)
1341  , mpClmem2(nullptr)
1342  {
1343  FormulaToken* t = ft->GetFormulaToken();
1344  if (t->GetType() != formula::svDoubleVectorRef)
1345  throw Unhandled(__FILE__, __LINE__);
1346  mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1349  }
1350 
 // Defined out of line.
1352  virtual void GenSlidingWindowFunction( std::stringstream& ss );
1353 
 // Returns "<name>[i + gid0]" when neither end is fixed, "<name>[i]" otherwise.
 // The bool parameter is ignored.
1354  virtual std::string GenSlidingWindowDeclRef( bool ) const
1355  {
1356  std::stringstream ss;
1357  if (!bIsStartFixed && !bIsEndFixed)
1358  ss << Base::GetName() << "[i + gid0]";
1359  else
1360  ss << Base::GetName() << "[i]";
1361  return ss.str();
1362  }
1363 
 // Defined out of line.
1365  size_t GenReductionLoopHeader(
1366  std::stringstream& ss, int nResultSize, bool& needBody );
1367 
 // Defined out of line.
1368  virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
1369 
 // Releases mpClmem2 if it was set; a failing release is only logged.
1370  ~ParallelReductionVectorRef()
1371  {
1372  if (mpClmem2)
1373  {
1374  cl_int err;
1375  err = clReleaseMemObject(mpClmem2);
1376  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
1377  mpClmem2 = nullptr;
1378  }
1379  }
1380 
 // Array length as reported by the vector-reference token.
1381  size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1382 
 // Window size: the token's GetRefRowSize().
1383  size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1384 
1385  bool GetStartFixed() const { return bIsStartFixed; }
1386 
1387  bool GetEndFixed() const { return bIsEndFixed; }
1388 
1389 protected:
1390  bool bIsStartFixed, bIsEndFixed;
1392  // from parent nodes
1393  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1394  // controls whether to invoke the reduction kernel during marshaling or not
1395  cl_mem mpClmem2;
1396 };
1397 
// Base for operators whose generated function folds its arguments, one after
// another, into a local "tmp" via Gen2(), starting from GetBottom().
// The generated function is named "<sSymName>_<BinFuncName()>" and returns double.
// NOTE(review): mnResultSize is initialised by the constructor and passed on to
// ParallelNumericRange::GenReductionLoopHeader, but its declaration is not
// visible in this listing - confirm against the full file.
1398 class Reduction : public SlidingFunctionBase
1399 {
1401 public:
1402  explicit Reduction(int nResultSize) : mnResultSize(nResultSize) {}
1403 
1404  typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1405  typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1406  typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1407 
 // Hook for subclasses to emit their own NaN handling for argument number
 // "argno". Returning true suppresses the generic isnan() branch below.
 // The default emits nothing and returns false.
1408  virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
1409  {
1410  return false;
1411  }
1412 
 // Writes the complete function text for this reduction into ss.
1413  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1414  const std::string& sSymName, SubArguments& vSubArguments ) override
1415  {
 // Signature: one declaration per sub-argument, separated by ", ".
1416  ss << "\ndouble " << sSymName;
1417  ss << "_" << BinFuncName() << "(";
1418  for (size_t i = 0; i < vSubArguments.size(); i++)
1419  {
1420  if (i)
1421  ss << ", ";
1422  vSubArguments[i]->GenSlidingWindowDecl(ss);
1423  }
1424  ss << ") {\n";
1425  ss << "double tmp = " << GetBottom() << ";\n";
1426  ss << "int gid0 = get_global_id(0);\n";
 // nCount is only declared for average and min/max.
1427  if (isAverage() || isMinOrMax())
1428  ss << "int nCount = 0;\n";
1429  ss << "double tmpBottom;\n";
 // Arguments are visited from the last one to the first.
1430  unsigned i = vSubArguments.size();
1431  while (i--)
1432  {
 // Range arguments write their own loop code; when they report that no
 // body is needed, this argument is finished.
1433  if (NumericRange* NR =
1434  dynamic_cast<NumericRange*>(vSubArguments[i].get()))
1435  {
1436  bool needBody;
1437  NR->GenReductionLoopHeader(ss, needBody);
1438  if (!needBody)
1439  continue;
1440  }
1441  else if (ParallelNumericRange* PNR =
1442  dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
1443  {
1444  //did not handle yet
1445  bool bNeedBody = false;
1446  PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1447  if (!bNeedBody)
1448  continue;
1449  }
1450  else if (StringRange* SR =
1451  dynamic_cast<StringRange*>(vSubArguments[i].get()))
1452  {
1453  //did not handle yet
1454  bool needBody;
1455  SR->GenReductionLoopHeader(ss, needBody);
1456  if (!needBody)
1457  continue;
1458  }
1459  else
1460  {
1461  FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1462  assert(pCur);
1464 
 // Single-vector references and plain doubles get a block of their own.
1465  if (pCur->GetType() == formula::svSingleVectorRef ||
1466  pCur->GetType() == formula::svDouble)
1467  {
1468  ss << "{\n";
1469  }
1470  }
 // Pushed operand: handle NaN first, then fold the value into tmp.
1471  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1472  {
1473  bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1474 
1475  ss << "tmpBottom = " << GetBottom() << ";\n";
1476 
1477  if (!bNanHandled)
1478  {
1479  ss << "if (isnan(";
1480  ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1481  ss << "))\n";
 // On NaN: either return 0 outright, or fold the bottom value instead.
1482  if (ZeroReturnZero())
1483  ss << " return 0;\n";
1484  else
1485  {
1486  ss << " tmp = ";
1487  ss << Gen2("tmpBottom", "tmp") << ";\n";
1488  }
1489  ss << "else\n";
1490  }
1491  ss << "{";
1492  ss << " tmp = ";
1493  ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1494  ss << ";\n";
1495  ss << " }\n";
 // NOTE(review): presumably closes the brace opened by the loop header or
 // by the "{" emitted above - confirm for token types other than those two.
1496  ss << "}\n";
1497  }
1498  else
1499  {
1500  ss << "tmp = ";
1501  ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1502  ss << ";\n";
1503  }
1504  }
 // Nothing counted: average yields a division-by-zero error, min/max yields 0.
1505  if (isAverage())
1506  ss <<
1507  "if (nCount==0)\n"
1508  " return CreateDoubleError(DivisionByZero);\n";
1509  else if (isMinOrMax())
1510  ss <<
1511  "if (nCount==0)\n"
1512  " return 0;\n";
1513  ss << "return tmp";
 // Average: divide the accumulated value by the count.
1514  if (isAverage())
1515  ss << "*pow((double)nCount,-1.0)";
1516  ss << ";\n}";
1517  }
1518  virtual bool isAverage() const { return false; }
1519  virtual bool isMinOrMax() const { return false; }
1520  virtual bool takeString() const override { return false; }
1521  virtual bool takeNumeric() const override { return true; }
1522 };
1523 
1524 // Strictly binary operators
1525 class Binary : public SlidingFunctionBase
1526 {
1527 public:
1528  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1529  const std::string& sSymName, SubArguments& vSubArguments ) override
1530  {
1531  ss << "\ndouble " << sSymName;
1532  ss << "_" << BinFuncName() << "(";
1533  assert(vSubArguments.size() == 2);
1534  for (size_t i = 0; i < vSubArguments.size(); i++)
1535  {
1536  if (i)
1537  ss << ", ";
1538  vSubArguments[i]->GenSlidingWindowDecl(ss);
1539  }
1540  ss << ") {\n\t";
1541  ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1542  ss << "double tmp = ";
1543  ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(),
1544  vSubArguments[1]->GenSlidingWindowDeclRef()) << ";\n\t";
1545  ss << "return tmp;\n}";
1546  }
1547  virtual bool takeString() const override { return true; }
1548  virtual bool takeNumeric() const override { return true; }
1549 };
1550 
// Generates "double <sSymName>_<BinFuncName()>(...)": for every index of the
// window the (NaN/out-of-range guarded) elements of all arguments are
// multiplied and the product is added to "tmp" via fsum(). The loop is emitted
// unrolled UNROLLING_FACTOR times, followed by the residual indices.
1551 class SumOfProduct : public SlidingFunctionBase
1552 {
1553 public:
1554  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1555  const std::string& sSymName, SubArguments& vSubArguments ) override
1556  {
1557  size_t nCurWindowSize = 0;
1558  FormulaToken* tmpCur = nullptr;
1559  const formula::DoubleVectorRefToken* pCurDVR = nullptr;
1560  ss << "\ndouble " << sSymName;
1561  ss << "_" << BinFuncName() << "(";
 // Emit the parameter list and track the largest window size of any argument.
1562  for (size_t i = 0; i < vSubArguments.size(); i++)
1563  {
1564  if (i)
1565  ss << ",";
1566  vSubArguments[i]->GenSlidingWindowDecl(ss);
1567  size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1568  nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1569  nCurChildWindowSize : nCurWindowSize;
1570  tmpCur = vSubArguments[i]->GetFormulaToken();
1571  if (ocPush == tmpCur->GetOpCode())
1572  {
1573 
 // NOTE(review): the token is cast to DoubleVectorRefToken without a
 // GetType() check - confirm that only double-vector refs reach this point.
 // Ranges with exactly one fixed end are rejected.
1574  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1575  if (pCurDVR->IsStartFixed() != pCurDVR->IsEndFixed())
1576  throw Unhandled(__FILE__, __LINE__);
1577  }
1578  }
1579  ss << ") {\n";
1580  ss << " double tmp = 0.0;\n";
1581  ss << " int gid0 = get_global_id(0);\n";
1582 
1583  ss << "\tint i;\n\t";
1584  ss << "int currentCount0;\n";
 // NOTE(review): size() - 1 is unsigned; presumably the argument list is
 // never empty here - verify against callers.
1585  for (size_t i = 0; i < vSubArguments.size() - 1; i++)
1586  ss << "int currentCount" << i + 1 << ";\n";
 // temp3: text of one unrolled step; temp4: text of one residual step.
1587  std::stringstream temp3, temp4;
1588  int outLoopSize = UNROLLING_FACTOR;
1589  if (nCurWindowSize / outLoopSize != 0)
1590  {
1591  ss << "for(int outLoop=0; outLoop<" <<
1592  nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1593  for (int count = 0; count < outLoopSize; count++)
1594  {
1595  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
 // The step text is built once (count == 0) and re-emitted for every step.
1596  if (count == 0)
1597  {
 // currentCountN: one-based position of the element, offset by gid0
 // when neither end of the range is fixed.
1598  for (size_t i = 0; i < vSubArguments.size(); i++)
1599  {
1600  tmpCur = vSubArguments[i]->GetFormulaToken();
1601  if (ocPush == tmpCur->GetOpCode())
1602  {
1603  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1604  if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1605  {
1606  temp3 << " currentCount";
1607  temp3 << i;
1608  temp3 << " =i+gid0+1;\n";
1609  }
1610  else
1611  {
1612  temp3 << " currentCount";
1613  temp3 << i;
1614  temp3 << " =i+1;\n";
1615  }
1616  }
1617  }
1618 
 // Product of all arguments; an element past the array length or a NaN
 // element contributes 0.
1619  temp3 << "tmp = fsum(";
1620  for (size_t i = 0; i < vSubArguments.size(); i++)
1621  {
1622  if (i)
1623  temp3 << "*";
1624  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1625  {
1626  temp3 << "(";
1627  temp3 << "(currentCount";
1628  temp3 << i;
1629  temp3 << ">";
 // NOTE(review): the right-hand operand of this comparison is not visible
 // in this listing; the cast below suggests the single-vector-ref type - confirm.
1630  if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1632  {
1633  const formula::SingleVectorRefToken* pSVR =
1634  static_cast<const formula::SingleVectorRefToken*>
1635  (vSubArguments[i]->GetFormulaToken());
1636  temp3 << pSVR->GetArrayLength();
1637  temp3 << ")||isnan(" << vSubArguments[i]
1638  ->GenSlidingWindowDeclRef();
1639  temp3 << ")?0:";
1640  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1641  temp3 << ")";
1642  }
 // NOTE(review): right-hand operand not visible in this listing; the cast
 // below suggests the double-vector-ref type - confirm.
1643  else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1645  {
1646  const formula::DoubleVectorRefToken* pSVR =
1647  static_cast<const formula::DoubleVectorRefToken*>
1648  (vSubArguments[i]->GetFormulaToken());
1649  temp3 << pSVR->GetArrayLength();
1650  temp3 << ")||isnan(" << vSubArguments[i]
1651  ->GenSlidingWindowDeclRef(true);
1652  temp3 << ")?0:";
1653  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1654  temp3 << ")";
1655  }
1656 
1657  }
1658  else
1659  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1660  }
1661  temp3 << ", tmp);\n\t";
1662  }
1663  ss << temp3.str();
1664  }
1665  ss << "}\n\t";
1666  }
1667  //The residual of mod outLoopSize
1668  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize;
1669  count < nCurWindowSize; count++)
1670  {
1671  ss << "i =" << count << ";\n";
 // Same step text as above, built on the first residual index only.
1672  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1673  {
1674  for (size_t i = 0; i < vSubArguments.size(); i++)
1675  {
1676  tmpCur = vSubArguments[i]->GetFormulaToken();
1677  if (ocPush == tmpCur->GetOpCode())
1678  {
1679  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1680  if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1681  {
1682  temp4 << " currentCount";
1683  temp4 << i;
1684  temp4 << " =i+gid0+1;\n";
1685  }
1686  else
1687  {
1688  temp4 << " currentCount";
1689  temp4 << i;
1690  temp4 << " =i+1;\n";
1691  }
1692  }
1693  }
1694 
1695  temp4 << "tmp = fsum(";
1696  for (size_t i = 0; i < vSubArguments.size(); i++)
1697  {
1698  if (i)
1699  temp4 << "*";
1700  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1701  {
1702  temp4 << "(";
1703  temp4 << "(currentCount";
1704  temp4 << i;
1705  temp4 << ">";
 // NOTE(review): right-hand operand not visible in this listing - confirm.
1706  if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1708  {
1709  const formula::SingleVectorRefToken* pSVR =
1710  static_cast<const formula::SingleVectorRefToken*>
1711  (vSubArguments[i]->GetFormulaToken());
1712  temp4 << pSVR->GetArrayLength();
1713  temp4 << ")||isnan(" << vSubArguments[i]
1714  ->GenSlidingWindowDeclRef();
1715  temp4 << ")?0:";
1716  temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1717  temp4 << ")";
1718  }
 // NOTE(review): right-hand operand not visible in this listing - confirm.
1719  else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1721  {
1722  const formula::DoubleVectorRefToken* pSVR =
1723  static_cast<const formula::DoubleVectorRefToken*>
1724  (vSubArguments[i]->GetFormulaToken());
1725  temp4 << pSVR->GetArrayLength();
1726  temp4 << ")||isnan(" << vSubArguments[i]
1727  ->GenSlidingWindowDeclRef(true);
1728  temp4 << ")?0:";
1729  temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1730  temp4 << ")";
1731  }
1732 
1733  }
1734  else
1735  {
1736  temp4 << vSubArguments[i]
1737  ->GenSlidingWindowDeclRef(true);
1738  }
1739  }
1740  temp4 << ", tmp);\n\t";
1741  }
1742  ss << temp4.str();
1743  }
1744  ss << "return tmp;\n";
1745  ss << "}";
1746  }
1747  virtual bool takeString() const override { return false; }
1748  virtual bool takeNumeric() const override { return true; }
1749 };
1750 
1752 class OpNop : public Reduction
1753 {
1754 public:
1755  explicit OpNop(int nResultSize) : Reduction(nResultSize) {}
1756 
1757  virtual std::string GetBottom() override { return "0"; }
1758  virtual std::string Gen2( const std::string& lhs, const std::string& ) const override
1759  {
1760  return lhs;
1761  }
1762  virtual std::string BinFuncName() const override { return "nop"; }
1763 };
1764 
1765 class OpCount : public Reduction
1766 {
1767 public:
1768  explicit OpCount(int nResultSize) : Reduction(nResultSize) {}
1769 
1770  virtual std::string GetBottom() override { return "0"; }
1771  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1772  {
1773  std::stringstream ss;
1774  ss << "(isnan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
1775  return ss.str();
1776  }
1777  virtual std::string BinFuncName() const override { return "fcount"; }
1778  virtual bool canHandleMultiVector() const override { return true; }
1779 };
1780 
1781 class OpEqual : public Binary
1782 {
1783 public:
1784  virtual std::string GetBottom() override { return "0"; }
1785  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1786  {
1787  std::stringstream ss;
1788  ss << "strequal(" << lhs << "," << rhs << ")";
1789  return ss.str();
1790  }
1791  virtual std::string BinFuncName() const override { return "eq"; }
1792 };
1793 
1794 class OpLessEqual : public Binary
1795 {
1796 public:
1797  virtual std::string GetBottom() override { return "0"; }
1798  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1799  {
1800  std::stringstream ss;
1801  ss << "(" << lhs << "<=" << rhs << ")";
1802  return ss.str();
1803  }
1804  virtual std::string BinFuncName() const override { return "leq"; }
1805 };
1806 
1807 class OpLess : public Binary
1808 {
1809 public:
1810  virtual std::string GetBottom() override { return "0"; }
1811  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1812  {
1813  std::stringstream ss;
1814  ss << "(" << lhs << "<" << rhs << ")";
1815  return ss.str();
1816  }
1817  virtual std::string BinFuncName() const override { return "less"; }
1818 };
1819 
1820 class OpGreater : public Binary
1821 {
1822 public:
1823  virtual std::string GetBottom() override { return "0"; }
1824  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1825  {
1826  std::stringstream ss;
1827  ss << "(" << lhs << ">" << rhs << ")";
1828  return ss.str();
1829  }
1830  virtual std::string BinFuncName() const override { return "gt"; }
1831 };
1832 
1833 class OpSum : public Reduction
1834 {
1835 public:
1836  explicit OpSum(int nResultSize) : Reduction(nResultSize) {}
1837 
1838  virtual std::string GetBottom() override { return "0"; }
1839  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1840  {
1841  std::stringstream ss;
1842  ss << "fsum_approx((" << lhs << "),(" << rhs << "))";
1843  return ss.str();
1844  }
1845  virtual std::string BinFuncName() const override { return "fsum"; }
1846  // All arguments are simply summed, so it doesn't matter if SvDoubleVector is split.
1847  virtual bool canHandleMultiVector() const override { return true; }
1848 };
1849 
1850 class OpAverage : public Reduction
1851 {
1852 public:
1853  explicit OpAverage(int nResultSize) : Reduction(nResultSize) {}
1854 
1855  virtual std::string GetBottom() override { return "0"; }
1856  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1857  {
1858  std::stringstream ss;
1859  ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
1860  return ss.str();
1861  }
1862  virtual std::string BinFuncName() const override { return "average"; }
1863  virtual bool isAverage() const override { return true; }
1864  virtual bool canHandleMultiVector() const override { return true; }
1865 };
1866 
1867 class OpSub : public Reduction
1868 {
1869 public:
1870  explicit OpSub(int nResultSize) : Reduction(nResultSize) {}
1871 
1872  virtual std::string GetBottom() override { return "0"; }
1873  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1874  {
1875  return "fsub_approx(" + lhs + "," + rhs + ")";
1876  }
1877  virtual std::string BinFuncName() const override { return "fsub"; }
1878 };
1879 
1880 class OpMul : public Reduction
1881 {
1882 public:
1883  explicit OpMul(int nResultSize) : Reduction(nResultSize) {}
1884 
1885  virtual std::string GetBottom() override { return "1"; }
1886  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1887  {
1888  return lhs + "*" + rhs;
1889  }
1890  virtual std::string BinFuncName() const override { return "fmul"; }
1891  virtual bool ZeroReturnZero() override { return true; }
1892 };
1893 
1895 class OpDiv : public Reduction
1896 {
1897 public:
1898  explicit OpDiv(int nResultSize) : Reduction(nResultSize) {}
1899 
1900  virtual std::string GetBottom() override { return "1.0"; }
1901  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1902  {
1903  return "(" + rhs + "==0 ? CreateDoubleError(DivisionByZero) : (" + lhs + "/" + rhs + ") )";
1904  }
1905  virtual std::string BinFuncName() const override { return "fdiv"; }
1906 
1907  virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const override
1908  {
1909  if (argno == 1)
1910  {
1911  ss <<
1912  "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
1913  " return CreateDoubleError(DivisionByZero);\n"
1914  "}\n";
1915  return true;
1916  }
1917  else if (argno == 0)
1918  {
1919  ss <<
1920  "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") &&\n"
1921  " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() << ") || " << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)) {\n"
1922  " return 0;\n"
1923  "}\n";
1924  }
1925  return false;
1926  }
1927 
1928 };
1929 
1930 class OpMin : public Reduction
1931 {
1932 public:
1933  explicit OpMin(int nResultSize) : Reduction(nResultSize) {}
1934 
1935  virtual std::string GetBottom() override { return "NAN"; }
1936  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1937  {
1938  return "fmin_count(" + lhs + "," + rhs + ", &nCount)";
1939  }
1940  virtual std::string BinFuncName() const override { return "min"; }
1941  virtual bool isMinOrMax() const override { return true; }
1942  virtual bool canHandleMultiVector() const override { return true; }
1943 };
1944 
1945 class OpMax : public Reduction
1946 {
1947 public:
1948  explicit OpMax(int nResultSize) : Reduction(nResultSize) {}
1949 
1950  virtual std::string GetBottom() override { return "NAN"; }
1951  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1952  {
1953  return "fmax_count(" + lhs + "," + rhs + ", &nCount)";
1954  }
1955  virtual std::string BinFuncName() const override { return "max"; }
1956  virtual bool isMinOrMax() const override { return true; }
1957  virtual bool canHandleMultiVector() const override { return true; }
1958 };
1959 
1960 class OpSumProduct : public SumOfProduct
1961 {
1962 public:
1963  virtual std::string GetBottom() override { return "0"; }
1964  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1965  {
1966  return lhs + "*" + rhs;
1967  }
1968  virtual std::string BinFuncName() const override { return "fsop"; }
1969 };
1970 
1971 template<class Base>
1972 void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
1973 {
1974  if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1975  {
1976  std::string name = Base::GetName();
1977  ss << "__kernel void " << name;
1978  ss << "_reduction(__global double* A, "
1979  "__global double *result,int arrayLength,int windowSize){\n";
1980  ss << " double tmp, current_result =" <<
1981  mpCodeGen->GetBottom();
1982  ss << ";\n";
1983  ss << " int writePos = get_group_id(1);\n";
1984  ss << " int lidx = get_local_id(0);\n";
1985  ss << " __local double shm_buf[256];\n";
1986  if (mpDVR->IsStartFixed())
1987  ss << " int offset = 0;\n";
1988  else // if (!mpDVR->IsStartFixed())
1989  ss << " int offset = get_group_id(1);\n";
1990  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1991  ss << " int end = windowSize;\n";
1992  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1993  ss << " int end = offset + windowSize;\n";
1994  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1995  ss << " int end = windowSize + get_group_id(1);\n";
1996  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1997  ss << " int end = windowSize;\n";
1998  ss << " end = min(end, arrayLength);\n";
1999 
2000  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2001  ss << " int loop = arrayLength/512 + 1;\n";
2002  ss << " for (int l=0; l<loop; l++){\n";
2003  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2004  ss << " int loopOffset = l*512;\n";
2005  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2006  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2007  "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
2008  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2009  "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
2010  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2011  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2012  "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
2013  ss << " shm_buf[lidx] = tmp;\n";
2014  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2015  ss << " for (int i = 128; i >0; i/=2) {\n";
2016  ss << " if (lidx < i)\n";
2017  ss << " shm_buf[lidx] = ";
2018  // Special case count
2019  if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2020  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2021  else
2022  ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
2023  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2024  ss << " }\n";
2025  ss << " if (lidx == 0)\n";
2026  ss << " current_result =";
2027  if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2028  ss << "current_result + shm_buf[0]";
2029  else
2030  ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
2031  ss << ";\n";
2032  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2033  ss << " }\n";
2034  ss << " if (lidx == 0)\n";
2035  ss << " result[writePos] = current_result;\n";
2036  ss << "}\n";
2037  }
2038  else
2039  {
2040  std::string name = Base::GetName();
2041  /*sum reduction*/
2042  ss << "__kernel void " << name << "_sum";
2043  ss << "_reduction(__global double* A, "
2044  "__global double *result,int arrayLength,int windowSize){\n";
2045  ss << " double tmp, current_result =" <<
2046  mpCodeGen->GetBottom();
2047  ss << ";\n";
2048  ss << " int writePos = get_group_id(1);\n";
2049  ss << " int lidx = get_local_id(0);\n";
2050  ss << " __local double shm_buf[256];\n";
2051  if (mpDVR->IsStartFixed())
2052  ss << " int offset = 0;\n";
2053  else // if (!mpDVR->IsStartFixed())
2054  ss << " int offset = get_group_id(1);\n";
2055  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2056  ss << " int end = windowSize;\n";
2057  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2058  ss << " int end = offset + windowSize;\n";
2059  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2060  ss << " int end = windowSize + get_group_id(1);\n";
2061  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2062  ss << " int end = windowSize;\n";
2063  ss << " end = min(end, arrayLength);\n";
2064  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2065  ss << " int loop = arrayLength/512 + 1;\n";
2066  ss << " for (int l=0; l<loop; l++){\n";
2067  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2068  ss << " int loopOffset = l*512;\n";
2069  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2070  ss << " tmp = legalize(";
2071  ss << "(A[loopOffset + lidx + offset]+ tmp)";
2072  ss << ", tmp);\n";
2073  ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
2074  ss << ", tmp);\n";
2075  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2076  ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
2077  ss << ", tmp);\n";
2078  ss << " shm_buf[lidx] = tmp;\n";
2079  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2080  ss << " for (int i = 128; i >0; i/=2) {\n";
2081  ss << " if (lidx < i)\n";
2082  ss << " shm_buf[lidx] = ";
2083  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2084  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2085  ss << " }\n";
2086  ss << " if (lidx == 0)\n";
2087  ss << " current_result =";
2088  ss << "current_result + shm_buf[0]";
2089  ss << ";\n";
2090  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2091  ss << " }\n";
2092  ss << " if (lidx == 0)\n";
2093  ss << " result[writePos] = current_result;\n";
2094  ss << "}\n";
2095  /*count reduction*/
2096  ss << "__kernel void " << name << "_count";
2097  ss << "_reduction(__global double* A, "
2098  "__global double *result,int arrayLength,int windowSize){\n";
2099  ss << " double tmp, current_result =" <<
2100  mpCodeGen->GetBottom();
2101  ss << ";\n";
2102  ss << " int writePos = get_group_id(1);\n";
2103  ss << " int lidx = get_local_id(0);\n";
2104  ss << " __local double shm_buf[256];\n";
2105  if (mpDVR->IsStartFixed())
2106  ss << " int offset = 0;\n";
2107  else // if (!mpDVR->IsStartFixed())
2108  ss << " int offset = get_group_id(1);\n";
2109  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2110  ss << " int end = windowSize;\n";
2111  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2112  ss << " int end = offset + windowSize;\n";
2113  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2114  ss << " int end = windowSize + get_group_id(1);\n";
2115  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2116  ss << " int end = windowSize;\n";
2117  ss << " end = min(end, arrayLength);\n";
2118  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2119  ss << " int loop = arrayLength/512 + 1;\n";
2120  ss << " for (int l=0; l<loop; l++){\n";
2121  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2122  ss << " int loopOffset = l*512;\n";
2123  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2124  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2125  ss << ", tmp);\n";
2126  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
2127  ss << ", tmp);\n";
2128  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2129  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2130  ss << ", tmp);\n";
2131  ss << " shm_buf[lidx] = tmp;\n";
2132  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2133  ss << " for (int i = 128; i >0; i/=2) {\n";
2134  ss << " if (lidx < i)\n";
2135  ss << " shm_buf[lidx] = ";
2136  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2137  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2138  ss << " }\n";
2139  ss << " if (lidx == 0)\n";
2140  ss << " current_result =";
2141  ss << "current_result + shm_buf[0];";
2142  ss << ";\n";
2143  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2144  ss << " }\n";
2145  ss << " if (lidx == 0)\n";
2146  ss << " result[writePos] = current_result;\n";
2147  ss << "}\n";
2148  }
2149 
2150 }
2151 
2152 template<class Base>
2153 size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
2154  std::stringstream& ss, int nResultSize, bool& needBody )
2155 {
2156  assert(mpDVR);
2157  size_t nCurWindowSize = mpDVR->GetRefRowSize();
2158  std::string temp = Base::GetName() + "[gid0]";
2159  ss << "tmp = ";
2160  // Special case count
2161  if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
2162  {
2163  ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
2164  ss << "nCount = nCount-1;\n";
2165  ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
2166  ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
2167  }
2168  else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2169  ss << temp << "+ tmp";
2170  else
2171  ss << mpCodeGen->Gen2(temp, "tmp");
2172  ss << ";\n\t";
2173  needBody = false;
2174  return nCurWindowSize;
2175 }
2176 
2177 template<class Base>
2178 size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
2179 {
2180  assert(Base::mpClmem == nullptr);
2181 
2182  OpenCLZone zone;
2185  cl_int err;
2186  size_t nInput = mpDVR->GetArrayLength();
2187  size_t nCurWindowSize = mpDVR->GetRefRowSize();
2188  // create clmem buffer
2189  if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
2190  throw Unhandled(__FILE__, __LINE__);
2191  double* pHostBuffer = const_cast<double*>(
2192  mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
2193  size_t szHostBuffer = nInput * sizeof(double);
2194  Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
2195  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
2196  szHostBuffer,
2197  pHostBuffer, &err);
2198  SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
2199 
2200  mpClmem2 = clCreateBuffer(kEnv.mpkContext,
2201  CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
2202  sizeof(double) * w, nullptr, nullptr);
2203  if (CL_SUCCESS != err)
2204  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2205  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
2206 
2207  // reproduce the reduction function name
2208  std::string kernelName;
2209  if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
2210  kernelName = Base::GetName() + "_reduction";
2211  else
2212  kernelName = Base::GetName() + "_sum_reduction";
2213  cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2214  if (err != CL_SUCCESS)
2215  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2216  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
2217 
2218  // set kernel arg of reduction kernel
2219  // TODO(Wei Wei): use unique name for kernel
2220  cl_mem buf = Base::GetCLBuffer();
2221  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
2222  err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
2223  static_cast<void*>(&buf));
2224  if (CL_SUCCESS != err)
2225  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2226 
2227  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
2228  err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
2229  if (CL_SUCCESS != err)
2230  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2231 
2232  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
2233  err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
2234  if (CL_SUCCESS != err)
2235  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2236 
2237  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
2238  err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2239  if (CL_SUCCESS != err)
2240  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2241 
2242  // set work group size and execute
2243  size_t global_work_size[] = { 256, static_cast<size_t>(w) };
2244  size_t const local_work_size[] = { 256, 1 };
2245  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2246  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2247  global_work_size, local_work_size, 0, nullptr, nullptr);
2248  if (CL_SUCCESS != err)
2249  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2250  err = clFinish(kEnv.mpkCmdQueue);
2251  if (CL_SUCCESS != err)
2252  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2253  if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
2254  {
2255  /*average need more reduction kernel for count computing*/
2256  std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
2257  double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2258  mpClmem2,
2259  CL_TRUE, CL_MAP_READ, 0,
2260  sizeof(double) * w, 0, nullptr, nullptr,
2261  &err));
2262  if (err != CL_SUCCESS)
2263  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
2264 
2265  for (int i = 0; i < w; i++)
2266  pAllBuffer[i] = resbuf[i];
2267  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
2268  if (err != CL_SUCCESS)
2269  throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
2270 
2271  kernelName = Base::GetName() + "_count_reduction";
2272  redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2273  if (err != CL_SUCCESS)
2274  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2275  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
2276 
2277  // set kernel arg of reduction kernel
2278  buf = Base::GetCLBuffer();
2279  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
2280  err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
2281  static_cast<void*>(&buf));
2282  if (CL_SUCCESS != err)
2283  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2284 
2285  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
2286  err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
2287  if (CL_SUCCESS != err)
2288  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2289 
2290  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
2291  err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
2292  if (CL_SUCCESS != err)
2293  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2294 
2295  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
2296  err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2297  if (CL_SUCCESS != err)
2298  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2299 
2300  // set work group size and execute
2301  size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
2302  size_t const local_work_size1[] = { 256, 1 };
2303  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2304  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2305  global_work_size1, local_work_size1, 0, nullptr, nullptr);
2306  if (CL_SUCCESS != err)
2307  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2308  err = clFinish(kEnv.mpkCmdQueue);
2309  if (CL_SUCCESS != err)
2310  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2311  resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2312  mpClmem2,
2313  CL_TRUE, CL_MAP_READ, 0,
2314  sizeof(double) * w, 0, nullptr, nullptr,
2315  &err));
2316  if (err != CL_SUCCESS)
2317  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
2318  for (int i = 0; i < w; i++)
2319  pAllBuffer[i + w] = resbuf[i];
2320  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
2321  // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
2322  if (CL_SUCCESS != err)
2323  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
2324  if (mpClmem2)
2325  {
2326  err = clReleaseMemObject(mpClmem2);
2327  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2328  mpClmem2 = nullptr;
2329  }
2330  mpClmem2 = clCreateBuffer(kEnv.mpkContext,
2331  cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
2332  w * sizeof(double) * 2, pAllBuffer.get(), &err);
2333  if (CL_SUCCESS != err)
2334  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2335  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
2336  }
2337  // set kernel arg
2338  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2339  err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
2340  if (CL_SUCCESS != err)
2341  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2342  return 1;
2343 }
2344 
2345 struct SumIfsArgs
2346 {
2347  explicit SumIfsArgs(cl_mem x) : mCLMem(x), mConst(0.0) { }
2348  explicit SumIfsArgs(double x) : mCLMem(nullptr), mConst(x) { }
2349  cl_mem mCLMem;
2350  double mConst;
2351 };
2352 
2354 class DynamicKernelSoPArguments : public DynamicKernelArgument
2355 {
2356 public:
2357  typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
2358 
2359  DynamicKernelSoPArguments( const ScCalcConfig& config,
2360  const std::string& s, const FormulaTreeNodeRef& ft,
2361  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize );
2362 
2364  virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) override
2365  {
2366  OpenCLZone zone;
2367  unsigned i = 0;
2368  for (const auto& rxSubArgument : mvSubArguments)
2369  {
2370  i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
2371  }
2372  if (dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
2373  {
2376  cl_int err;
2377  cl_mem pClmem2;
2378 
2379  std::vector<cl_mem> vclmem;
2380  for (const auto& rxSubArgument : mvSubArguments)
2381  {
2382  if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2383  vclmem.push_back(VR->GetCLBuffer());
2384  else
2385  vclmem.push_back(nullptr);
2386  }
2387  pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2388  sizeof(double) * nVectorWidth, nullptr, &err);
2389  if (CL_SUCCESS != err)
2390  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2391  SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2392 
2393  std::string kernelName = "GeoMean_reduction";
2394  cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2395  if (err != CL_SUCCESS)
2396  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2397  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2398 
2399  // set kernel arg of reduction kernel
2400  for (size_t j = 0; j < vclmem.size(); j++)
2401  {
2402  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
2403  err = clSetKernelArg(redKernel, j,
2404  vclmem[j] ? sizeof(cl_mem) : sizeof(double),
2405  static_cast<void*>(&vclmem[j]));
2406  if (CL_SUCCESS != err)
2407  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2408  }
2409  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
2410  err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2));
2411  if (CL_SUCCESS != err)
2412  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2413 
2414  // set work group size and execute
2415  size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
2416  size_t const local_work_size[] = { 256, 1 };
2417  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2418  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2419  global_work_size, local_work_size, 0, nullptr, nullptr);
2420  if (CL_SUCCESS != err)
2421  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2422  err = clFinish(kEnv.mpkCmdQueue);
2423  if (CL_SUCCESS != err)
2424  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2425 
2426  // Pass pClmem2 to the "real" kernel
2427  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
2428  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2));
2429  if (CL_SUCCESS != err)
2430  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2431  }
2432  if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
2433  {
2436  cl_int err;
2437  DynamicKernelArgument* Arg = mvSubArguments[0].get();
2438  DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2439  static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
2440  mpClmem2 = nullptr;
2441 
2442  if (OpSumCodeGen->NeedReductionKernel())
2443  {
2444  size_t nInput = slidingArgPtr->GetArrayLength();
2445  size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
2446  std::vector<SumIfsArgs> vclmem;
2447 
2448  for (const auto& rxSubArgument : mvSubArguments)
2449  {
2450  if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2451  vclmem.emplace_back(VR->GetCLBuffer());
2452  else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
2453  vclmem.emplace_back(CA->GetDouble());
2454  else
2455  vclmem.emplace_back(nullptr);
2456  }
2457  mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2458  sizeof(double) * nVectorWidth, nullptr, &err);
2459  if (CL_SUCCESS != err)
2460  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2461  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2462 
2463  std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
2464  cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2465  if (err != CL_SUCCESS)
2466  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2467  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2468 
2469  // set kernel arg of reduction kernel
2470  for (size_t j = 0; j < vclmem.size(); j++)
2471  {
2472  if (vclmem[j].mCLMem)
2473  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
2474  else
2475  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
2476  err = clSetKernelArg(redKernel, j,
2477  vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
2478  vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
2479  static_cast<void*>(&vclmem[j].mConst));
2480  if (CL_SUCCESS != err)
2481  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2482  }
2483  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
2484  err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2485  if (CL_SUCCESS != err)
2486  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2487 
2488  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
2489  err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
2490  if (CL_SUCCESS != err)
2491  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2492 
2493  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
2494  err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2495  if (CL_SUCCESS != err)
2496  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2497  // set work group size and execute
2498  size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
2499  size_t const local_work_size[] = { 256, 1 };
2500  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2501  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2502  global_work_size, local_work_size, 0, nullptr, nullptr);
2503  if (CL_SUCCESS != err)
2504  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2505 
2506  err = clFinish(kEnv.mpkCmdQueue);
2507  if (CL_SUCCESS != err)
2508  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2509 
2510  SAL_INFO("sc.opencl", "Releasing kernel " << redKernel);
2511  err = clReleaseKernel(redKernel);
2512  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
2513 
2514  // Pass mpClmem2 to the "real" kernel
2515  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2516  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2517  if (CL_SUCCESS != err)
2518  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2519  }
2520  }
2521  return i;
2522  }
2523 
2524  virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
2525  {
2526  for (DynamicKernelArgumentRef & rArg : mvSubArguments)
2527  rArg->GenSlidingWindowFunction(ss);
2528  mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2529  }
2530  virtual void GenDeclRef( std::stringstream& ss ) const override
2531  {
2532  for (size_t i = 0; i < mvSubArguments.size(); i++)
2533  {
2534  if (i)
2535  ss << ",";
2536  mvSubArguments[i]->GenDeclRef(ss);
2537  }
2538  }
2539  virtual void GenDecl( std::stringstream& ss ) const override
2540  {
2541  for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2542  ++it)
2543  {
2544  if (it != mvSubArguments.begin())
2545  ss << ", ";
2546  (*it)->GenDecl(ss);
2547  }
2548  }
2549 
2550  virtual size_t GetWindowSize() const override
2551  {
2552  size_t nCurWindowSize = 0;
2553  for (const auto & rSubArgument : mvSubArguments)
2554  {
2555  size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
2556  nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2557  nCurChildWindowSize : nCurWindowSize;
2558  }
2559  return nCurWindowSize;
2560  }
2561 
2563  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
2564  {
2565  for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2566  ++it)
2567  {
2568  if (it != mvSubArguments.begin())
2569  ss << ", ";
2570  (*it)->GenSlidingWindowDecl(ss);
2571  }
2572  }
2575  virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const override
2576  {
2577  std::stringstream ss;
2578  if (!nested)
2579  {
2580  ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
2581  for (size_t i = 0; i < mvSubArguments.size(); i++)
2582  {
2583  if (i)
2584  ss << ", ";
2585  mvSubArguments[i]->GenDeclRef(ss);
2586  }
2587  ss << ")";
2588  }
2589  else
2590  {
2591  if (mvSubArguments.size() != 2)
2592  throw Unhandled(__FILE__, __LINE__);
2593  bool bArgument1_NeedNested =
2594  mvSubArguments[0]->GetFormulaToken()->GetType()
2596  bool bArgument2_NeedNested =
2597  mvSubArguments[1]->GetFormulaToken()->GetType()
2599  ss << "(";
2600  ss << mpCodeGen->
2601  Gen2(mvSubArguments[0]
2602  ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2603  mvSubArguments[1]
2604  ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2605  ss << ")";
2606  }
2607  return ss.str();
2608  }
2609  virtual std::string DumpOpName() const override
2610  {
2611  std::string t = "_" + mpCodeGen->BinFuncName();
2612  for (const auto & rSubArgument : mvSubArguments)
2613  t += rSubArgument->DumpOpName();
2614  return t;
2615  }
2616  virtual void DumpInlineFun( std::set<std::string>& decls,
2617  std::set<std::string>& funs ) const override
2618  {
2619  mpCodeGen->BinInlineFun(decls, funs);
2620  for (const auto & rSubArgument : mvSubArguments)
2621  rSubArgument->DumpInlineFun(decls, funs);
2622  }
2623  virtual bool IsEmpty() const override
2624  {
2625  for (const auto & rSubArgument : mvSubArguments)
2626  if( !rSubArgument->IsEmpty())
2627  return false;
2628  return true;
2629  }
2630  virtual ~DynamicKernelSoPArguments() override
2631  {
2632  if (mpClmem2)
2633  {
2634  cl_int err;
2635  err = clReleaseMemObject(mpClmem2);
2636  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2637  mpClmem2 = nullptr;
2638  }
2639  }
2640 
2641 private:
2642  SubArgumentsType mvSubArguments;
2643  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
2644  cl_mem mpClmem2;
2645 };
2646 
2647 }
2648 
2650  const std::string& ts, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
2651  int nResultSize )
2652 {
2653  return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
2654 }
2655 
2656 template<class Base>
2657 static std::shared_ptr<DynamicKernelArgument> VectorRefFactory( const ScCalcConfig& config, const std::string& s,
2658  const FormulaTreeNodeRef& ft,
2659  std::shared_ptr<SlidingFunctionBase>& pCodeGen,
2660  int index )
2661 {
2662  //Black lists ineligible classes here ..
2663  // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2664  if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2665  {
2666  // coverity[identical_branches] - only identical if Base happens to be VectorRef
2667  if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
2668  return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
2669  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2670  }
2671  // AVERAGE is not supported yet
2672  //Average has been supported by reduction kernel
2673  /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2674  {
2675  return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2676  }*/
2677  // MUL is not supported yet
2678  else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2679  {
2680  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2681  }
2682  // Sub is not a reduction per se
2683  else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2684  {
2685  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2686  }
2687  // Only child class of Reduction is supported
2688  else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2689  {
2690  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2691  }
2692 
2693  const formula::DoubleVectorRefToken* pDVR =
2694  static_cast<const formula::DoubleVectorRefToken*>(
2695  ft->GetFormulaToken());
2696  // Window being too small to justify a parallel reduction
2697  if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
2698  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2699  if (pDVR->IsStartFixed() == pDVR->IsEndFixed())
2700  return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
2701  else // Other cases are not supported as well
2702  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2703 }
2704 
2705 DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
2706  const std::string& s, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize ) :
2707  DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(nullptr)
2708 {
2709  size_t nChildren = ft->Children.size();
2710 
2711  for (size_t i = 0; i < nChildren; i++)
2712  {
2713  FormulaTreeNodeRef rChild = ft->Children[i];
2714  if (!rChild)
2715  throw Unhandled(__FILE__, __LINE__);
2716  FormulaToken* pChild = rChild->GetFormulaToken();
2717  if (!pChild)
2718  throw Unhandled(__FILE__, __LINE__);
2719  OpCode opc = pChild->GetOpCode();
2720  std::stringstream tmpname;
2721  tmpname << s << "_" << i;
2722  std::string ts = tmpname.str();
2723  switch (opc)
2724  {
2725  case ocPush:
2726  if (pChild->GetType() == formula::svDoubleVectorRef)
2727  {
2728  const formula::DoubleVectorRefToken* pDVR =
2729  static_cast<const formula::DoubleVectorRefToken*>(pChild);
2730 
2731  // The code below will split one svDoubleVectorRef into one subargument
2732  // for each column of data, and then all these subarguments will be later
2733  // passed to the code generating the function. Most of the code then
2734  // simply treats each subargument as one argument to the function, and thus
2735  // could break in this case.
2736  // As a simple solution, simply prevent this case, unless the code in question
2737  // explicitly claims it will handle this situation properly.
2738  if( pDVR->GetArrays().size() > 1 )
2739  {
2740  if( !pCodeGen->canHandleMultiVector())
2741  throw UnhandledToken(("Function '" + pCodeGen->BinFuncName()
2742  + "' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
2743 
2744  SAL_INFO("sc.opencl", "multi-column DoubleRef");
2745 
2746  }
2747 
2748  // FIXME: The Right Thing to do would be to compare the accumulated kernel
2749  // parameter size against the CL_DEVICE_MAX_PARAMETER_SIZE of the device, but
2750  // let's just do this sanity check for now. The kernel compilation will
2751  // hopefully fail anyway if the size of parameters exceeds the limit and this
2752  // sanity check is just to make us bail out a bit earlier.
2753 
2754  // The number 50 comes from the fact that the minimum size of
2755  // CL_DEVICE_MAX_PARAMETER_SIZE is 256, which for 32-bit code probably means 64
2756  // of them. Round down a bit.
2757 
2758  if (pDVR->GetArrays().size() > 50)
2759  throw UnhandledToken(("Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->GetArrays().size()) + ")").c_str(), __FILE__, __LINE__);
2760 
2761  for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
2762  {
2763  SAL_INFO("sc.opencl", "i=" << i << " j=" << j <<
2764  " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
2765  " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
2766  " allStringsAreNull=" << (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())?"YES":"NO") <<
2767  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2768  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2769 
2770  if (pDVR->GetArrays()[j].mpNumericArray &&
2771  pCodeGen->takeNumeric() &&
2772  pDVR->GetArrays()[j].mpStringArray &&
2773  pCodeGen->takeString())
2774  {
2775  // Function takes numbers or strings, there are both
2776  SAL_INFO("sc.opencl", "Numbers and strings");
2777  mvSubArguments.push_back(
2778  std::make_shared<DynamicKernelMixedSlidingArgument>(mCalcConfig,
2779  ts, ft->Children[i], mpCodeGen, j));
2780  }
2781  else if (pDVR->GetArrays()[j].mpNumericArray &&
2782  pCodeGen->takeNumeric() &&
2783  (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
2784  {
2785  // Function takes numbers, and either there
2786  // are no strings, or there are strings but
2787  // they are to be treated as zero
2788  SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2789  mvSubArguments.push_back(
2790  VectorRefFactory<VectorRef>(mCalcConfig,
2791  ts, ft->Children[i], mpCodeGen, j));
2792  }
2793  else if (pDVR->GetArrays()[j].mpNumericArray == nullptr &&
2794  pCodeGen->takeNumeric() &&
2795  pDVR->GetArrays()[j].mpStringArray &&
2797  {
2798  // Function takes numbers, and there are only
2799  // strings, but they are to be treated as zero
2800  SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2801  mvSubArguments.push_back(
2802  VectorRefFactory<VectorRef>(mCalcConfig,
2803  ts, ft->Children[i], mpCodeGen, j));
2804  }
2805  else if (pDVR->GetArrays()[j].mpStringArray &&
2806  pCodeGen->takeString())
2807  {
2808  // There are strings, and the function takes strings.
2809  SAL_INFO("sc.opencl", "Strings only");
2810  mvSubArguments.push_back(
2812  <DynamicKernelStringArgument>(mCalcConfig,
2813  ts, ft->Children[i], mpCodeGen, j));
2814  }
2815  else if (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) &&
2816  pDVR->GetArrays()[j].mpNumericArray == nullptr)
2817  {
2818  // There are only empty cells. Push as an
2819  // array of NANs
2820  SAL_INFO("sc.opencl", "Only empty cells");
2821  mvSubArguments.push_back(
2822  VectorRefFactory<VectorRef>(mCalcConfig,
2823  ts, ft->Children[i], mpCodeGen, j));
2824  }
2825  else
2826  {
2827  SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2828  throw UnhandledToken(("Unhandled numbers/strings combination for '"
2829  + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2830  }
2831  }
2832  }
2833  else if (pChild->GetType() == formula::svSingleVectorRef)
2834  {
2835  const formula::SingleVectorRefToken* pSVR =
2836  static_cast<const formula::SingleVectorRefToken*>(pChild);
2837 
2838  SAL_INFO("sc.opencl", "i=" << i <<
2839  " mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
2840  " mpStringArray=" << pSVR->GetArray().mpStringArray <<
2841  " allStringsAreNull=" << (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())?"YES":"NO") <<
2842  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2843  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2844 
2845  if (pSVR->GetArray().mpNumericArray &&
2846  pCodeGen->takeNumeric() &&
2847  pSVR->GetArray().mpStringArray &&
2848  pCodeGen->takeString())
2849  {
2850  // Function takes numbers or strings, there are both
2851  SAL_INFO("sc.opencl", "Numbers and strings");
2852  mvSubArguments.push_back(
2853  std::make_shared<DynamicKernelMixedArgument>(mCalcConfig,
2854  ts, ft->Children[i]));
2855  }
2856  else if (pSVR->GetArray().mpNumericArray &&
2857  pCodeGen->takeNumeric() &&
2859  {
2860  // Function takes numbers, and either there
2861  // are no strings, or there are strings but
2862  // they are to be treated as zero
2863  SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2864  mvSubArguments.push_back(
2865  std::make_shared<VectorRef>(mCalcConfig, ts,
2866  ft->Children[i]));
2867  }
2868  else if (pSVR->GetArray().mpNumericArray == nullptr &&
2869  pCodeGen->takeNumeric() &&
2870  pSVR->GetArray().mpStringArray &&
2872  {
2873  // Function takes numbers, and there are only
2874  // strings, but they are to be treated as zero
2875  SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2876  mvSubArguments.push_back(
2877  std::make_shared<VectorRef>(mCalcConfig, ts,
2878  ft->Children[i]));
2879  }
2880  else if (pSVR->GetArray().mpStringArray &&
2881  pCodeGen->takeString())
2882  {
2883  // There are strings, and the function takes strings.
2884  SAL_INFO("sc.opencl", "Strings only");
2885  mvSubArguments.push_back(
2886  std::make_shared<DynamicKernelStringArgument>(mCalcConfig,
2887  ts, ft->Children[i]));
2888  }
2889  else if (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) &&
2890  pSVR->GetArray().mpNumericArray == nullptr)
2891  {
2892  // There are only empty cells. Push as an
2893  // array of NANs
2894  SAL_INFO("sc.opencl", "Only empty cells");
2895  mvSubArguments.push_back(
2896  std::make_shared<VectorRef>(mCalcConfig, ts,
2897  ft->Children[i]));
2898  }
2899  else
2900  {
2901  SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2902  throw UnhandledToken(("Unhandled numbers/strings combination for '"
2903  + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2904  }
2905  }
2906  else if (pChild->GetType() == formula::svDouble)
2907  {
2908  SAL_INFO("sc.opencl", "Constant number case");
2909  mvSubArguments.push_back(
2910  std::make_shared<DynamicKernelConstantArgument>(mCalcConfig, ts,
2911  ft->Children[i]));
2912  }
2913  else if (pChild->GetType() == formula::svString
2914  && pCodeGen->takeString())
2915  {
2916  SAL_INFO("sc.opencl", "Constant string case");
2917  mvSubArguments.push_back(
2918  std::make_shared<ConstStringArgument>(mCalcConfig, ts,
2919  ft->Children[i]));
2920  }
2921  else
2922  {
2923  SAL_INFO("sc.opencl", "Unhandled operand, rejecting for OpenCL");
2924  throw UnhandledToken(("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str(), __FILE__, __LINE__);
2925  }
2926  break;
2927  case ocDiv:
2928  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDiv>(nResultSize), nResultSize));
2929  break;
2930  case ocMul:
2931  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMul>(nResultSize), nResultSize));
2932  break;
2933  case ocSub:
2934  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSub>(nResultSize), nResultSize));
2935  break;
2936  case ocAdd:
2937  case ocSum:
2938  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSum>(nResultSize), nResultSize));
2939  break;
2940  case ocAverage:
2941  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAverage>(nResultSize), nResultSize));
2942  break;
2943  case ocMin:
2944  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMin>(nResultSize), nResultSize));
2945  break;
2946  case ocMax:
2947  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMax>(nResultSize), nResultSize));
2948  break;
2949  case ocCount:
2950  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCount>(nResultSize), nResultSize));
2951  break;
2952  case ocSumProduct:
2953  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSumProduct>(), nResultSize));
2954  break;
2955  case ocIRR:
2956  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIRR>(), nResultSize));
2957  break;
2958  case ocMIRR:
2959  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMIRR>(), nResultSize));
2960  break;
2961  case ocPMT:
2962  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPMT>(), nResultSize));
2963  break;
2964  case ocRate:
2965  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIntrate>(), nResultSize));
2966  break;
2967  case ocRRI:
2968  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRRI>(), nResultSize));
2969  break;
2970  case ocPpmt:
2971  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPPMT>(), nResultSize));
2972  break;
2973  case ocFisher:
2974  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisher>(), nResultSize));
2975  break;
2976  case ocFisherInv:
2977  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisherInv>(), nResultSize));
2978  break;
2979  case ocGamma:
2980  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGamma>(), nResultSize));
2981  break;
2982  case ocSLN:
2983  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSLN>(), nResultSize));
2984  break;
2985  case ocGammaLn:
2986  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGammaLn>(), nResultSize));
2987  break;
2988  case ocGauss:
2989  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGauss>(), nResultSize));
2990  break;
2991  /*case ocGeoMean:
2992  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGeoMean));
2993  break;*/
2994  case ocHarMean:
2995  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpHarMean>(), nResultSize));
2996  break;
2997  case ocLessEqual:
2998  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLessEqual>(), nResultSize));
2999  break;
3000  case ocLess:
3001  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLess>(), nResultSize));
3002  break;
3003  case ocEqual:
3004  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEqual>(), nResultSize));
3005  break;
3006  case ocGreater:
3007  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGreater>(), nResultSize));
3008  break;
3009  case ocSYD:
3010  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSYD>(), nResultSize));
3011  break;
3012  case ocCorrel:
3013  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCorrel>(), nResultSize));
3014  break;
3015  case ocCos:
3016  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCos>(), nResultSize));
3017  break;
3018  case ocNegBinomVert :
3019  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNegbinomdist>(), nResultSize));
3020  break;
3021  case ocPearson:
3022  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPearson>(), nResultSize));
3023  break;
3024  case ocRSQ:
3025  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRsq>(), nResultSize));
3026  break;
3027  case ocCosecant:
3028  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCsc>(), nResultSize));
3029  break;
3030  case ocISPMT:
3031  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpISPMT>(), nResultSize));
3032  break;
3033  case ocPDuration:
3034  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3035  ft->Children[i], std::make_shared<OpPDuration>(), nResultSize));
3036  break;
3037  case ocSinHyp:
3038  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3039  ft->Children[i], std::make_shared<OpSinh>(), nResultSize));
3040  break;
3041  case ocAbs:
3042  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3043  ft->Children[i], std::make_shared<OpAbs>(), nResultSize));
3044  break;
3045  case ocPV:
3046  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3047  ft->Children[i], std::make_shared<OpPV>(), nResultSize));
3048  break;
3049  case ocSin:
3050  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3051  ft->Children[i], std::make_shared<OpSin>(), nResultSize));
3052  break;
3053  case ocTan:
3054  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3055  ft->Children[i], std::make_shared<OpTan>(), nResultSize));
3056  break;
3057  case ocTanHyp:
3058  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3059  ft->Children[i], std::make_shared<OpTanH>(), nResultSize));
3060  break;
3061  case ocStandard:
3062  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3063  ft->Children[i], std::make_shared<OpStandard>(), nResultSize));
3064  break;
3065  case ocWeibull:
3066  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3067  ft->Children[i], std::make_shared<OpWeibull>(), nResultSize));
3068  break;
3069  /*case ocMedian:
3070  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3071  ft->Children[i],std::make_shared<OpMedian));
3072  break;*/
3073  case ocDDB:
3074  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3075  ft->Children[i], std::make_shared<OpDDB>(), nResultSize));
3076  break;
3077  case ocFV:
3078  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3079  ft->Children[i], std::make_shared<OpFV>(), nResultSize));
3080  break;
3081  case ocSumIfs:
3082  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3083  ft->Children[i], std::make_shared<OpSumIfs>(), nResultSize));
3084  break;
3085  /*case ocVBD:
3086  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3087  ft->Children[i],std::make_shared<OpVDB));
3088  break;*/
3089  case ocKurt:
3090  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3091  ft->Children[i], std::make_shared<OpKurt>(), nResultSize));
3092  break;
3093  /*case ocNper:
3094  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3095  ft->Children[i], std::make_shared<OpNper));
3096  break;*/
3097  case ocNormDist:
3098  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3099  ft->Children[i], std::make_shared<OpNormdist>(), nResultSize));
3100  break;
3101  case ocArcCos:
3102  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3103  ft->Children[i], std::make_shared<OpArcCos>(), nResultSize));
3104  break;
3105  case ocSqrt:
3106  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3107  ft->Children[i], std::make_shared<OpSqrt>(), nResultSize));
3108  break;
3109  case ocArcCosHyp:
3110  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3111  ft->Children[i], std::make_shared<OpArcCosHyp>(), nResultSize));
3112  break;
3113  case ocNPV:
3114  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3115  ft->Children[i], std::make_shared<OpNPV>(), nResultSize));
3116  break;
3117  case ocStdNormDist:
3118  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3119  ft->Children[i], std::make_shared<OpNormsdist>(), nResultSize));
3120  break;
3121  case ocNormInv:
3122  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3123  ft->Children[i], std::make_shared<OpNorminv>(), nResultSize));
3124  break;
3125  case ocSNormInv:
3126  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3127  ft->Children[i], std::make_shared<OpNormsinv>(), nResultSize));
3128  break;
3129  case ocPermut:
3130  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3131  ft->Children[i], std::make_shared<OpPermut>(), nResultSize));
3132  break;
3133  case ocPermutationA:
3134  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3135  ft->Children[i], std::make_shared<OpPermutationA>(), nResultSize));
3136  break;
3137  case ocPhi:
3138  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3139  ft->Children[i], std::make_shared<OpPhi>(), nResultSize));
3140  break;
3141  case ocIpmt:
3142  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3143  ft->Children[i], std::make_shared<OpIPMT>(), nResultSize));
3144  break;
3145  case ocConfidence:
3146  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3147  ft->Children[i], std::make_shared<OpConfidence>(), nResultSize));
3148  break;
3149  case ocIntercept:
3150  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3151  ft->Children[i], std::make_shared<OpIntercept>(), nResultSize));
3152  break;
3153  case ocDB:
3154  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3155  std::make_shared<OpDB>(), nResultSize));
3156  break;
3157  case ocLogInv:
3158  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3159  ft->Children[i], std::make_shared<OpLogInv>(), nResultSize));
3160  break;
3161  case ocArcCot:
3162  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3163  ft->Children[i], std::make_shared<OpArcCot>(), nResultSize));
3164  break;
3165  case ocCosHyp:
3166  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3167  ft->Children[i], std::make_shared<OpCosh>(), nResultSize));
3168  break;
3169  case ocCritBinom:
3170  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3171  ft->Children[i], std::make_shared<OpCritBinom>(), nResultSize));
3172  break;
3173  case ocArcCotHyp:
3174  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3175  ft->Children[i], std::make_shared<OpArcCotHyp>(), nResultSize));
3176  break;
3177  case ocArcSin:
3178  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3179  ft->Children[i], std::make_shared<OpArcSin>(), nResultSize));
3180  break;
3181  case ocArcSinHyp:
3182  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3183  ft->Children[i], std::make_shared<OpArcSinHyp>(), nResultSize));
3184  break;
3185  case ocArcTan:
3186  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3187  ft->Children[i], std::make_shared<OpArcTan>(), nResultSize));
3188  break;
3189  case ocArcTanHyp:
3190  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3191  ft->Children[i], std::make_shared<OpArcTanH>(), nResultSize));
3192  break;
3193  case ocBitAnd:
3194  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3195  ft->Children[i], std::make_shared<OpBitAnd>(), nResultSize));
3196  break;
3197  case ocForecast:
3198  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3199  ft->Children[i], std::make_shared<OpForecast>(), nResultSize));
3200  break;
3201  case ocLogNormDist:
3202  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3203  ft->Children[i], std::make_shared<OpLogNormDist>(), nResultSize));
3204  break;
3205  /*case ocGammaDist:
3206  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3207  ft->Children[i], std::make_shared<OpGammaDist));
3208  break;*/
3209  case ocLn:
3210  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3211  ft->Children[i], std::make_shared<OpLn>(), nResultSize));
3212  break;
3213  case ocRound:
3214  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3215  ft->Children[i], std::make_shared<OpRound>(), nResultSize));
3216  break;
3217  case ocCot:
3218  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3219  ft->Children[i], std::make_shared<OpCot>(), nResultSize));
3220  break;
3221  case ocCotHyp:
3222  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3223  ft->Children[i], std::make_shared<OpCoth>(), nResultSize));
3224  break;
3225  case ocFDist:
3226  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3227  ft->Children[i], std::make_shared<OpFdist>(), nResultSize));
3228  break;
3229  case ocVar:
3230  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3231  ft->Children[i], std::make_shared<OpVar>(), nResultSize));
3232  break;
3233  /*case ocChiDist:
3234  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3235  ft->Children[i],std::make_shared<OpChiDist));
3236  break;*/
3237  case ocPow:
3238  case ocPower:
3239  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3240  ft->Children[i], std::make_shared<OpPower>(), nResultSize));
3241  break;
3242  case ocOdd:
3243  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3244  ft->Children[i], std::make_shared<OpOdd>(), nResultSize));
3245  break;
3246  /*case ocChiSqDist:
3247  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3248  ft->Children[i],std::make_shared<OpChiSqDist));
3249  break;
3250  case ocChiSqInv:
3251  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3252  ft->Children[i],std::make_shared<OpChiSqInv));
3253  break;
3254  case ocGammaInv:
3255  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3256  ft->Children[i], std::make_shared<OpGammaInv));
3257  break;*/
3258  case ocFloor:
3259  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3260  ft->Children[i], std::make_shared<OpFloor>(), nResultSize));
3261  break;
3262  /*case ocFInv:
3263  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3264  ft->Children[i], std::make_shared<OpFInv));
3265  break;*/
3266  case ocFTest:
3267  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3268  ft->Children[i], std::make_shared<OpFTest>(), nResultSize));
3269  break;
3270  case ocB:
3271  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3272  ft->Children[i], std::make_shared<OpB>(), nResultSize));
3273  break;
3274  case ocBetaDist:
3275  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3276  ft->Children[i], std::make_shared<OpBetaDist>(), nResultSize));
3277  break;
3278  case ocCosecantHyp:
3279  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3280  ft->Children[i], std::make_shared<OpCscH>(), nResultSize));
3281  break;
3282  case ocExp:
3283  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3284  ft->Children[i], std::make_shared<OpExp>(), nResultSize));
3285  break;
3286  case ocLog10:
3287  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3288  ft->Children[i], std::make_shared<OpLog10>(), nResultSize));
3289  break;
3290  case ocExpDist:
3291  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3292  ft->Children[i], std::make_shared<OpExponDist>(), nResultSize));
3293  break;
3294  case ocAverageIfs:
3295  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3296  ft->Children[i], std::make_shared<OpAverageIfs>(), nResultSize));
3297  break;
3298  case ocCountIfs:
3299  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3300  ft->Children[i], std::make_shared<OpCountIfs>(), nResultSize));
3301  break;
3302  case ocCombinA:
3303  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3304  ft->Children[i], std::make_shared<OpCombinA>(), nResultSize));
3305  break;
3306  case ocEven:
3307  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3308  ft->Children[i], std::make_shared<OpEven>(), nResultSize));
3309  break;
3310  case ocLog:
3311  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3312  ft->Children[i], std::make_shared<OpLog>(), nResultSize));
3313  break;
3314  case ocMod:
3315  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3316  ft->Children[i], std::make_shared<OpMod>(), nResultSize));
3317  break;
3318  case ocTrunc:
3319  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3320  ft->Children[i], std::make_shared<OpTrunc>(), nResultSize));
3321  break;
3322  case ocSkew:
3323  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3324  ft->Children[i], std::make_shared<OpSkew>(), nResultSize));
3325  break;
3326  case ocArcTan2:
3327  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3328  ft->Children[i], std::make_shared<OpArcTan2>(), nResultSize));
3329  break;
3330  case ocBitOr:
3331  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3332  ft->Children[i], std::make_shared<OpBitOr>(), nResultSize));
3333  break;
3334  case ocBitLshift:
3335  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3336  ft->Children[i], std::make_shared<OpBitLshift>(), nResultSize));
3337  break;
3338  case ocBitRshift:
3339  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3340  ft->Children[i], std::make_shared<OpBitRshift>(), nResultSize));
3341  break;
3342  case ocBitXor:
3343  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3344  ft->Children[i], std::make_shared<OpBitXor>(), nResultSize));
3345  break;
3346  /*case ocChiInv:
3347  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3348  ft->Children[i],std::make_shared<OpChiInv));
3349  break;*/
3350  case ocPoissonDist:
3351  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3352  ft->Children[i], std::make_shared<OpPoisson>(), nResultSize));
3353  break;
3354  case ocSumSQ:
3355  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3356  ft->Children[i], std::make_shared<OpSumSQ>(), nResultSize));
3357  break;
3358  case ocSkewp:
3359  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3360  ft->Children[i], std::make_shared<OpSkewp>(), nResultSize));
3361  break;
3362  case ocBinomDist:
3363  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3364  ft->Children[i], std::make_shared<OpBinomdist>(), nResultSize));
3365  break;
3366  case ocVarP:
3367  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3368  ft->Children[i], std::make_shared<OpVarP>(), nResultSize));
3369  break;
3370  case ocCeil:
3371  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3372  ft->Children[i], std::make_shared<OpCeil>(), nResultSize));
3373  break;
3374  case ocCombin:
3375  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3376  ft->Children[i], std::make_shared<OpCombin>(), nResultSize));
3377  break;
3378  case ocDevSq:
3379  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3380  ft->Children[i], std::make_shared<OpDevSq>(), nResultSize));
3381  break;
3382  case ocStDev:
3383  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3384  ft->Children[i], std::make_shared<OpStDev>(), nResultSize));
3385  break;
3386  case ocSlope:
3387  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3388  ft->Children[i], std::make_shared<OpSlope>(), nResultSize));
3389  break;
3390  case ocSTEYX:
3391  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3392  ft->Children[i], std::make_shared<OpSTEYX>(), nResultSize));
3393  break;
3394  case ocZTest:
3395  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3396  ft->Children[i], std::make_shared<OpZTest>(), nResultSize));
3397  break;
3398  case ocPi:
3399  mvSubArguments.push_back(
3400  std::make_shared<DynamicKernelPiArgument>(mCalcConfig, ts,
3401  ft->Children[i]));
3402  break;
3403  case ocRandom:
3404  mvSubArguments.push_back(
3405  std::make_shared<DynamicKernelRandomArgument>(mCalcConfig, ts,
3406  ft->Children[i]));
3407  break;
3408  case ocProduct:
3409  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3410  ft->Children[i], std::make_shared<OpProduct>(), nResultSize));
3411  break;
3412  /*case ocHypGeomDist:
3413  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3414  ft->Children[i],std::make_shared<OpHypGeomDist));
3415  break;*/
3416  case ocSumX2MY2:
3417  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3418  ft->Children[i], std::make_shared<OpSumX2MY2>(), nResultSize));
3419  break;
3420  case ocSumX2DY2:
3421  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3422  ft->Children[i], std::make_shared<OpSumX2PY2>(), nResultSize));
3423  break;
3424  /*case ocBetaInv:
3425  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3426  ft->Children[i],std::make_shared<OpBetainv));
3427  break;*/
3428  case ocTTest:
3429  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3430  ft->Children[i], std::make_shared<OpTTest>(), nResultSize));
3431  break;
3432  case ocTDist:
3433  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3434  ft->Children[i], std::make_shared<OpTDist>(), nResultSize));
3435  break;
3436  /*case ocTInv:
3437  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3438  ft->Children[i], std::make_shared<OpTInv));
3439  break;*/
3440  case ocSumXMY2:
3441  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3442  ft->Children[i], std::make_shared<OpSumXMY2>(), nResultSize));
3443  break;
3444  case ocStDevP:
3445  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3446  ft->Children[i], std::make_shared<OpStDevP>(), nResultSize));
3447  break;
3448  case ocCovar:
3449  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3450  ft->Children[i], std::make_shared<OpCovar>(), nResultSize));
3451  break;
3452  case ocAnd:
3453  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3454  ft->Children[i], std::make_shared<OpAnd>(), nResultSize));
3455  break;
3456  case ocVLookup:
3457  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3458  ft->Children[i], std::make_shared<OpVLookup>(), nResultSize));
3459  break;
3460  case ocOr:
3461  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3462  ft->Children[i], std::make_shared<OpOr>(), nResultSize));
3463  break;
3464  case ocNot:
3465  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3466  ft->Children[i], std::make_shared<OpNot>(), nResultSize));
3467  break;
3468  case ocXor:
3469  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3470  ft->Children[i], std::make_shared<OpXor>(), nResultSize));
3471  break;
3472  case ocDBMax:
3473  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3474  ft->Children[i], std::make_shared<OpDmax>(), nResultSize));
3475  break;
3476  case ocDBMin:
3477  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3478  ft->Children[i], std::make_shared<OpDmin>(), nResultSize));
3479  break;
3480  case ocDBProduct:
3481  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3482  ft->Children[i], std::make_shared<OpDproduct>(), nResultSize));
3483  break;
3484  case ocDBAverage:
3485  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3486  ft->Children[i], std::make_shared<OpDaverage>(), nResultSize));
3487  break;
3488  case ocDBStdDev:
3489  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3490  ft->Children[i], std::make_shared<OpDstdev>(), nResultSize));
3491  break;
3492  case ocDBStdDevP:
3493  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3494  ft->Children[i], std::make_shared<OpDstdevp>(), nResultSize));
3495  break;
3496  case ocDBSum:
3497  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3498  ft->Children[i], std::make_shared<OpDsum>(), nResultSize));
3499  break;
3500  case ocDBVar:
3501  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3502  ft->Children[i], std::make_shared<OpDvar>(), nResultSize));
3503  break;
3504  case ocDBVarP:
3505  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3506  ft->Children[i], std::make_shared<OpDvarp>(), nResultSize));
3507  break;
3508  case ocAverageIf:
3509  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3510  ft->Children[i], std::make_shared<OpAverageIf>(), nResultSize));
3511  break;
3512  case ocDBCount:
3513  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3514  ft->Children[i], std::make_shared<OpDcount>(), nResultSize));
3515  break;
3516  case ocDBCount2:
3517  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3518  ft->Children[i], std::make_shared<OpDcount2>(), nResultSize));
3519  break;
3520  case ocDeg:
3521  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3522  ft->Children[i], std::make_shared<OpDeg>(), nResultSize));
3523  break;
3524  case ocRoundUp:
3525  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3526  ft->Children[i], std::make_shared<OpRoundUp>(), nResultSize));
3527  break;
3528  case ocRoundDown:
3529  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3530  ft->Children[i], std::make_shared<OpRoundDown>(), nResultSize));
3531  break;
3532  case ocInt:
3533  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3534  ft->Children[i], std::make_shared<OpInt>(), nResultSize));
3535  break;
3536  case ocRad:
3537  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3538  ft->Children[i], std::make_shared<OpRadians>(), nResultSize));
3539  break;
3540  case ocCountIf:
3541  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3542  ft->Children[i], std::make_shared<OpCountIf>(), nResultSize));
3543  break;
3544  case ocIsEven:
3545  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3546  ft->Children[i], std::make_shared<OpIsEven>(), nResultSize));
3547  break;
3548  case ocIsOdd:
3549  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3550  ft->Children[i], std::make_shared<OpIsOdd>(), nResultSize));
3551  break;
3552  case ocFact:
3553  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3554  ft->Children[i], std::make_shared<OpFact>(), nResultSize));
3555  break;
3556  case ocMinA:
3557  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3558  ft->Children[i], std::make_shared<OpMinA>(), nResultSize));
3559  break;
3560  case ocCount2:
3561  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3562  ft->Children[i], std::make_shared<OpCountA>(), nResultSize));
3563  break;
3564  case ocMaxA:
3565  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3566  ft->Children[i], std::make_shared<OpMaxA>(), nResultSize));
3567  break;
3568  case ocAverageA:
3569  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3570  ft->Children[i], std::make_shared<OpAverageA>(), nResultSize));
3571  break;
3572  case ocVarA:
3573  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3574  ft->Children[i], std::make_shared<OpVarA>(), nResultSize));
3575  break;
3576  case ocVarPA:
3577  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3578  ft->Children[i], std::make_shared<OpVarPA>(), nResultSize));
3579  break;
3580  case ocStDevA:
3581  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3582  ft->Children[i], std::make_shared<OpStDevA>(), nResultSize));
3583  break;
3584  case ocStDevPA:
3585  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3586  ft->Children[i], std::make_shared<OpStDevPA>(), nResultSize));
3587  break;
3588  case ocSecant:
3589  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3590  ft->Children[i], std::make_shared<OpSec>(), nResultSize));
3591  break;
3592  case ocSecantHyp:
3593  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3594  ft->Children[i], std::make_shared<OpSecH>(), nResultSize));
3595  break;
3596  case ocSumIf:
3597  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3598  ft->Children[i], std::make_shared<OpSumIf>(), nResultSize));
3599  break;
3600  case ocNegSub:
3601  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3602  ft->Children[i], std::make_shared<OpNegSub>(), nResultSize));
3603  break;
3604  case ocAveDev:
3605  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3606  ft->Children[i], std::make_shared<OpAveDev>(), nResultSize));
3607  break;
3608  case ocIf:
3609  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3610  ft->Children[i], std::make_shared<OpIf>(), nResultSize));
3611  break;
3612  case ocExternal:
3613  if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getEffect")
3614  {
3615  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEffective>(), nResultSize));
3616  }
3617  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumipmt")
3618  {
3619  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumipmt>(), nResultSize));
3620  }
3621  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getNominal")
3622  {
3623  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNominal>(), nResultSize));
3624  }
3625  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumprinc")
3626  {
3627  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumprinc>(), nResultSize));
3628  }
3629  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXnpv")
3630  {
3631  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpXNPV>(), nResultSize));
3632  }
3633  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricemat")
3634  {
3635  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPriceMat>(), nResultSize));
3636  }
3637  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getReceived")
3638  {
3639  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpReceived>(), nResultSize));
3640  }
3641  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbilleq")
3642  {
3643  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbilleq>(), nResultSize));
3644  }
3645  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillprice")
3646  {
3647  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillprice>(), nResultSize));
3648  }
3649  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillyield")
3650  {
3651  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillyield>(), nResultSize));
3652  }
3653  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getFvschedule")
3654  {
3655  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFvschedule>(), nResultSize));
3656  }
3657  /*else if ( pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYield")
3658  {
3659  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYield));
3660  }*/
3661  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYielddisc")
3662  {
3663  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYielddisc>(), nResultSize));
3664  }
3665  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYieldmat")
3666  {
3667  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYieldmat>(), nResultSize));
3668  }
3669  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrintm")
3670  {
3671  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAccrintm>(), nResultSize));
3672  }
3673  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaybs")
3674  {
3675  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaybs>(), nResultSize));
3676  }
3677  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarde")
3678  {
3679  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarde>(), nResultSize));
3680  }
3681  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarfr")
3682  {
3683  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarfr>(), nResultSize));
3684  }
3685  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdays")
3686  {
3687  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdays>(), nResultSize));
3688  }
3689  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaysnc")
3690  {
3691  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaysnc>(), nResultSize));
3692  }
3693  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDisc")
3694  {
3695  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDISC>(), nResultSize));
3696  }
3697  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIntrate")
3698  {
3699  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpINTRATE>(), nResultSize));
3700  }
3701  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPrice")
3702  {
3703  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3704  ft->Children[i], std::make_shared<OpPrice>(), nResultSize));
3705  }
3706  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupnum")
3707  {
3708  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3709  std::make_shared<OpCoupnum>(), nResultSize));
3710  }
3711  /*else if pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDuration"))
3712  {
3713  mvSubArguments.push_back(
3714  SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDuration_ADD));
3715  }*/
3716  /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmordegrc")
3717  {
3718  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3719  std::make_shared<OpAmordegrc, nResultSize));
3720  }*/
3721  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmorlinc")
3722  {
3723  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3724  std::make_shared<OpAmorlinc>(), nResultSize));
3725  }
3726  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMduration")
3727  {
3728  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3729  std::make_shared<OpMDuration>(), nResultSize));
3730  }
3731  /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXirr")
3732  {
3733  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3734  std::make_shared<OpXirr, nResultSize));
3735  }*/
3736  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlprice")
3737  {
3738  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3739  ft->Children[i], std::make_shared<OpOddlprice>(), nResultSize));
3740  }
3741  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlyield")
3742  {
3743  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3744  std::make_shared<OpOddlyield>(), nResultSize));
3745  }
3746  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricedisc")
3747  {
3748  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3749  ft->Children[i], std::make_shared<OpPriceDisc>(), nResultSize));
3750  }
3751  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCouppcd")
3752  {
3753  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3754  std::make_shared<OpCouppcd>(), nResultSize));
3755  }
3756  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupncd")
3757  {
3758  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3759  std::make_shared<OpCoupncd>(), nResultSize));
3760  }
3761  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrint")
3762  {
3763  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3764  std::make_shared<OpAccrint>(), nResultSize));
3765  }
3766  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSqrtpi")
3767  {
3768  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3769  std::make_shared<OpSqrtPi>(), nResultSize));
3770  }
3771  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getConvert")
3772  {
3773  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3774  std::make_shared<OpConvert>(), nResultSize));
3775  }
3776  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIseven")
3777  {
3778  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3779  std::make_shared<OpIsEven>(), nResultSize));
3780  }
3781  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIsodd")
3782  {
3783  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3784  std::make_shared<OpIsOdd>(), nResultSize));
3785  }
3786  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMround")
3787  {
3788  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3789  std::make_shared<OpMROUND>(), nResultSize));
3790  }
3791  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getQuotient")
3792  {
3793  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3794  std::make_shared<OpQuotient>(), nResultSize));
3795  }
3796  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSeriessum")
3797  {
3798  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3799  std::make_shared<OpSeriesSum>(), nResultSize));
3800  }
3801  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getBesselj")
3802  {
3803  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3804  std::make_shared<OpBesselj>(), nResultSize));
3805  }
3806  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getGestep")
3807  {
3808  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3809  std::make_shared<OpGestep>(), nResultSize));
3810  }
3811  else
3812  throw UnhandledToken(OUString("unhandled external " + pChild->GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
3813  break;
3814 
3815  default:
3816  throw UnhandledToken(OUString("unhandled opcode "
3817  + formula::FormulaCompiler().GetOpCodeMap(com::sun::star::sheet::FormulaLanguage::ENGLISH)->getSymbol(opc)
3818  + "(" + OUString::number(opc) + ")").toUtf8().getStr(), __FILE__, __LINE__);
3819  }
3820  }
3821 }
3822 
3823 namespace {
3824 
// Compiled OpenCL form of one formula tree. CodeGen() produces the program text,
// CreateKernel() obtains a cl_program/cl_kernel for it and Launch() enqueues the kernel.
// The destructor releases the kernel and the result buffer but not the program, which is cached.
// NOTE(review): mCalcConfig, mpRoot, mKernelSignature and mKernelHash are used by the member
// functions, but their declarations are not visible in this excerpt.
3825 class DynamicKernel : public CompiledFormula
3826 {
3827 public:
3828  DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
3829  virtual ~DynamicKernel() override;
3830 
 // Builds the formula tree from rCode, then runs CodeGen() and CreateKernel() on a new
 // instance. Returns nullptr when the tree cannot be built or either step throws.
3831  static std::shared_ptr<DynamicKernel> create( const ScCalcConfig& config, const ScTokenArray& rCode, int nResultSize );
3832 
 // Generates the OpenCL program source into mFullProgramSrc and sets the kernel signature.
3834  void CodeGen();
3835 
 // Returns the hex-formatted MD5 digest of mFullProgramSrc; computed on first call.
3837  std::string const & GetMD5();
3838 
 // Creates mpKernel (no-op if it already exists), reusing a cached program when possible.
3842  void CreateKernel();
3843 
 // Allocates the result buffer for nr doubles, binds the kernel arguments and enqueues the
 // kernel with a global work size of nr.
3846  void Launch( size_t nr );
3847 
 // Result buffer created by Launch(); null before Launch() has run.
3848  cl_mem GetResultBuffer() const { return mpResClmem; }
3849 
3850 private:
 // Symbols (kernel arguments) declared while walking the formula tree.
3853  SymbolTable mSyms;
 // Complete OpenCL program text produced by CodeGen().
3855  std::string mFullProgramSrc;
 // Program the kernel was created from; not released by the destructor.
3856  cl_program mpProgram;
3857  cl_kernel mpKernel;
3858  cl_mem mpResClmem; // Results
 // Declarations and definitions of inline helper functions collected during CodeGen().
3859  std::set<std::string> inlineDecl;
3860  std::set<std::string> inlineFun;
3861 
 // Result size handed to the root argument during CodeGen().
3862  int mnResultSize;
3863 };
3864 
3865 }
3866 
3867 DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
3868  mCalcConfig(config),
3869  mpRoot(r),
3870  mpProgram(nullptr),
3871  mpKernel(nullptr),
3872  mpResClmem(nullptr),
3873  mnResultSize(nResultSize) {}
3874 
3875 DynamicKernel::~DynamicKernel()
3876 {
3877  cl_int err;
3878  if (mpResClmem)
3879  {
3880  err = clReleaseMemObject(mpResClmem);
3881  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
3882  }
3883  if (mpKernel)
3884  {
3885  SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
3886  err = clReleaseKernel(mpKernel);
3887  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
3888  }
3889  // mpProgram is not going to be released here -- it's cached.
3890 }
3891 
3892 void DynamicKernel::CodeGen()
3893 {
3894  // Traverse the tree of expression and declare symbols used
3895  const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, std::make_shared<OpNop>(mnResultSize), mnResultSize);
3896 
3897  std::stringstream decl;
3898  if (openclwrapper::gpuEnv.mnKhrFp64Flag)
3899  {
3900  decl << "#if __OPENCL_VERSION__ < 120\n";
3901  decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3902  decl << "#endif\n";
3903  }
3904  else if (openclwrapper::gpuEnv.mnAmdFp64Flag)
3905  {
3906  decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3907  }
3908  // preambles
3909  decl << publicFunc;
3910  DK->DumpInlineFun(inlineDecl, inlineFun);
3911  for (const auto& rItem : inlineDecl)
3912  {
3913  decl << rItem;
3914  }
3915 
3916  for (const auto& rItem : inlineFun)
3917  {
3918  decl << rItem;
3919  }
3920  mSyms.DumpSlidingWindowFunctions(decl);
3921  mKernelSignature = DK->DumpOpName();
3922  decl << "__kernel void DynamicKernel" << mKernelSignature;
3923  decl << "(__global double *result";
3924  if( !DK->IsEmpty())
3925  {
3926  decl << ", ";
3927  DK->GenSlidingWindowDecl(decl);
3928  }
3929  decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3930  DK->GenSlidingWindowDeclRef() << ";\n}\n";
3931  mFullProgramSrc = decl.str();
3932  SAL_INFO(
3933  "sc.opencl.source",
3934  (mKernelSignature[0] == '_'
3935  ? mKernelSignature.substr(1, std::string::npos) : mKernelSignature)
3936  << " program to be compiled:\n" << linenumberify(mFullProgramSrc));
3937 }
3938 
3939 std::string const & DynamicKernel::GetMD5()
3940 {
3941  if (mKernelHash.empty())
3942  {
3943  std::stringstream md5s;
3944  // Compute MD5SUM of kernel body to obtain the name
3945  sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
3946  rtl_digest_MD5(
3947  mFullProgramSrc.c_str(),
3948  mFullProgramSrc.length(), result,
3949  RTL_DIGEST_LENGTH_MD5);
3950  for (sal_uInt8 i : result)
3951  {
3952  md5s << std::hex << static_cast<int>(i);
3953  }
3954  mKernelHash = md5s.str();
3955  }
3956  return mKernelHash;
3957 }
3958 
// Ensures mpKernel exists for this formula; returns immediately if it was already created.
// The program is looked up by a key made of the kernel signature and the MD5 of the source:
// first in a two-entry in-process cache (function-local statics), then via the branch that
// takes openclwrapper::gpuEnv.mpArryPrograms[0], and otherwise it is compiled from
// mFullProgramSrc. Throws OpenCLError if clCreateProgramWithSource or clCreateKernel fails,
// or if clBuildProgram fails in builds without DBG_UTIL (with DBG_UTIL the process aborts).
// NOTE(review): kEnv is used below but its declaration (original lines 3971-3972) is not
// visible in this excerpt.
// NOTE(review): the static cache is mutated without any locking visible here; confirm
// callers are serialized.
3960 void DynamicKernel::CreateKernel()
3961 {
3962  if (mpKernel)
3963  // already created.
3964  return;
3965 
3966  cl_int err;
 // Name of the __kernel function emitted by CodeGen().
3967  std::string kname = "DynamicKernel" + mKernelSignature;
3968  // Compile kernel here!!!
3969 
3970  OpenCLZone zone;
3973  const char* src = mFullProgramSrc.c_str();
 // Two most recently used programs and their keys, shared by all DynamicKernel instances.
3974  static std::string lastOneKernelHash;
3975  static std::string lastSecondKernelHash;
3976  static cl_program lastOneProgram = nullptr;
3977  static cl_program lastSecondProgram = nullptr;
3978  std::string KernelHash = mKernelSignature + GetMD5();
3979  if (lastOneKernelHash == KernelHash && lastOneProgram)
3980  {
3981  mpProgram = lastOneProgram;
3982  }
3983  else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3984  {
3985  mpProgram = lastSecondProgram;
3986  }
3987  else
3988  { // doesn't match the last compiled formula.
3989 
 // The older cache entry is about to be overwritten, so drop this function's reference to it.
3990  if (lastSecondProgram)
3991  {
3992  SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
3993  err = clReleaseProgram(lastSecondProgram);
3994  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << openclwrapper::errorString(err));
3995  lastSecondProgram = nullptr;
3996  }
 // NOTE(review): the start of this condition (original line 3997) is missing from the
 // excerpt; presumably it tries to load a previously stored binary for KernelHash - confirm.
3998  &openclwrapper::gpuEnv, KernelHash.c_str(), 0))
3999  {
4000  mpProgram = openclwrapper::gpuEnv.mpArryPrograms[0];
4002  }
4003  else
4004  {
 // Compile the generated source for the configured device.
4005  mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
4006  &src, nullptr, &err);
4007  if (err != CL_SUCCESS)
4008  throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
4009  SAL_INFO("sc.opencl", "Created program " << mpProgram);
4010 
4011  err = clBuildProgram(mpProgram, 1,
4012  &openclwrapper::gpuEnv.mpDevID, "", nullptr, nullptr);
4013  if (err != CL_SUCCESS)
4014  {
4015 #if OSL_DEBUG_LEVEL > 0
 // Debug builds only: fetch the build status and the build log and write them to the log.
4016  if (err == CL_BUILD_PROGRAM_FAILURE)
4017  {
4018  cl_build_status stat;
4019  cl_int e = clGetProgramBuildInfo(
4020  mpProgram, openclwrapper::gpuEnv.mpDevID,
4021  CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
4022  &stat, nullptr);
4023  SAL_WARN_IF(
4024  e != CL_SUCCESS, "sc.opencl",
4025  "after CL_BUILD_PROGRAM_FAILURE,"
4026  " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
4027  " fails with " << openclwrapper::errorString(e));
4028  if (e == CL_SUCCESS)
4029  {
 // First query only the size of the log, then fetch the log text itself.
4030  size_t n;
4031  e = clGetProgramBuildInfo(
4032  mpProgram, openclwrapper::gpuEnv.mpDevID,
4033  CL_PROGRAM_BUILD_LOG, 0, nullptr, &n);
4034  SAL_WARN_IF(
4035  e != CL_SUCCESS || n == 0, "sc.opencl",
4036  "after CL_BUILD_PROGRAM_FAILURE,"
4037  " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
4038  " fails with " << openclwrapper::errorString(e) << ", n=" << n);
4039  if (e == CL_SUCCESS && n != 0)
4040  {
4041  std::vector<char> log(n);
4042  e = clGetProgramBuildInfo(
4043  mpProgram, openclwrapper::gpuEnv.mpDevID,
4044  CL_PROGRAM_BUILD_LOG, n, log.data(), nullptr);
4045  SAL_WARN_IF(
4046  e != CL_SUCCESS || n == 0, "sc.opencl",
4047  "after CL_BUILD_PROGRAM_FAILURE,"
4048  " clGetProgramBuildInfo("
4049  "CL_PROGRAM_BUILD_LOG) fails with " << openclwrapper::errorString(e));
4050  if (e == CL_SUCCESS)
4051  SAL_WARN(
4052  "sc.opencl",
4053  "CL_BUILD_PROGRAM_FAILURE, status " << stat
4054  << ", log \"" << log.data() << "\"");
4055  }
4056  }
4057  }
4058 #endif
4059 #ifdef DBG_UTIL
4060  SAL_WARN("sc.opencl", "Program failed to build, aborting.");
4061  abort(); // make sure errors such as typos don't accidentally go unnoticed
4062 #else
4063  throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
4064 #endif
4065  }
4066  SAL_INFO("sc.opencl", "Built program " << mpProgram);
4067 
4068  // Generate binary out of compiled kernel.
 // NOTE(review): the call this argument belongs to (original line 4069) is missing from
 // the excerpt.
4070  (mKernelSignature + GetMD5()).c_str());
4071  }
 // Shift the cache: the previous newest entry becomes the older one, this program the newest.
4072  lastSecondKernelHash = lastOneKernelHash;
4073  lastSecondProgram = lastOneProgram;
4074  lastOneKernelHash = KernelHash;
4075  lastOneProgram = mpProgram;
4076  }
4077  mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
4078  if (err != CL_SUCCESS)
4079  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
4080  SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
4081 }
4082 
// Allocates the result buffer for nr doubles, binds it as kernel argument 0, lets the symbol
// table bind the remaining arguments, and enqueues the kernel with a one-dimensional global
// work size of nr. Only clFlush is called afterwards; this function does not wait for the
// kernel to finish. Throws OpenCLError if one of the OpenCL calls made here fails.
// NOTE(review): kEnv is used below but its declaration (original lines 4086-4087) is not
// visible in this excerpt.
// NOTE(review): mpResClmem is overwritten without releasing an earlier buffer; confirm that
// Launch() is called at most once per DynamicKernel.
4083 void DynamicKernel::Launch( size_t nr )
4084 {
4085  OpenCLZone zone;
4088  cl_int err;
4089  // The results
4090  mpResClmem = clCreateBuffer(kEnv.mpkContext,
4091  cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
4092  nr * sizeof(double), nullptr, &err);
4093  if (CL_SUCCESS != err)
4094  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
4095  SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
4096 
 // Argument 0 is the result buffer, matching "__global double *result" emitted by CodeGen().
4097  SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem << " (result)");
4098  err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
4099  if (CL_SUCCESS != err)
4100  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
4101  // The rest of buffers
4102  mSyms.Marshal(mpKernel, nr, mpProgram);
 // One work item per result element.
4103  size_t global_work_size[] = { nr };
4104  SAL_INFO("sc.opencl", "Enqueuing kernel " << mpKernel);
4105  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, nullptr,
4106  global_work_size, nullptr, 0, nullptr, nullptr);
4107  if (CL_SUCCESS != err)
4108  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
4109  err = clFlush(kEnv.mpkCmdQueue);
4110  if (CL_SUCCESS != err)
4111  throw OpenCLError("clFlush", err, __FILE__, __LINE__);
4112 }
4113 
4114 // Symbol lookup. If there is no such symbol created, allocate one
4115 // kernel with argument with unique name and return so.
4116 // The template argument T must be a subclass of DynamicKernelArgument
4117 template <typename T>
4118 const DynamicKernelArgument* SymbolTable::DeclRefArg(const ScCalcConfig& config,
4119  const FormulaTreeNodeRef& t,
4120  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize)
4121 {
4122  FormulaToken* ref = t->GetFormulaToken();
4123  ArgumentMap::iterator it = mSymbols.find(ref);
4124  if (it == mSymbols.end())
4125  {
4126  // Allocate new symbols
4127  std::stringstream ss;
4128  ss << "tmp" << mCurId++;
4129  DynamicKernelArgumentRef new_arg = std::make_shared<T>(config, ss.str(), t, std::move(pCodeGen), nResultSize);
4130  mSymbols[ref] = new_arg;
4131  mParams.push_back(new_arg);
4132  return new_arg.get();
4133  }
4134  else
4135  {
4136  return it->second.get();
4137  }
4138 }
4139 
// NOTE(review): original lines 4141, 4143 and 4145 are not present in this excerpt. The
// constructor's initializer list and body, and the signature of the function whose body
// returns nullptr below, therefore cannot be seen here - consult the full source before
// editing this span.
4140 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
4142 
4144 
4146 {
4147  return nullptr;
4148 }
4149 
// Builds a DynamicKernel for the token array rCode.
// The tokens are walked in iteration order with aTokenVector acting as an operand stack:
// every non-ocPush token pops nParamCount operands, which become its children (restored to
// their original order), and is then pushed itself. The node of the token left on top of
// the stack becomes the only child of a synthetic root. Afterwards the OpenCL source is
// generated and the kernel created.
// Returns nullptr if the stack runs out of operands, if an operator operand has no tree
// node, or if CodeGen()/CreateKernel() throws; exceptions are logged, not propagated.
// NOTE(review): the declaration of nParamCount (original line 4163) is not visible in this
// excerpt.
4150 std::shared_ptr<DynamicKernel> DynamicKernel::create( const ScCalcConfig& rConfig, const ScTokenArray& rCode, int nResultSize )
4151 {
4152  // Constructing "AST"
4153  FormulaTokenIterator aCode(rCode);
4154  std::vector<FormulaToken*> aTokenVector;
 // Maps each operator token to the tree node built for it.
4155  std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
4156  FormulaToken* pCur;
4157  while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != nullptr)
4158  {
4159  OpCode eOp = pCur->GetOpCode();
4160  if (eOp != ocPush)
4161  {
4162  FormulaTreeNodeRef pCurNode = std::make_shared<FormulaTreeNode>(pCur);
4164  for (sal_uInt8 i = 0; i < nParamCount; i++)
4165  {
 // Fewer operands on the stack than the operator needs: give up.
4166  if( aTokenVector.empty())
4167  return nullptr;
4168  FormulaToken* pTempFormula = aTokenVector.back();
4169  aTokenVector.pop_back();
4170  if (pTempFormula->GetOpCode() != ocPush)
4171  {
 // Operand is itself an operator: reuse the subtree built for it earlier.
4172  if (aHashMap.find(pTempFormula) == aHashMap.end())
4173  return nullptr;
4174  pCurNode->Children.push_back(aHashMap[pTempFormula]);
4175  }
4176  else
4177  {
 // Plain pushed operand: wrap it in a leaf node.
4178  FormulaTreeNodeRef pChildTreeNode =
4179  std::make_shared<FormulaTreeNode>(pTempFormula);
4180  pCurNode->Children.push_back(pChildTreeNode);
4181  }
4182  }
 // Operands were popped last-to-first; restore their original order.
4183  std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4184  aHashMap[pCur] = pCurNode;
4185  }
4186  aTokenVector.push_back(pCur);
4187  }
4188 
 // NOTE(review): aTokenVector.back() requires at least one token to have been iterated, and
 // aHashMap[...] yields a null node if the last token is an ocPush; confirm callers
 // guarantee neither case occurs.
4189  FormulaTreeNodeRef Root = std::make_shared<FormulaTreeNode>(nullptr);
4190  Root->Children.push_back(aHashMap[aTokenVector.back()]);
4191 
4192  auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
4193 
4194  // OpenCL source code generation and kernel compilation
4195  try
4196  {
4197  pDynamicKernel->CodeGen();
4198  pDynamicKernel->CreateKernel();
4199  }
4200  catch (const UnhandledToken& ut)
4201  {
4202  SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4203  return nullptr;
4204  }
4205  catch (const InvalidParameterCount& ipc)
4206  {
4207  SAL_INFO("sc.opencl", "Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
4208  << " at " << ipc.mFile << ":" << ipc.mLineNumber);
4209  return nullptr;
4210  }
4211  catch (const OpenCLError& oce)
4212  {
4213  // I think OpenCLError exceptions are actually exceptional (unexpected), so do use SAL_WARN
4214  // here.
4215  SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4216 
4217  // OpenCLError used to go to the catch-all below, and not delete pDynamicKernel. Was that
4218  // intentional, should we not do it here then either?
4220  return nullptr;
4221  }
4222  catch (const Unhandled& uh)
4223  {
4224  SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
4225 
4226  // Unhandled used to go to the catch-all below, and not delete pDynamicKernel. Was that
4227  // intentional, should we not do it here then either?
4229  return nullptr;
4230  }
4231  catch (...)
4232  {
4233  // FIXME: Do we really want to catch random exceptions here?
4234  SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
4236  return nullptr;
4237  }
4238  return pDynamicKernel;
4239 }
4240 
4241 namespace {
4242 
// Handle to the result of one launched kernel. It maps the kernel's result buffer into host
// memory (fetchResultFromKernel) and copies the values into the document
// (pushResultToDocument). A default-constructed instance has no kernel and is invalid.
// NOTE(review): the declaration of mnGroupLength (original line 4247) and the kEnv
// declarations used in both member functions are not visible in this excerpt.
4243 class CLInterpreterResult
4244 {
 // Non-owning pointer to the kernel that produced the result; null means "invalid".
4245  DynamicKernel* mpKernel;
4246 
4248 
 // Device-side result buffer and its host mapping; both are null until fetched.
4249  cl_mem mpCLResBuf;
4250  double* mpResBuf;
4251 
4252 public:
4253  CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4254  CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
4255  mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4256 
4257  bool isValid() const { return mpKernel != nullptr; }
4258 
 // Maps the kernel's result buffer for reading with a blocking map (CL_TRUE). On failure
 // the error is logged and mpResBuf stays null. Does nothing for an invalid result.
4259  void fetchResultFromKernel()
4260  {
4261  if (!isValid())
4262  return;
4263 
4264  OpenCLZone zone;
4265 
4266  // Map results back
4267  mpCLResBuf = mpKernel->GetResultBuffer();
4268 
4271 
4272  cl_int err;
4273  mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
4274  mpCLResBuf,
4275  CL_TRUE, CL_MAP_READ, 0,
4276  mnGroupLength * sizeof(double), 0, nullptr, nullptr,
4277  &err));
4278 
4279  if (err != CL_SUCCESS)
4280  {
4281  SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << openclwrapper::errorString(err));
4282  mpResBuf = nullptr;
4283  return;
4284  }
4285  SAL_INFO("sc.opencl", "Kernel results: cl_mem: " << mpResBuf << " (" << DebugPeekDoubles(mpResBuf, mnGroupLength) << ")");
4286  }
4287 
 // Writes the mapped results into the mnGroupLength formula cells starting at rTopPos and
 // then unmaps the buffer. Returns false if no results were mapped or if unmapping fails;
 // in the latter case the document has already been updated.
4288  bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
4289  {
4290  if (!mpResBuf)
4291  return false;
4292 
4293  OpenCLZone zone;
4294 
4295  rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
4296 
4299 
4300  cl_int err;
4301  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, nullptr, nullptr);
4302 
4303  if (err != CL_SUCCESS)
4304  {
4305  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
4306  return false;
4307  }
4308 
4309  return true;
4310  }
4311 };
4312 
// Holds the kernel for one formula group together with the group length, and launches it.
// The context is valid only once a non-null kernel has been set.
// NOTE(review): the declaration of mnGroupLength (original line 4318) is not visible in
// this excerpt.
4313 class CLInterpreterContext
4314 {
 // Owning reference to the kernel; mpKernel is the raw pointer to the same object.
4315  std::shared_ptr<DynamicKernel> mpKernelStore;
4316  DynamicKernel* mpKernel;
4317 
4319 
4320 public:
4321  explicit CLInterpreterContext(SCROW nGroupLength)
4322  : mpKernel(nullptr)
4323  , mnGroupLength(nGroupLength) {}
4324 
4325  bool isValid() const
4326  {
4327  return mpKernel != nullptr;
4328  }
4329 
 // Takes shared ownership of pKernel; passing a null pointer leaves the context invalid.
4330  void setManagedKernel( std::shared_ptr<DynamicKernel> pKernel )
4331  {
4332  mpKernelStore = std::move(pKernel);
4333  mpKernel = mpKernelStore.get();
4334  }
4335 
 // Launches the kernel over mnGroupLength work items. Returns an invalid
 // (default-constructed) result if the context is invalid or if Launch() throws; every
 // exception is logged and swallowed here.
4336  CLInterpreterResult launchKernel()
4337  {
4338  if (!isValid())
4339  return CLInterpreterResult();
4340 
4341  try
4342  {
4343  // Run the kernel.
4344  mpKernel->Launch(mnGroupLength);
4345  }
4346  catch (const UnhandledToken& ut)
4347  {
4348  SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4350  return CLInterpreterResult();
4351  }
4352  catch (const OpenCLError& oce)
4353  {
4354  SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4356  return CLInterpreterResult();
4357  }
4358  catch (const Unhandled& uh)
4359  {
4360  SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
4362  return CLInterpreterResult();
4363  }
4364  catch (...)
4365  {
4366  SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
4368  return CLInterpreterResult();
4369  }
4370 
 // The result refers to the kernel by raw pointer; this context keeps the kernel alive.
4371  return CLInterpreterResult(mpKernel, mnGroupLength);
4372  }
4373 };
4374 
4375 
4376 CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
4377  const ScFormulaCellGroupRef& xGroup, const ScTokenArray& rCode )
4378 {
4379  CLInterpreterContext aCxt(xGroup->mnLength);
4380 
4381  aCxt.setManagedKernel(DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
4382 
4383  return aCxt;
4384 }
4385 
4386 void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
4387 {
4388  ScCompiler aComp(rDoc, rTopPos, rCode, rDoc.GetGrammar());
4389  // Disable special ordering for jump commands for the OpenCL interpreter.
4390  aComp.EnableJumpCommandReorder(false);
4391  aComp.CompileTokenArray(); // Regenerate RPN tokens.
4392 }
4393 
// Blocks until all commands queued on kEnv.mpkCmdQueue have completed (clFinish).
// Returns true on success; a failure is logged and reported as false.
// NOTE(review): the declaration of kEnv (original lines 4397-4398) is not visible in this
// excerpt.
4394 bool waitForResults()
4395 {
4396  OpenCLZone zone;
4399 
4400  cl_int err = clFinish(kEnv.mpkCmdQueue);
4401  if (err != CL_SUCCESS)
4402  SAL_WARN("sc.opencl", "clFinish failed: " << openclwrapper::errorString(err));
4403 
4404  return err == CL_SUCCESS;
4405 }
4406 
4407 }
4408 
4410  const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
4411  ScTokenArray& rCode )
4412 {
 // NOTE(review): the first line of this signature (original line 4409) is not visible in
 // this excerpt.
 // Evaluates the formula group through OpenCL: regenerate the RPN tokens, build the kernel,
 // launch it, wait for completion, then copy the results into the document. Returns false
 // as soon as any step fails.
4413  MergeCalcConfig(rDoc);
4414 
4415  genRPNTokens(rDoc, rTopPos, rCode);
4416 
 // Nothing to compile.
4417  if( rCode.GetCodeLen() == 0 )
4418  return false;
4419 
 // An invalid context means no kernel could be created for this formula.
4420  CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
4421  if (!aCxt.isValid())
4422  return false;
4423 
4424  CLInterpreterResult aRes = aCxt.launchKernel();
4425  if (!aRes.isValid())
4426  return false;
4427 
 // The launch only enqueues the kernel; wait for the queue to drain before reading results.
4428  if (!waitForResults())
4429  return false;
4430 
4431  aRes.fetchResultFromKernel();
4432 
 // Returns false if the results could not be mapped or unmapped.
4433  return aRes.pushResultToDocument(rDoc, rTopPos);
4434 }
4435 
4436 } // namespace sc::opencl
4437 
4438 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Matrix data type that can store values of mixed types.
Definition: scmatrix.hxx:112
ocDBCount
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
ocStDevP
ocVarPA
ocSqrt
ocBitAnd
ocPermutationA
ocBinomDist
const size_t count(pCandidateA->getBorderLines().size())
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
Definition: types.hxx:43
sal_uInt64 kernelFailures
ocEven
DynamicKernelSlidingArgument< VectorRef > mDoubleArgument
OUString getString() const
std::string mFullProgramSrc
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
Definition: opbase.hxx:151
std::string mKernelSignature
ocSumSQ
double mConst
ocArcCotHyp
ocCosecantHyp
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells. ...
Definition: documen8.cxx:402
ocVar
const VectorRefArray & GetArray() const
ocPi
ocDevSq
Failed in marshaling.
Definition: opbase.hxx:41
Inconsistent state.
Definition: opbase.hxx:53
ocCos
ocPDuration
cl_program mpArryPrograms[MAX_CLFILE_NUM]
std::set< std::string > inlineDecl
ocPpmt
ocDBMin
ocIntercept
SCROW mnGroupLength
ocDBAverage
ocB
unsigned int mCurId
Abstract base class for vectorised formula group interpreters, plus a global instance factory...
sal_Int64 n
cl_mem mpClmem2
ocCosecant
virtual bool interpret(ScDocument &rDoc, const ScAddress &rTopPos, ScFormulaCellGroupRef &xGroup, ScTokenArray &rCode) override
const std::vector< VectorRefArray > & GetArrays() const
ocDBSum
ocExp
ocSLN
void setKernelEnv(KernelEnv *envInfo)
ocLogNormDist
sal_uInt16 GetCodeLen() const
void MergeCalcConfig(const ScDocument &rDoc)
Merge global and document specific settings.
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
Definition: opbase.hxx:81
virtual double GetDouble() const
ocPV
ocProduct
OpCode GetOpCode() const
SymbolTable mSyms
ocPearson
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
Definition: document.hxx:983
(Partially) abstract base class for an operand
Definition: opbase.hxx:101
ocCount
ocRad
ocMin
ocGauss
ocSinHyp
ocCount2
cl_kernel mpKernel
for managed kernel instance.
ocSTEYX
ocIf
ocLogInv
ocSYD
ocFisherInv
tuple log
#define SAL_MAX_UINT32
virtual ScMatrixRef inverseMatrix(const ScMatrix &rMat) override
ocPhi
char sal_uInt16 & nParamCount
Definition: callform.cxx:53
ocDBProduct
ocNegSub
#define REDUCE_THRESHOLD
=1+"1" or =1+"x" give 1
ocRSQ
virtual const OUString & GetExternal() const
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
ocBetaDist
ocRound
OpCode
ocSumProduct
ocAbs
ocStDevA
ocForecast
ocWeibull
ocGreater
ocSNormInv
ocIsOdd
ocEqual
::boost::intrusive_ptr< ScMatrix > ScMatrixRef
Definition: types.hxx:25
ocNot
StringConversion meStringConversion
Definition: calcconfig.hxx:54
Reference< deployment::XPackageRegistry > create(Reference< deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, Reference< XComponentContext > const &xComponentContext)
std::string StackVarEnumToString(StackVar const e)
ocCot
ocCorrel
ocBitLshift
ScCalcConfig mCalcConfig
ocDDB
ocVarP
std::shared_ptr< DynamicKernel > mpKernelStore
ocSumIfs
ocArcTanHyp
err
ocFisher
ocSumX2MY2
ocDBMax
ocAverageA
ocPower
ocNormInv
ocBitOr
const formula::DoubleVectorRefToken * mpDVR
Base
ocFTest
ocArcCos
ocLog
int mnResultSize
ocFV
int i
ocSkew
ocNPV
cl_mem mCLMem
ocMul
ocCosHyp
ocArcCosHyp
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
ocDBVar
ocPush
std::set< std::string > inlineFun
std::shared_ptr< SlidingFunctionBase > mpCodeGen
std::string mKernelHash
ocSub
ocTDist
#define VR
Definition: xlformula.cxx:62
void EnableJumpCommandReorder(bool bEnable)
ocSecantHyp
ocFloor
ocPoissonDist
ocMIRR
ocSlope
ocExternal
ocAverageIfs
ocMaxA
ocLog10
size
ocCeil
Arguments that are actually compile-time constant string Currently, only the hash is passed...
::boost::spirit::classic::rule< ScannerT > argument
ocMax
SubArgumentsType mvSubArguments
double * mpResBuf
ocInt
bool bIsEndFixed
ocDBStdDevP
ocSkewp
ocSin
tuple index
ArgumentMap mSymbols
ocVLookup
ocCritBinom
ocRate
ocLess
sal_uInt8 GetParamCount() const
exports com.sun.star.chart2. data
XPropertyListType t
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
ocNegBinomVert
const double * mpNumericArray
FormulaTreeNodeRef mpRoot
ocSumIf
Configuration options for formula interpreter.
Definition: calcconfig.hxx:43
ocVarA
ocISPMT
ocDBCount2
ocOdd
ocOr
ocBitRshift
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
DynamicKernelStringArgument mStringArgument
ocCotHyp
sal_Int32 SCROW
Definition: types.hxx:17
ocFact
ocCombin
ocAdd
ocRoundUp
ocStDevPA
ocSumX2DY2
ocArcTan
bool bIsStartFixed
ocDB
ocAverage
ocHarMean
#define UNROLLING_FACTOR
ocMod
#define SAL_WARN_IF(condition, area, stream)
rtl_uString ** mpStringArray
unsigned char sal_uInt8
ocRandom
ocDBVarP
virtual const svl::SharedString & GetString() const
ocArcSin
cl_mem mpResClmem
ocAnd
#define SAL_INFO(area, stream)
ocArcCot
ocLn
cl_command_queue mpkCmdQueue
ocStdNormDist
int uniform_int_distribution(int a, int b)
tuple config
ocCountIfs
cl_mem mpCLResBuf
const char * name
ocGammaLn
ocRoundDown
ocRRI
ocBitXor
ocSumXMY2
std::vector< DynamicKernelArgumentRef > mParams
ocStandard
const char *const publicFunc
ocDiv
ocAveDev
ocGamma
ocPow
ocTrunc
cl_program mpProgram
Any result
ocIRR
#define SAL_WARN(area, stream)
ocNormDist
ocSecant
const char * errorString(cl_int nError)
ocPMT
ocDeg
ocFDist
ocPermut
ocTanHyp
sal_Int32 nLength
ocLessEqual
ocConfidence
size_t GetArrayLength() const