LibreOffice Module sc (master)  1
formulagroupcl.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <formulagroup.hxx>
11 #include <formulagroupcl.hxx>
12 #include <document.hxx>
13 #include <formulacell.hxx>
14 #include <tokenarray.hxx>
15 #include <compiler.hxx>
16 #include <comphelper/random.hxx>
17 #include <formula/vectortoken.hxx>
18 #include <scmatrix.hxx>
19 #include <sal/log.hxx>
20 
21 #include <opencl/openclwrapper.hxx>
22 #include <opencl/OpenCLZone.hxx>
23 
24 #include "op_financial.hxx"
25 #include "op_database.hxx"
26 #include "op_math.hxx"
27 #include "op_logical.hxx"
28 #include "op_statistical.hxx"
29 #include "op_array.hxx"
30 #include "op_spreadsheet.hxx"
31 #include "op_addin.hxx"
32 
33 #include <limits>
34 
35 #include <com/sun/star/sheet/FormulaLanguage.hpp>
36 
37 // FIXME: The idea that somebody would bother to (now and then? once a year? once a month?) manually
38 // edit a source file and change the value of some #defined constant and run some ill-defined
39 // "correctness test" is of course ludicrous. Either things are checked in normal unit tests, in
40 // every 'make check', or not at all. The below comments are ridiculous.
41 
// Compile-time tuning constants; nothing in this part of the file uses them.
// NOTE(review): presumably they steer reduction/unrolling in the generated kernels -- confirm at
// the use sites before changing the values.
42 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
43 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
44 
// OpenCL C source text holding the error-code macros and a set of small numeric helpers:
// error <-> NaN encoding, NaN-skipping sum/min/max (optionally counting non-NaN operands),
// and approximate equality / cancellation-aware add and subtract.
// NOTE(review): presumably prepended to every generated kernel program -- the use sites are
// outside this excerpt.
const char* const publicFunc =
    "\n"
    "#define IllegalArgument 502\n"
    "#define IllegalFPOperation 503 // #NUM!\n"
    "#define NoValue 519 // #VALUE!\n"
    "#define NoConvergence 523\n"
    "#define DivisionByZero 532 // #DIV/0!\n"
    "#define NOTAVAILABLE 0x7fff // #N/A\n"
    "\n"
    "double CreateDoubleError(ulong nErr)\n"
    "{\n"
    // At least nVidia on Linux and Intel on Windows seem to ignore the argument to nan(),
    // so using that would not propagate the type of error, work that around
    // by directly constructing the proper IEEE double NaN value
    // TODO: maybe use a better way to detect such systems?
    " return as_double(0x7FF8000000000000+nErr);\n"
//  " return nan(nErr);\n"
    "}\n"
    "\n"
    "uint GetDoubleErrorValue(double fVal)\n"
    "{\n"
    " if (isfinite(fVal))\n"
    " return 0;\n"
    " if (isinf(fVal))\n"
    " return IllegalFPOperation; // normal INF\n"
    " if (as_ulong(fVal) & 0XFFFF0000u)\n"
    " return NoValue; // just a normal NAN\n"
    " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
    "}\n"
    "\n"
    "double fsum_count(double a, double b, __private int *p) {\n"
    " bool t = isnan(a);\n"
    " (*p) += t?0:1;\n"
    " return t?b:a+b;\n"
    "}\n"
    "double fmin_count(double a, double b, __private int *p) {\n"
    " double result = fmin(a, b);\n"
    " bool t = isnan(result);\n"
    " (*p) += t?0:1;\n"
    " return result;\n"
    "}\n"
    "double fmax_count(double a, double b, __private int *p) {\n"
    " double result = fmax(a, b);\n"
    " bool t = isnan(result);\n"
    " (*p) += t?0:1;\n"
    " return result;\n"
    "}\n"
    "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
    "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
    "double fsub(double a, double b) { return a-b; }\n"
    "double fdiv(double a, double b) { return a/b; }\n"
    "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
    // The integer/double round trip below must CONVERT values. The as_double()/as_long()
    // built-ins only reinterpret the bit pattern, which made the range check compare against
    // a denormal and the round trip compare unrelated numbers, so plain casts are used.
    // Callers in approx_equal() pass non-negative (fabs'ed) values only.
    "int is_representable_integer(double a) {\n"
    " long kMaxInt = (1L << 53) - 1;\n"
    " if (a <= (double)kMaxInt)\n"
    " {\n"
    " long nInt = (long)a;\n"
    " double fInt;\n"
    " return (nInt <= kMaxInt &&\n"
    " (!((fInt = (double)nInt) < a) && !(fInt > a)));\n"
    " }\n"
    " return 0;\n"
    "}\n"
    "int approx_equal(double a, double b) {\n"
    " double e48 = 1.0 / (16777216.0 * 16777216.0);\n"
    " double e44 = e48 * 16.0;\n"
    " if (a == b)\n"
    " return 1;\n"
    " if (a == 0.0 || b == 0.0)\n"
    " return 0;\n"
    " double d = fabs(a - b);\n"
    " if (!isfinite(d))\n"
    " return 0; // Nan or Inf involved\n"
    " if (d > ((a = fabs(a)) * e44) || d > ((b = fabs(b)) * e44))\n"
    " return 0;\n"
    " if (is_representable_integer(d) && is_representable_integer(a) && is_representable_integer(b))\n"
    " return 0; // special case for representable integers.\n"
    " return (d < a * e48 && d < b * e48);\n"
    "}\n"
    "double fsum_approx(double a, double b) {\n"
    " if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0))\n"
    " && approx_equal( a, -b ) )\n"
    " return 0.0;\n"
    " return a + b;\n"
    "}\n"
    "double fsub_approx(double a, double b) {\n"
    " if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approx_equal( a, b ) )\n"
    " return 0.0;\n"
    " return a - b;\n"
    "}\n"
    ;
136 
137 #include <vector>
138 #include <map>
139 #include <iostream>
140 #include <algorithm>
141 
142 #include <rtl/digest.h>
143 
144 #include <memory>
145 
146 using namespace formula;
147 
148 namespace sc::opencl {
149 
150 namespace {
151 
// Returns a copy of s in which every line is prefixed with a "/*NNNN*/ " marker holding its
// 1-based line number (right-aligned to a width of four). Newlines are kept; a final line
// without a terminating newline is numbered as well, an empty input yields an empty result.
std::string linenumberify(const std::string& s)
{
    std::stringstream numbered;
    int lineNo = 0;
    size_t pos = 0;
    for (size_t eol = s.find('\n'); eol != std::string::npos; eol = s.find('\n', pos))
    {
        // Copy the line together with its terminating '\n'.
        numbered << "/*" << std::setw(4) << ++lineNo << "*/ " << s.substr(pos, eol - pos + 1);
        pos = eol + 1;
    }
    // Whatever follows the last newline is an unterminated final line.
    if (pos < s.size())
        numbered << "/*" << std::setw(4) << ++lineNo << "*/ " << s.substr(pos);
    return numbered.str();
}
167 
168 bool AllStringsAreNull(const rtl_uString* const* pStringArray, size_t nLength)
169 {
170  if (pStringArray == nullptr)
171  return true;
172 
173  for (size_t i = 0; i < nLength; i++)
174  if (pStringArray[i] != nullptr)
175  return false;
176 
177  return true;
178 }
179 
180 OUString LimitedString( const OUString& str )
181 {
182  if( str.getLength() < 20 )
183  return "\"" + str + "\"";
184  else
185  return OUString::Concat("\"") + str.subView( 0, 20 ) + "\"...";
186 }
187 
188 // Returns formatted contents of the data (possibly shortened), to be used in debug output.
189 OUString DebugPeekData(const FormulaToken* ref, int doubleRefIndex = 0)
190 {
191  if (ref->GetType() == formula::svSingleVectorRef)
192  {
193  const formula::SingleVectorRefToken* pSVR =
194  static_cast<const formula::SingleVectorRefToken*>(ref);
195  OUStringBuffer buf = "SingleRef {";
196  for( size_t i = 0; i < std::min< size_t >( 4, pSVR->GetArrayLength()); ++i )
197  {
198  if( i != 0 )
199  buf.append( "," );
200  if( pSVR->GetArray().mpNumericArray != nullptr )
201  buf.append( pSVR->GetArray().mpNumericArray[ i ] );
202  else if( pSVR->GetArray().mpStringArray != nullptr )
203  buf.append( LimitedString( OUString( pSVR->GetArray().mpStringArray[ i ] )));
204  }
205  if( pSVR->GetArrayLength() > 4 )
206  buf.append( ",..." );
207  buf.append( "}" );
208  return buf.makeStringAndClear();
209  }
210  else if (ref->GetType() == formula::svDoubleVectorRef)
211  {
212  const formula::DoubleVectorRefToken* pDVR =
213  static_cast<const formula::DoubleVectorRefToken*>(ref);
214  OUStringBuffer buf = "DoubleRef {";
215  for( size_t i = 0; i < std::min< size_t >( 4, pDVR->GetArrayLength()); ++i )
216  {
217  if( i != 0 )
218  buf.append( "," );
219  if( pDVR->GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
220  buf.append( pDVR->GetArrays()[doubleRefIndex].mpNumericArray[ i ] );
221  else if( pDVR->GetArrays()[doubleRefIndex].mpStringArray != nullptr )
222  buf.append( LimitedString( OUString( pDVR->GetArrays()[doubleRefIndex].mpStringArray[ i ] )));
223  }
224  if( pDVR->GetArrayLength() > 4 )
225  buf.append( ",..." );
226  buf.append( "}" );
227  return buf.makeStringAndClear();
228  }
229  else if (ref->GetType() == formula::svString)
230  {
231  return "String " + LimitedString( ref->GetString().getString());
232  }
233  else if (ref->GetType() == formula::svDouble)
234  {
235  return OUString::number(ref->GetDouble());
236  }
237  else
238  {
239  return "?";
240  }
241 }
242 
243 // Returns formatted contents of a doubles buffer, to be used in debug output.
244 OUString DebugPeekDoubles(const double* data, int size)
245 {
246  OUStringBuffer buf = "{";
247  for( int i = 0; i < std::min( 4, size ); ++i )
248  {
249  if( i != 0 )
250  buf.append( "," );
251  buf.append( data[ i ] );
252  }
253  if( size > 4 )
254  buf.append( ",..." );
255  buf.append( "}" );
256  return buf.makeStringAndClear();
257 }
258 
259 } // anonymous namespace
260 
262 size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
263 {
264  OpenCLZone zone;
265  FormulaToken* ref = mFormulaTree->GetFormulaToken();
266  double* pHostBuffer = nullptr;
267  size_t szHostBuffer = 0;
268  if (ref->GetType() == formula::svSingleVectorRef)
269  {
270  const formula::SingleVectorRefToken* pSVR =
271  static_cast<const formula::SingleVectorRefToken*>(ref);
272 
273  SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
274 
275  pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
276  szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
277  }
278  else if (ref->GetType() == formula::svDoubleVectorRef)
279  {
280  const formula::DoubleVectorRefToken* pDVR =
281  static_cast<const formula::DoubleVectorRefToken*>(ref);
282 
283  SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
284 
285  pHostBuffer = const_cast<double*>(
286  pDVR->GetArrays()[mnIndex].mpNumericArray);
287  szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
288  }
289  else
290  {
291  throw Unhandled(__FILE__, __LINE__);
292  }
293 
296  cl_int err;
297  if (pHostBuffer)
298  {
299  mpClmem = clCreateBuffer(kEnv.mpkContext,
300  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
301  szHostBuffer,
302  pHostBuffer, &err);
303  if (CL_SUCCESS != err)
304  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
305  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
306  }
307  else
308  {
309  if (szHostBuffer == 0)
310  szHostBuffer = sizeof(double); // a dummy small value
311  // Marshal as a buffer of NANs
312  mpClmem = clCreateBuffer(kEnv.mpkContext,
313  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
314  szHostBuffer, nullptr, &err);
315  if (CL_SUCCESS != err)
316  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
317  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
318 
319  double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
320  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
321  szHostBuffer, 0, nullptr, nullptr, &err));
322  if (CL_SUCCESS != err)
323  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
324 
325  for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
326  pNanBuffer[i] = std::numeric_limits<double>::quiet_NaN();
327  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
328  pNanBuffer, 0, nullptr, nullptr);
329  // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
330  if (CL_SUCCESS != err)
331  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
332  }
333 
334  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref, mnIndex) << ")");
335  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
336  if (CL_SUCCESS != err)
337  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
338  return 1;
339 }
340 
345 
350 
351 namespace {
352 
353 class ConstStringArgument : public DynamicKernelArgument
354 {
355 public:
356  ConstStringArgument( const ScCalcConfig& config, const std::string& s,
357  const FormulaTreeNodeRef& ft ) :
358  DynamicKernelArgument(config, s, ft) { }
360  virtual void GenDecl( std::stringstream& ss ) const override
361  {
362  ss << "unsigned " << mSymName;
363  }
364  virtual void GenDeclRef( std::stringstream& ss ) const override
365  {
366  ss << GenSlidingWindowDeclRef();
367  }
368  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
369  {
370  GenDecl(ss);
371  }
372  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
373  {
374  std::stringstream ss;
375  if (GetFormulaToken()->GetType() != formula::svString)
376  throw Unhandled(__FILE__, __LINE__);
377  FormulaToken* Tok = GetFormulaToken();
378  ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
379  return ss.str();
380  }
381  virtual size_t GetWindowSize() const override
382  {
383  return 1;
384  }
386  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
387  {
388  OpenCLZone zone;
389  FormulaToken* ref = mFormulaTree->GetFormulaToken();
390  cl_uint hashCode = 0;
391  if (ref->GetType() != formula::svString)
392  {
393  throw Unhandled(__FILE__, __LINE__);
394  }
395 
396  const OUString s = ref->GetString().getString().toAsciiUpperCase();
397  hashCode = s.hashCode();
398 
399  // Pass the scalar result back to the rest of the formula kernel
400  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode << "(" << DebugPeekData(ref) << ")" );
401  cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), static_cast<void*>(&hashCode));
402  if (CL_SUCCESS != err)
403  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
404  return 1;
405  }
406 };
407 
409 class DynamicKernelConstantArgument : public DynamicKernelArgument
410 {
411 public:
412  DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
413  const FormulaTreeNodeRef& ft ) :
414  DynamicKernelArgument(config, s, ft) { }
416  virtual void GenDecl( std::stringstream& ss ) const override
417  {
418  ss << "double " << mSymName;
419  }
420  virtual void GenDeclRef( std::stringstream& ss ) const override
421  {
422  ss << mSymName;
423  }
424  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
425  {
426  GenDecl(ss);
427  }
428  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
429  {
430  if (GetFormulaToken()->GetType() != formula::svDouble)
431  throw Unhandled(__FILE__, __LINE__);
432  return mSymName;
433  }
434  virtual size_t GetWindowSize() const override
435  {
436  return 1;
437  }
438  double GetDouble() const
439  {
440  FormulaToken* Tok = GetFormulaToken();
441  if (Tok->GetType() != formula::svDouble)
442  throw Unhandled(__FILE__, __LINE__);
443  return Tok->GetDouble();
444  }
446  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
447  {
448  OpenCLZone zone;
449  double tmp = GetDouble();
450  // Pass the scalar result back to the rest of the formula kernel
451  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
452  cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
453  if (CL_SUCCESS != err)
454  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
455  return 1;
456  }
457 };
458 
459 class DynamicKernelPiArgument : public DynamicKernelArgument
460 {
461 public:
462  DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
463  const FormulaTreeNodeRef& ft ) :
464  DynamicKernelArgument(config, s, ft) { }
466  virtual void GenDecl( std::stringstream& ss ) const override
467  {
468  ss << "double " << mSymName;
469  }
470  virtual void GenDeclRef( std::stringstream& ss ) const override
471  {
472  ss << "3.14159265358979";
473  }
474  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
475  {
476  GenDecl(ss);
477  }
478  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
479  {
480  return mSymName;
481  }
482  virtual size_t GetWindowSize() const override
483  {
484  return 1;
485  }
487  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
488  {
489  OpenCLZone zone;
490  double tmp = 0.0;
491  // Pass the scalar result back to the rest of the formula kernel
492  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp << " (PI)");
493  cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
494  if (CL_SUCCESS != err)
495  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
496  return 1;
497  }
498 };
499 
// Kernel argument that yields a random number. The host passes one integer seed per kernel
// launch (see Marshal()); the generated "<symbol>_Random" device function combines that seed
// with the work item's global id in a Threefry-2x32 computation and scales the result to a
// double.
500 class DynamicKernelRandomArgument : public DynamicKernelArgument
501 {
502 public:
503  DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
504  const FormulaTreeNodeRef& ft ) :
505  DynamicKernelArgument(config, s, ft) { }
    // Emits "double <symbol>".
507  virtual void GenDecl( std::stringstream& ss ) const override
508  {
509  ss << "double " << mSymName;
510  }
    // Emits the bare symbol name.
511  virtual void GenDeclRef( std::stringstream& ss ) const override
512  {
513  ss << mSymName;
514  }
    // Emits "int <symbol>": in the sliding-window form the parameter is the integer seed.
515  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
516  {
517  ss << "int " << mSymName;
518  }
    // Returns a call of the generated device function on the seed: "<symbol>_Random(<symbol>)".
519  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
520  {
521  return mSymName + "_Random(" + mSymName + ")";
522  }
    // Emits the device code: first the Threefry-2x32 support code (wrapped in an
    // #ifndef DEFINED_RANDOM123_STUFF guard inside the emitted text), then the
    // "<symbol>_Random(int seed)" function itself.
523  virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
524  {
525  // This string is from the pi_opencl_kernel.i file as
526  // generated when building the Random123 examples. Unused
527  // stuff has been removed, and the actual kernel is not the
528  // same as in the totally different use case of that example,
529  // of course. Only the code that calculates the counter-based
530  // random number and what it needs is left.
531  ss << "\
532 \n\
533 #ifndef DEFINED_RANDOM123_STUFF\n\
534 #define DEFINED_RANDOM123_STUFF\n\
535 \n\
536 /*\n\
537 Copyright 2010-2011, D. E. Shaw Research.\n\
538 All rights reserved.\n\
539 \n\
540 Redistribution and use in source and binary forms, with or without\n\
541 modification, are permitted provided that the following conditions are\n\
542 met:\n\
543 \n\
544 * Redistributions of source code must retain the above copyright\n\
545  notice, this list of conditions, and the following disclaimer.\n\
546 \n\
547 * Redistributions in binary form must reproduce the above copyright\n\
548  notice, this list of conditions, and the following disclaimer in the\n\
549  documentation and/or other materials provided with the distribution.\n\
550 \n\
551 * Neither the name of D. E. Shaw Research nor the names of its\n\
552  contributors may be used to endorse or promote products derived from\n\
553  this software without specific prior written permission.\n\
554 \n\
555 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
556 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
557 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
558 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
559 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
560 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
561 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
562 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
563 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
564 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
565 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
566 */\n\
567 \n\
568 typedef uint uint32_t;\n\
569 struct r123array2x32\n\
570 {\n\
571  uint32_t v[2];\n\
572 };\n\
573 enum r123_enum_threefry32x2\n\
574 {\n\
575  R_32x2_0_0 = 13,\n\
576  R_32x2_1_0 = 15,\n\
577  R_32x2_2_0 = 26,\n\
578  R_32x2_3_0 = 6,\n\
579  R_32x2_4_0 = 17,\n\
580  R_32x2_5_0 = 29,\n\
581  R_32x2_6_0 = 16,\n\
582  R_32x2_7_0 = 24\n\
583 };\n\
584 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
585  __attribute__ ((always_inline));\n\
586 inline uint32_t\n\
587 RotL_32 (uint32_t x, unsigned int N)\n\
588 {\n\
589  return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
590 }\n\
591 \n\
592 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
593 typedef struct r123array2x32 threefry2x32_key_t;\n\
594 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
595 inline threefry2x32_key_t\n\
596 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
597 {\n\
598  return uk;\n\
599 }\n\
600 \n\
601 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
602  threefry2x32_ctr_t in,\n\
603  threefry2x32_key_t k)\n\
604  __attribute__ ((always_inline));\n\
605 inline threefry2x32_ctr_t\n\
606 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
607  threefry2x32_key_t k)\n\
608 {\n\
609  threefry2x32_ctr_t X;\n\
610  uint32_t ks[2 + 1];\n\
611  int i;\n\
612  ks[2] = 0x1BD11BDA;\n\
613  for (i = 0; i < 2; i++) {\n\
614  ks[i] = k.v[i];\n\
615  X.v[i] = in.v[i];\n\
616  ks[2] ^= k.v[i];\n\
617  }\n\
618  X.v[0] += ks[0];\n\
619  X.v[1] += ks[1];\n\
620  if (Nrounds > 0) {\n\
621  X.v[0] += X.v[1];\n\
622  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
623  X.v[1] ^= X.v[0];\n\
624  }\n\
625  if (Nrounds > 1) {\n\
626  X.v[0] += X.v[1];\n\
627  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
628  X.v[1] ^= X.v[0];\n\
629  }\n\
630  if (Nrounds > 2) {\n\
631  X.v[0] += X.v[1];\n\
632  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
633  X.v[1] ^= X.v[0];\n\
634  }\n\
635  if (Nrounds > 3) {\n\
636  X.v[0] += X.v[1];\n\
637  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
638  X.v[1] ^= X.v[0];\n\
639  }\n\
640  if (Nrounds > 3) {\n\
641  X.v[0] += ks[1];\n\
642  X.v[1] += ks[2];\n\
643  X.v[1] += 1;\n\
644  }\n\
645  if (Nrounds > 4) {\n\
646  X.v[0] += X.v[1];\n\
647  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
648  X.v[1] ^= X.v[0];\n\
649  }\n\
650  if (Nrounds > 5) {\n\
651  X.v[0] += X.v[1];\n\
652  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
653  X.v[1] ^= X.v[0];\n\
654  }\n\
655  if (Nrounds > 6) {\n\
656  X.v[0] += X.v[1];\n\
657  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
658  X.v[1] ^= X.v[0];\n\
659  }\n\
660  if (Nrounds > 7) {\n\
661  X.v[0] += X.v[1];\n\
662  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
663  X.v[1] ^= X.v[0];\n\
664  }\n\
665  if (Nrounds > 7) {\n\
666  X.v[0] += ks[2];\n\
667  X.v[1] += ks[0];\n\
668  X.v[1] += 2;\n\
669  }\n\
670  if (Nrounds > 8) {\n\
671  X.v[0] += X.v[1];\n\
672  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
673  X.v[1] ^= X.v[0];\n\
674  }\n\
675  if (Nrounds > 9) {\n\
676  X.v[0] += X.v[1];\n\
677  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
678  X.v[1] ^= X.v[0];\n\
679  }\n\
680  if (Nrounds > 10) {\n\
681  X.v[0] += X.v[1];\n\
682  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
683  X.v[1] ^= X.v[0];\n\
684  }\n\
685  if (Nrounds > 11) {\n\
686  X.v[0] += X.v[1];\n\
687  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
688  X.v[1] ^= X.v[0];\n\
689  }\n\
690  if (Nrounds > 11) {\n\
691  X.v[0] += ks[0];\n\
692  X.v[1] += ks[1];\n\
693  X.v[1] += 3;\n\
694  }\n\
695  if (Nrounds > 12) {\n\
696  X.v[0] += X.v[1];\n\
697  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
698  X.v[1] ^= X.v[0];\n\
699  }\n\
700  if (Nrounds > 13) {\n\
701  X.v[0] += X.v[1];\n\
702  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
703  X.v[1] ^= X.v[0];\n\
704  }\n\
705  if (Nrounds > 14) {\n\
706  X.v[0] += X.v[1];\n\
707  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
708  X.v[1] ^= X.v[0];\n\
709  }\n\
710  if (Nrounds > 15) {\n\
711  X.v[0] += X.v[1];\n\
712  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
713  X.v[1] ^= X.v[0];\n\
714  }\n\
715  if (Nrounds > 15) {\n\
716  X.v[0] += ks[1];\n\
717  X.v[1] += ks[2];\n\
718  X.v[1] += 4;\n\
719  }\n\
720  if (Nrounds > 16) {\n\
721  X.v[0] += X.v[1];\n\
722  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
723  X.v[1] ^= X.v[0];\n\
724  }\n\
725  if (Nrounds > 17) {\n\
726  X.v[0] += X.v[1];\n\
727  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
728  X.v[1] ^= X.v[0];\n\
729  }\n\
730  if (Nrounds > 18) {\n\
731  X.v[0] += X.v[1];\n\
732  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
733  X.v[1] ^= X.v[0];\n\
734  }\n\
735  if (Nrounds > 19) {\n\
736  X.v[0] += X.v[1];\n\
737  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
738  X.v[1] ^= X.v[0];\n\
739  }\n\
740  if (Nrounds > 19) {\n\
741  X.v[0] += ks[2];\n\
742  X.v[1] += ks[0];\n\
743  X.v[1] += 5;\n\
744  }\n\
745  if (Nrounds > 20) {\n\
746  X.v[0] += X.v[1];\n\
747  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
748  X.v[1] ^= X.v[0];\n\
749  }\n\
750  if (Nrounds > 21) {\n\
751  X.v[0] += X.v[1];\n\
752  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
753  X.v[1] ^= X.v[0];\n\
754  }\n\
755  if (Nrounds > 22) {\n\
756  X.v[0] += X.v[1];\n\
757  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
758  X.v[1] ^= X.v[0];\n\
759  }\n\
760  if (Nrounds > 23) {\n\
761  X.v[0] += X.v[1];\n\
762  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
763  X.v[1] ^= X.v[0];\n\
764  }\n\
765  if (Nrounds > 23) {\n\
766  X.v[0] += ks[0];\n\
767  X.v[1] += ks[1];\n\
768  X.v[1] += 6;\n\
769  }\n\
770  if (Nrounds > 24) {\n\
771  X.v[0] += X.v[1];\n\
772  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
773  X.v[1] ^= X.v[0];\n\
774  }\n\
775  if (Nrounds > 25) {\n\
776  X.v[0] += X.v[1];\n\
777  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
778  X.v[1] ^= X.v[0];\n\
779  }\n\
780  if (Nrounds > 26) {\n\
781  X.v[0] += X.v[1];\n\
782  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
783  X.v[1] ^= X.v[0];\n\
784  }\n\
785  if (Nrounds > 27) {\n\
786  X.v[0] += X.v[1];\n\
787  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
788  X.v[1] ^= X.v[0];\n\
789  }\n\
790  if (Nrounds > 27) {\n\
791  X.v[0] += ks[1];\n\
792  X.v[1] += ks[2];\n\
793  X.v[1] += 7;\n\
794  }\n\
795  if (Nrounds > 28) {\n\
796  X.v[0] += X.v[1];\n\
797  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
798  X.v[1] ^= X.v[0];\n\
799  }\n\
800  if (Nrounds > 29) {\n\
801  X.v[0] += X.v[1];\n\
802  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
803  X.v[1] ^= X.v[0];\n\
804  }\n\
805  if (Nrounds > 30) {\n\
806  X.v[0] += X.v[1];\n\
807  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
808  X.v[1] ^= X.v[0];\n\
809  }\n\
810  if (Nrounds > 31) {\n\
811  X.v[0] += X.v[1];\n\
812  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
813  X.v[1] ^= X.v[0];\n\
814  }\n\
815  if (Nrounds > 31) {\n\
816  X.v[0] += ks[2];\n\
817  X.v[1] += ks[0];\n\
818  X.v[1] += 8;\n\
819  }\n\
820  return X;\n\
821 }\n\
822 \n\
823 enum r123_enum_threefry2x32\n\
824 { threefry2x32_rounds = 20 };\n\
825 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
826  threefry2x32_key_t k)\n\
827  __attribute__ ((always_inline));\n\
828 inline threefry2x32_ctr_t\n\
829 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
830 {\n\
831  return threefry2x32_R (threefry2x32_rounds, in, k);\n\
832 }\n\
833 #endif\n\
834 \n\
835 ";
    // Per-argument device function: the key is built from the work item's global id, the
    // counter from the seed. The first output word is scaled by 1/(SAL_MAX_UINT32 + 1) and
    // offset by half that factor.
836  ss << "double " << mSymName << "_Random (int seed)\n\
837 {\n\
838  unsigned tid = get_global_id(0);\n\
839  threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
840  threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
841  c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
842  const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
843  const double halffactor = 0.5*factor;\n\
844  return c.v[0] * factor + halffactor;\n\
845 }\n\
846 ";
847  }
848  virtual size_t GetWindowSize() const override
849  {
850  return 1;
851  }
    // Draws a fresh seed from comphelper's RNG, uniformly in [0, SAL_MAX_INT32], and sets it
    // as kernel argument argno (a cl_int). Throws OpenCLError if clSetKernelArg fails.
    // Returns 1.
853  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
854  {
855  OpenCLZone zone;
856  cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
857  // Pass the scalar result back to the rest of the formula kernel
858  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed << "(RANDOM)");
859  cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
860  if (CL_SUCCESS != err)
861  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
862  return 1;
863  }
864 };
865 
867 class DynamicKernelStringArgument : public VectorRef
868 {
869 public:
870  DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
871  const FormulaTreeNodeRef& ft, int index = 0 ) :
872  VectorRef(config, s, ft, index) { }
873 
874  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
876  virtual void GenDecl( std::stringstream& ss ) const override
877  {
878  ss << "__global unsigned int *" << mSymName;
879  }
880  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
881  {
882  DynamicKernelStringArgument::GenDecl(ss);
883  }
884  virtual size_t Marshal( cl_kernel, int, int, cl_program ) override;
885 };
886 
887 }
888 
890 size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
891 {
892  OpenCLZone zone;
893  FormulaToken* ref = mFormulaTree->GetFormulaToken();
894 
897  cl_int err;
899  size_t nStrings = 0;
900  if (ref->GetType() == formula::svSingleVectorRef)
901  {
902  const formula::SingleVectorRefToken* pSVR =
903  static_cast<const formula::SingleVectorRefToken*>(ref);
904  nStrings = pSVR->GetArrayLength();
905  vRef = pSVR->GetArray();
906  }
907  else if (ref->GetType() == formula::svDoubleVectorRef)
908  {
909  const formula::DoubleVectorRefToken* pDVR =
910  static_cast<const formula::DoubleVectorRefToken*>(ref);
911  nStrings = pDVR->GetArrayLength();
912  vRef = pDVR->GetArrays()[mnIndex];
913  }
914  size_t szHostBuffer = nStrings * sizeof(cl_int);
915  cl_uint* pHashBuffer = nullptr;
916 
917  if (vRef.mpStringArray != nullptr)
918  {
919  // Marshal strings. Right now we pass hashes of these string
920  mpClmem = clCreateBuffer(kEnv.mpkContext,
921  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
922  szHostBuffer, nullptr, &err);
923  if (CL_SUCCESS != err)
924  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
925  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
926 
927  pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
928  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
929  szHostBuffer, 0, nullptr, nullptr, &err));
930  if (CL_SUCCESS != err)
931  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
932 
933  for (size_t i = 0; i < nStrings; i++)
934  {
935  if (vRef.mpStringArray[i])
936  {
937  const OUString tmp(vRef.mpStringArray[i]);
938  pHashBuffer[i] = tmp.hashCode();
939  }
940  else
941  {
942  pHashBuffer[i] = 0;
943  }
944  }
945  }
946  else
947  {
948  if (nStrings == 0)
949  szHostBuffer = sizeof(cl_int); // a dummy small value
950  // Marshal as a buffer of NANs
951  mpClmem = clCreateBuffer(kEnv.mpkContext,
952  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
953  szHostBuffer, nullptr, &err);
954  if (CL_SUCCESS != err)
955  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
956  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
957 
958  pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
959  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
960  szHostBuffer, 0, nullptr, nullptr, &err));
961  if (CL_SUCCESS != err)
962  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
963 
964  for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
965  pHashBuffer[i] = 0;
966  }
967  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
968  pHashBuffer, 0, nullptr, nullptr);
969  if (CL_SUCCESS != err)
970  throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
971 
972  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref,mnIndex) << ")");
973  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
974  if (CL_SUCCESS != err)
975  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
976  return 1;
977 }
978 
979 namespace {
980 
982 class DynamicKernelMixedArgument : public VectorRef
983 {
984 public:
985  DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
986  const FormulaTreeNodeRef& ft ) :
987  VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
988  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
989  {
990  VectorRef::GenSlidingWindowDecl(ss);
991  ss << ", ";
992  mStringArgument.GenSlidingWindowDecl(ss);
993  }
994  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
996  virtual void GenDecl( std::stringstream& ss ) const override
997  {
998  VectorRef::GenDecl(ss);
999  ss << ", ";
1000  mStringArgument.GenDecl(ss);
1001  }
1002  virtual void GenDeclRef( std::stringstream& ss ) const override
1003  {
1004  VectorRef::GenDeclRef(ss);
1005  ss << ",";
1006  mStringArgument.GenDeclRef(ss);
1007  }
1008  virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1009  {
1010  std::stringstream ss;
1011  ss << "(!isnan(" << VectorRef::GenSlidingWindowDeclRef();
1012  ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
1013  ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1014  ss << ")";
1015  return ss.str();
1016  }
1017  virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1018  {
1019  std::stringstream ss;
1020  ss << VectorRef::GenSlidingWindowDeclRef();
1021  return ss.str();
1022  }
1023  virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1024  {
1025  std::stringstream ss;
1026  ss << mStringArgument.GenSlidingWindowDeclRef();
1027  return ss.str();
1028  }
1029  virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1030  {
1031  int i = VectorRef::Marshal(k, argno, vw, p);
1032  i += mStringArgument.Marshal(k, argno + i, vw, p);
1033  return i;
1034  }
1035 
1036 protected:
1037  DynamicKernelStringArgument mStringArgument;
1038 };
1039 
// Kernel argument backed by a double-vector-ref token (a range that may slide
// with the row being computed). Base supplies the underlying argument type;
// this wrapper adds window-aware element references and reduction loop
// generation on top of it.
// NOTE(review): bIsStartFixed, bIsEndFixed and mpDVR are used below but their
// declarations and the initialisation of the two flags are not visible in this
// listing (the embedded line numbering has gaps at those places) - confirm
// against the real file.
1043 template<class Base>
1044 class DynamicKernelSlidingArgument : public Base
1045 {
1046 public:
 // Throws Unhandled when the formula token is not an svDoubleVectorRef.
1047  DynamicKernelSlidingArgument(const ScCalcConfig& config, const std::string& s,
1048  const FormulaTreeNodeRef& ft,
1049  const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
1050  : Base(config, s, ft, index)
1051  , mpCodeGen(CodeGen)
1052  {
1053  FormulaToken* t = ft->GetFormulaToken();
1054  if (t->GetType() != formula::svDoubleVectorRef)
1055  throw Unhandled(__FILE__, __LINE__);
1056  mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1059  }
1060 
1061  // Should only be called by SumIfs. Yikes!
 // True when the window is larger than 100 rows and both ends are fixed or
 // both ends slide (i.e. the two flags agree).
1062  virtual bool NeedParallelReduction() const
1063  {
1064  assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
1065  return GetWindowSize() > 100 &&
1066  ((GetStartFixed() && GetEndFixed()) ||
1067  (!GetStartFixed() && !GetEndFixed()));
1068  }
1069 
 // No helper function is generated for this argument.
1070  virtual void GenSlidingWindowFunction( std::stringstream& ) { }
1071 
 // Returns the expression used to read the current element in generated
 // code: "<name>[i + gid0]" when both ends slide, "<name>[i]" otherwise.
 // With nested == true the access is wrapped in a bounds check against the
 // array length that yields NAN when the index is out of range.
1072  std::string GenSlidingWindowDeclRef( bool nested = false ) const
1073  {
1074  size_t nArrayLength = mpDVR->GetArrayLength();
1075  std::stringstream ss;
1076  if (!bIsStartFixed && !bIsEndFixed)
1077  {
1078  if (nested)
1079  ss << "((i+gid0) <" << nArrayLength << "?";
1080  ss << Base::GetName() << "[i + gid0]";
1081  if (nested)
1082  ss << ":NAN)";
1083  }
1084  else
1085  {
1086  if (nested)
1087  ss << "(i <" << nArrayLength << "?";
1088  ss << Base::GetName() << "[i]";
1089  if (nested)
1090  ss << ":NAN)";
1091  }
1092  return ss.str();
1093  }
 // Writes the reduction loop for this range into ss and returns the window
 // size (GetRefRowSize()).
 //
 // needBody is an output flag:
 //  - true:  only an open "for (...) {" header was written (exactly one end
 //           of the range is fixed); the loop body and closing brace are
 //           not emitted here.
 //  - false: the complete accumulation code was written (both ends fixed or
 //           both sliding), unrolled by UNROLLING_FACTOR.
1095  size_t GenReductionLoopHeader(
1096  std::stringstream& ss, bool& needBody )
1097  {
1098  assert(mpDVR);
1099  size_t nCurWindowSize = mpDVR->GetRefRowSize();
1100 
1101  {
 // Sliding start, fixed end: iterate from gid0 up to the window/array end.
1102  if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1103  {
1104  ss << "for (int i = ";
1105  ss << "gid0; i < " << mpDVR->GetArrayLength();
1106  ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
1107  needBody = true;
1108  return nCurWindowSize;
1109  }
 // Fixed start, sliding end: iterate from 0 up to gid0 + window size.
1110  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1111  {
1112  ss << "for (int i = ";
1113  ss << "0; i < " << mpDVR->GetArrayLength();
1114  ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
1115  needBody = true;
1116  return nCurWindowSize;
1117  }
 // Both ends slide: emit a fully generated, unrolled accumulation whose
 // element index is offset by gid0.
1118  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1119  {
1120  ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1121  ss << "{int i;\n\t";
1122  std::stringstream temp1, temp2;
1123  int outLoopSize = UNROLLING_FACTOR;
1124  if (nCurWindowSize / outLoopSize != 0)
1125  {
1126  ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1127  for (int count = 0; count < outLoopSize; count++)
1128  {
1129  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
 // The guarded accumulation text is built once (count == 0) and then
 // re-emitted unchanged after every "i = ..." assignment.
1130  if (count == 0)
1131  {
1132  temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1133  temp1 << "){\n\t\t";
1134  temp1 << "tmp = legalize(";
1135  temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1136  temp1 << ", tmp);\n\t\t\t";
1137  temp1 << "}\n\t";
1138  }
1139  ss << temp1.str();
1140  }
1141  ss << "}\n\t";
1142  }
1143  // The residual of mod outLoopSize
1144  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1145  {
1146  ss << "i = " << count << ";\n\t";
 // Same build-once / emit-many scheme as above, for the leftover rows.
1147  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1148  {
1149  temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1150  temp2 << "){\n\t\t";
1151  temp2 << "tmp = legalize(";
1152  temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1153  temp2 << ", tmp);\n\t\t\t";
1154  temp2 << "}\n\t";
1155  }
1156  ss << temp2.str();
1157  }
1158  ss << "}\n";
1159  needBody = false;
1160  return nCurWindowSize;
1161  }
1162  // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
 // Both ends fixed: same unrolled scheme, but the bounds guard compares
 // plain i (no gid0 offset) with the array length.
1163  else
1164  {
1165  ss << "\n\t";
1166  ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1167  ss << "{int i;\n\t";
1168  std::stringstream temp1, temp2;
1169  int outLoopSize = UNROLLING_FACTOR;
1170  if (nCurWindowSize / outLoopSize != 0)
1171  {
1172  ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1173  for (int count = 0; count < outLoopSize; count++)
1174  {
1175  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1176  if (count == 0)
1177  {
1178  temp1 << "if(i < " << mpDVR->GetArrayLength();
1179  temp1 << "){\n\t\t";
1180  temp1 << "tmp = legalize(";
1181  temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1182  temp1 << ", tmp);\n\t\t\t";
1183  temp1 << "}\n\t";
1184  }
1185  ss << temp1.str();
1186  }
1187  ss << "}\n\t";
1188  }
1189  // The residual of mod outLoopSize
1190  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1191  {
1192  ss << "i = " << count << ";\n\t";
1193  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1194  {
1195  temp2 << "if(i < " << mpDVR->GetArrayLength();
1196  temp2 << "){\n\t\t";
1197  temp2 << "tmp = legalize(";
1198  temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1199  temp2 << ", tmp);\n\t\t\t";
1200  temp2 << "}\n\t";
1201  }
1202  ss << temp2.str();
1203  }
1204  ss << "}\n";
1205  needBody = false;
1206  return nCurWindowSize;
1207  }
1208  }
1209  }
1210 
 // Length of the underlying data array as reported by the token.
1211  size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1212 
 // Number of rows in the referenced window (GetRefRowSize()).
1213  size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1214 
1215  bool GetStartFixed() const { return bIsStartFixed; }
1216 
1217  bool GetEndFixed() const { return bIsEndFixed; }
1218 
1219 protected:
1222  // from parent nodes
 // Code generator of the consuming operator; supplies GetBottom() and Gen2().
1223  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1224 };
1225 
1227 class DynamicKernelMixedSlidingArgument : public VectorRef
1228 {
1229 public:
1230  DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
1231  const FormulaTreeNodeRef& ft, const std::shared_ptr<SlidingFunctionBase>& CodeGen,
1232  int index ) :
1233  VectorRef(config, s, ft),
1234  mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
1235  mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
1236  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
1237  {
1238  mDoubleArgument.GenSlidingWindowDecl(ss);
1239  ss << ", ";
1240  mStringArgument.GenSlidingWindowDecl(ss);
1241  }
1242  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
1244  virtual void GenDecl( std::stringstream& ss ) const override
1245  {
1246  mDoubleArgument.GenDecl(ss);
1247  ss << ", ";
1248  mStringArgument.GenDecl(ss);
1249  }
1250  virtual void GenDeclRef( std::stringstream& ss ) const override
1251  {
1252  mDoubleArgument.GenDeclRef(ss);
1253  ss << ",";
1254  mStringArgument.GenDeclRef(ss);
1255  }
1256  virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1257  {
1258  std::stringstream ss;
1259  ss << "(!isnan(" << mDoubleArgument.GenSlidingWindowDeclRef();
1260  ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
1261  ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1262  ss << ")";
1263  return ss.str();
1264  }
1265  virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1266  {
1267  std::stringstream ss;
1268  ss << mDoubleArgument.GenSlidingWindowDeclRef();
1269  return ss.str();
1270  }
1271  virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1272  {
1273  std::stringstream ss;
1274  ss << mStringArgument.GenSlidingWindowDeclRef();
1275  return ss.str();
1276  }
1277  virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1278  {
1279  int i = mDoubleArgument.Marshal(k, argno, vw, p);
1280  i += mStringArgument.Marshal(k, argno + i, vw, p);
1281  return i;
1282  }
1283 
1284 protected:
1285  DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
1286  DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
1287 };
1288 
1290 class SymbolTable
1291 {
1292 public:
1293  typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1294  // This avoids instability caused by using pointer as the key type
1295  SymbolTable() : mCurId(0) { }
1296  template <class T>
1297  const DynamicKernelArgument* DeclRefArg(const ScCalcConfig& config, const FormulaTreeNodeRef&,
1298  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize);
1300  void DumpSlidingWindowFunctions( std::stringstream& ss )
1301  {
1302  for (auto const& argument : mParams)
1303  {
1304  argument->GenSlidingWindowFunction(ss);
1305  ss << "\n";
1306  }
1307  }
1310  void Marshal( cl_kernel, int, cl_program );
1311 
1312 private:
1313  unsigned int mCurId;
1314  ArgumentMap mSymbols;
1315  std::vector<DynamicKernelArgumentRef> mParams;
1316 };
1317 
1318 }
1319 
1320 void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
1321 {
1322  int i = 1; //The first argument is reserved for results
1323  for (auto const& argument : mParams)
1324  {
1325  i += argument->Marshal(k, i, nVectorWidth, pProgram);
1326  }
1327 }
1328 
1329 namespace {
1330 
// Kernel argument for a double-vector-ref range whose reduction is produced by
// a separately generated "_reduction" kernel (see the out-of-line
// GenSlidingWindowFunction). Base supplies the underlying argument type.
// NOTE(review): mpDVR is used below but its declaration and the
// initialisation of bIsStartFixed / bIsEndFixed are not visible in this
// listing (the embedded line numbering has gaps there) - confirm against the
// real file.
1333 template<class Base>
1334 class ParallelReductionVectorRef : public Base
1335 {
1336 public:
 // Throws Unhandled when the formula token is not an svDoubleVectorRef.
 // The second buffer handle starts out null.
1337  ParallelReductionVectorRef(const ScCalcConfig& config, const std::string& s,
1338  const FormulaTreeNodeRef& ft,
1339  const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
1340  : Base(config, s, ft, index)
1341  , mpCodeGen(CodeGen)
1342  , mpClmem2(nullptr)
1343  {
1344  FormulaToken* t = ft->GetFormulaToken();
1345  if (t->GetType() != formula::svDoubleVectorRef)
1346  throw Unhandled(__FILE__, __LINE__);
1347  mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1350  }
1351 
 // Defined out of line, after the operator classes it inspects.
1353  virtual void GenSlidingWindowFunction( std::stringstream& ss );
1354 
 // Element reference used in generated code: "<name>[i + gid0]" when both
 // ends slide, "<name>[i]" otherwise. The bool parameter is ignored.
1355  virtual std::string GenSlidingWindowDeclRef( bool ) const
1356  {
1357  std::stringstream ss;
1358  if (!bIsStartFixed && !bIsEndFixed)
1359  ss << Base::GetName() << "[i + gid0]";
1360  else
1361  ss << Base::GetName() << "[i]";
1362  return ss.str();
1363  }
1364 
 // Defined out of line.
1366  size_t GenReductionLoopHeader(
1367  std::stringstream& ss, int nResultSize, bool& needBody );
1368 
 // Defined out of line.
1369  virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
1370 
 // Releases the second OpenCL buffer if one was created; a failure is only
 // logged as a warning.
1371  ~ParallelReductionVectorRef()
1372  {
1373  if (mpClmem2)
1374  {
1375  cl_int err;
1376  err = clReleaseMemObject(mpClmem2);
1377  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
1378  mpClmem2 = nullptr;
1379  }
1380  }
1381 
 // Length of the underlying data array as reported by the token.
1382  size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1383 
 // Number of rows in the referenced window (GetRefRowSize()).
1384  size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1385 
1386  bool GetStartFixed() const { return bIsStartFixed; }
1387 
1388  bool GetEndFixed() const { return bIsEndFixed; }
1389 
1390 protected:
1391  bool bIsStartFixed, bIsEndFixed;
1393  // from parent nodes
 // Code generator of the consuming operator.
1394  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1395  // controls whether to invoke the reduction kernel during marshaling or not
 // NOTE(review): the comment above does not obviously describe mpClmem2,
 // which is an OpenCL buffer handle released in the destructor - verify.
1396  cl_mem mpClmem2;
1397 };
1398 
// Base class for operators that fold all their arguments into one double
// accumulator ("tmp"), starting from GetBottom() and combining with Gen2().
// NOTE(review): mnResultSize is initialised in the constructor and used below,
// but its declaration is not visible in this listing (embedded line 1401 is
// missing) - confirm against the real file.
1399 class Reduction : public SlidingFunctionBase
1400 {
1402 public:
1403  explicit Reduction(int nResultSize) : mnResultSize(nResultSize) {}
1404 
 // Argument kinds that get a generated reduction loop.
1405  typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1406  typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1407  typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1408 
 // Hook that lets a subclass emit its own NaN handling for argument number
 // 'unsigned'. Returning true suppresses the default isnan() branch that
 // GenSlidingWindowFunction would otherwise emit. The default does nothing.
1409  virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
1410  {
1411  return false;
1412  }
1413 
 // Emits the OpenCL function "double <sSymName>_<BinFuncName()>(<decls>)"
 // that folds all sub-arguments into "tmp" and returns it.
1414  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1415  const std::string& sSymName, SubArguments& vSubArguments ) override
1416  {
1417  ss << "\ndouble " << sSymName;
1418  ss << "_" << BinFuncName() << "(";
1419  for (size_t i = 0; i < vSubArguments.size(); i++)
1420  {
1421  if (i)
1422  ss << ", ";
1423  vSubArguments[i]->GenSlidingWindowDecl(ss);
1424  }
1425  ss << ") {\n";
1426  ss << "double tmp = " << GetBottom() << ";\n";
1427  ss << "int gid0 = get_global_id(0);\n";
 // nCount is only declared for operators that count contributing values.
1428  if (isAverage() || isMinOrMax())
1429  ss << "int nCount = 0;\n";
1430  ss << "double tmpBottom;\n";
 // Arguments are processed from the last one down to the first.
1431  unsigned i = vSubArguments.size();
1432  while (i--)
1433  {
 // Range arguments: let the argument write its loop. When it reports
 // that no body is needed, it has already emitted the full accumulation.
1434  if (NumericRange* NR =
1435  dynamic_cast<NumericRange*>(vSubArguments[i].get()))
1436  {
1437  bool needBody;
1438  NR->GenReductionLoopHeader(ss, needBody);
1439  if (!needBody)
1440  continue;
1441  }
1442  else if (ParallelNumericRange* PNR =
1443  dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
1444  {
1445  //did not handle yet
1446  bool bNeedBody = false;
1447  PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1448  if (!bNeedBody)
1449  continue;
1450  }
1451  else if (StringRange* SR =
1452  dynamic_cast<StringRange*>(vSubArguments[i].get()))
1453  {
1454  //did not handle yet
1455  bool needBody;
1456  SR->GenReductionLoopHeader(ss, needBody);
1457  if (!needBody)
1458  continue;
1459  }
1460  else
1461  {
1462  FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1463  assert(pCur);
1464  assert(pCur->GetType() != formula::svDoubleVectorRef);
1465 
 // Single cell references and constants get their own scope; the brace
 // opened here is closed by the trailing "}\n" of the ocPush branch below.
1466  if (pCur->GetType() == formula::svSingleVectorRef ||
1467  pCur->GetType() == formula::svDouble)
1468  {
1469  ss << "{\n";
1470  }
1471  }
 // Pushed operands: guard against NaN before folding the value into tmp.
1472  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1473  {
1474  bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1475 
1476  ss << "tmpBottom = " << GetBottom() << ";\n";
1477 
 // Default NaN handling: either return 0 right away (ZeroReturnZero())
 // or fold the neutral element instead of the value.
1478  if (!bNanHandled)
1479  {
1480  ss << "if (isnan(";
1481  ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1482  ss << "))\n";
1483  if (ZeroReturnZero())
1484  ss << " return 0;\n";
1485  else
1486  {
1487  ss << " tmp = ";
1488  ss << Gen2("tmpBottom", "tmp") << ";\n";
1489  }
1490  ss << "else\n";
1491  }
1492  ss << "{";
1493  ss << " tmp = ";
1494  ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1495  ss << ";\n";
1496  ss << " }\n";
 // Closes the scope or loop opened for this argument above.
1497  ss << "}\n";
1498  }
 // Any other operand (not an ocPush token) is folded in directly.
1499  else
1500  {
1501  ss << "tmp = ";
1502  ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1503  ss << ";\n";
1504  }
1505  }
 // Empty input: average yields a division-by-zero error, min/max yield 0.
1506  if (isAverage())
1507  ss <<
1508  "if (nCount==0)\n"
1509  " return CreateDoubleError(DivisionByZero);\n";
1510  else if (isMinOrMax())
1511  ss <<
1512  "if (nCount==0)\n"
1513  " return 0;\n";
1514  ss << "return tmp";
 // For averages the accumulated sum is divided by the count.
1515  if (isAverage())
1516  ss << "*pow((double)nCount,-1.0)";
1517  ss << ";\n}";
1518  }
 // Traits overridden by concrete operators.
1519  virtual bool isAverage() const { return false; }
1520  virtual bool isMinOrMax() const { return false; }
1521  virtual bool takeString() const override { return false; }
1522  virtual bool takeNumeric() const override { return true; }
1523 };
1524 
1525 // Strictly binary operators
1526 class Binary : public SlidingFunctionBase
1527 {
1528 public:
1529  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1530  const std::string& sSymName, SubArguments& vSubArguments ) override
1531  {
1532  ss << "\ndouble " << sSymName;
1533  ss << "_" << BinFuncName() << "(";
1534  assert(vSubArguments.size() == 2);
1535  for (size_t i = 0; i < vSubArguments.size(); i++)
1536  {
1537  if (i)
1538  ss << ", ";
1539  vSubArguments[i]->GenSlidingWindowDecl(ss);
1540  }
1541  ss << ") {\n\t";
1542  ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1543  ss << "double tmp = ";
1544  ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(),
1545  vSubArguments[1]->GenSlidingWindowDeclRef()) << ";\n\t";
1546  ss << "return tmp;\n}";
1547  }
1548  virtual bool takeString() const override { return true; }
1549  virtual bool takeNumeric() const override { return true; }
1550 };
1551 
// Base class for "sum of products" style operators: generates an OpenCL
// function that walks the largest window among its arguments and adds the
// product of all arguments at each position to "tmp" via fsum().
// NOTE(review): four "GetType() ==" comparisons below are cut off - the line
// carrying the right-hand side is missing from this listing (embedded lines
// 1632, 1645, 1708 and 1721). Restore them from the real file before
// compiling.
1552 class SumOfProduct : public SlidingFunctionBase
1553 {
1554 public:
1555  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1556  const std::string& sSymName, SubArguments& vSubArguments ) override
1557  {
1558  size_t nCurWindowSize = 0;
1559  FormulaToken* tmpCur = nullptr;
1560  const formula::DoubleVectorRefToken* pCurDVR = nullptr;
1561  ss << "\ndouble " << sSymName;
1562  ss << "_" << BinFuncName() << "(";
 // Emit the parameter list and, on the way, find the largest window size.
1563  for (size_t i = 0; i < vSubArguments.size(); i++)
1564  {
1565  if (i)
1566  ss << ",";
1567  vSubArguments[i]->GenSlidingWindowDecl(ss);
1568  size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1569  nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1570  nCurChildWindowSize : nCurWindowSize;
1571  tmpCur = vSubArguments[i]->GetFormulaToken();
1572  if (ocPush == tmpCur->GetOpCode())
1573  {
1574 
 // Ranges with exactly one fixed end are rejected.
 // NOTE(review): the token is cast to DoubleVectorRefToken without a
 // GetType() check, although the code further down tests GetType() on
 // ocPush tokens, which suggests other token types can reach this point -
 // verify that the cast is safe.
1575  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1576  if (pCurDVR->IsStartFixed() != pCurDVR->IsEndFixed())
1577  throw Unhandled(__FILE__, __LINE__);
1578  }
1579  }
1580  ss << ") {\n";
1581  ss << " double tmp = 0.0;\n";
1582  ss << " int gid0 = get_global_id(0);\n";
1583 
1584  ss << "\tint i;\n\t";
 // One currentCount<N> variable per argument, used for bounds checks.
1585  ss << "int currentCount0;\n";
1586  for (size_t i = 0; i < vSubArguments.size() - 1; i++)
1587  ss << "int currentCount" << i + 1 << ";\n";
 // temp3 holds the per-row statement for the unrolled main loop, temp4 the
 // same for the residual rows. Each is built once and emitted repeatedly.
1588  std::stringstream temp3, temp4;
1589  int outLoopSize = UNROLLING_FACTOR;
1590  if (nCurWindowSize / outLoopSize != 0)
1591  {
1592  ss << "for(int outLoop=0; outLoop<" <<
1593  nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1594  for (int count = 0; count < outLoopSize; count++)
1595  {
1596  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
1597  if (count == 0)
1598  {
 // Row counters: sliding ranges are offset by gid0, all others are not.
1599  for (size_t i = 0; i < vSubArguments.size(); i++)
1600  {
1601  tmpCur = vSubArguments[i]->GetFormulaToken();
1602  if (ocPush == tmpCur->GetOpCode())
1603  {
1604  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1605  if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1606  {
1607  temp3 << " currentCount";
1608  temp3 << i;
1609  temp3 << " =i+gid0+1;\n";
1610  }
1611  else
1612  {
1613  temp3 << " currentCount";
1614  temp3 << i;
1615  temp3 << " =i+1;\n";
1616  }
1617  }
1618  }
1619 
 // Product of all arguments; a pushed operand contributes 0 when its row
 // counter exceeds the array length or the value is NaN.
1620  temp3 << "tmp = fsum(";
1621  for (size_t i = 0; i < vSubArguments.size(); i++)
1622  {
1623  if (i)
1624  temp3 << "*";
1625  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1626  {
1627  temp3 << "(";
1628  temp3 << "(currentCount";
1629  temp3 << i;
1630  temp3 << ">";
1631  if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1633  {
1634  const formula::SingleVectorRefToken* pSVR =
1635  static_cast<const formula::SingleVectorRefToken*>
1636  (vSubArguments[i]->GetFormulaToken());
1637  temp3 << pSVR->GetArrayLength();
1638  temp3 << ")||isnan(" << vSubArguments[i]
1639  ->GenSlidingWindowDeclRef();
1640  temp3 << ")?0:";
1641  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1642  temp3 << ")";
1643  }
1644  else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1646  {
1647  const formula::DoubleVectorRefToken* pSVR =
1648  static_cast<const formula::DoubleVectorRefToken*>
1649  (vSubArguments[i]->GetFormulaToken());
1650  temp3 << pSVR->GetArrayLength();
1651  temp3 << ")||isnan(" << vSubArguments[i]
1652  ->GenSlidingWindowDeclRef(true);
1653  temp3 << ")?0:";
1654  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1655  temp3 << ")";
1656  }
1657 
1658  }
1659  else
1660  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1661  }
1662  temp3 << ", tmp);\n\t";
1663  }
1664  ss << temp3.str();
1665  }
1666  ss << "}\n\t";
1667  }
1668  //The residual of mod outLoopSize
1669  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize;
1670  count < nCurWindowSize; count++)
1671  {
1672  ss << "i =" << count << ";\n";
 // Same statement as in the main loop, built on the first residual row.
1673  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1674  {
1675  for (size_t i = 0; i < vSubArguments.size(); i++)
1676  {
1677  tmpCur = vSubArguments[i]->GetFormulaToken();
1678  if (ocPush == tmpCur->GetOpCode())
1679  {
1680  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1681  if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1682  {
1683  temp4 << " currentCount";
1684  temp4 << i;
1685  temp4 << " =i+gid0+1;\n";
1686  }
1687  else
1688  {
1689  temp4 << " currentCount";
1690  temp4 << i;
1691  temp4 << " =i+1;\n";
1692  }
1693  }
1694  }
1695 
1696  temp4 << "tmp = fsum(";
1697  for (size_t i = 0; i < vSubArguments.size(); i++)
1698  {
1699  if (i)
1700  temp4 << "*";
1701  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1702  {
1703  temp4 << "(";
1704  temp4 << "(currentCount";
1705  temp4 << i;
1706  temp4 << ">";
1707  if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1709  {
1710  const formula::SingleVectorRefToken* pSVR =
1711  static_cast<const formula::SingleVectorRefToken*>
1712  (vSubArguments[i]->GetFormulaToken());
1713  temp4 << pSVR->GetArrayLength();
1714  temp4 << ")||isnan(" << vSubArguments[i]
1715  ->GenSlidingWindowDeclRef();
1716  temp4 << ")?0:";
1717  temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1718  temp4 << ")";
1719  }
1720  else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1722  {
1723  const formula::DoubleVectorRefToken* pSVR =
1724  static_cast<const formula::DoubleVectorRefToken*>
1725  (vSubArguments[i]->GetFormulaToken());
1726  temp4 << pSVR->GetArrayLength();
1727  temp4 << ")||isnan(" << vSubArguments[i]
1728  ->GenSlidingWindowDeclRef(true);
1729  temp4 << ")?0:";
1730  temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1731  temp4 << ")";
1732  }
1733 
1734  }
1735  else
1736  {
1737  temp4 << vSubArguments[i]
1738  ->GenSlidingWindowDeclRef(true);
1739  }
1740  }
1741  temp4 << ", tmp);\n\t";
1742  }
1743  ss << temp4.str();
1744  }
1745  ss << "return tmp;\n";
1746  ss << "}";
1747  }
 // Numeric operands only.
1748  virtual bool takeString() const override { return false; }
1749  virtual bool takeNumeric() const override { return true; }
1750 };
1751 
1753 class OpNop : public Reduction
1754 {
1755 public:
1756  explicit OpNop(int nResultSize) : Reduction(nResultSize) {}
1757 
1758  virtual std::string GetBottom() override { return "0"; }
1759  virtual std::string Gen2( const std::string& lhs, const std::string& ) const override
1760  {
1761  return lhs;
1762  }
1763  virtual std::string BinFuncName() const override { return "nop"; }
1764 };
1765 
1766 class OpCount : public Reduction
1767 {
1768 public:
1769  explicit OpCount(int nResultSize) : Reduction(nResultSize) {}
1770 
1771  virtual std::string GetBottom() override { return "0"; }
1772  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1773  {
1774  std::stringstream ss;
1775  ss << "(isnan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
1776  return ss.str();
1777  }
1778  virtual std::string BinFuncName() const override { return "fcount"; }
1779  virtual bool canHandleMultiVector() const override { return true; }
1780 };
1781 
1782 class OpEqual : public Binary
1783 {
1784 public:
1785  virtual std::string GetBottom() override { return "0"; }
1786  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1787  {
1788  std::stringstream ss;
1789  ss << "strequal(" << lhs << "," << rhs << ")";
1790  return ss.str();
1791  }
1792  virtual std::string BinFuncName() const override { return "eq"; }
1793 };
1794 
1795 class OpLessEqual : public Binary
1796 {
1797 public:
1798  virtual std::string GetBottom() override { return "0"; }
1799  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1800  {
1801  std::stringstream ss;
1802  ss << "(" << lhs << "<=" << rhs << ")";
1803  return ss.str();
1804  }
1805  virtual std::string BinFuncName() const override { return "leq"; }
1806 };
1807 
1808 class OpLess : public Binary
1809 {
1810 public:
1811  virtual std::string GetBottom() override { return "0"; }
1812  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1813  {
1814  std::stringstream ss;
1815  ss << "(" << lhs << "<" << rhs << ")";
1816  return ss.str();
1817  }
1818  virtual std::string BinFuncName() const override { return "less"; }
1819 };
1820 
1821 class OpGreater : public Binary
1822 {
1823 public:
1824  virtual std::string GetBottom() override { return "0"; }
1825  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1826  {
1827  std::stringstream ss;
1828  ss << "(" << lhs << ">" << rhs << ")";
1829  return ss.str();
1830  }
1831  virtual std::string BinFuncName() const override { return "gt"; }
1832 };
1833 
1834 class OpSum : public Reduction
1835 {
1836 public:
1837  explicit OpSum(int nResultSize) : Reduction(nResultSize) {}
1838 
1839  virtual std::string GetBottom() override { return "0"; }
1840  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1841  {
1842  std::stringstream ss;
1843  ss << "fsum_approx((" << lhs << "),(" << rhs << "))";
1844  return ss.str();
1845  }
1846  virtual std::string BinFuncName() const override { return "fsum"; }
1847  // All arguments are simply summed, so it doesn't matter if SvDoubleVector is split.
1848  virtual bool canHandleMultiVector() const override { return true; }
1849 };
1850 
1851 class OpAverage : public Reduction
1852 {
1853 public:
1854  explicit OpAverage(int nResultSize) : Reduction(nResultSize) {}
1855 
1856  virtual std::string GetBottom() override { return "0"; }
1857  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1858  {
1859  std::stringstream ss;
1860  ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
1861  return ss.str();
1862  }
1863  virtual std::string BinFuncName() const override { return "average"; }
1864  virtual bool isAverage() const override { return true; }
1865  virtual bool canHandleMultiVector() const override { return true; }
1866 };
1867 
1868 class OpSub : public Reduction
1869 {
1870 public:
1871  explicit OpSub(int nResultSize) : Reduction(nResultSize) {}
1872 
1873  virtual std::string GetBottom() override { return "0"; }
1874  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1875  {
1876  return "fsub_approx(" + lhs + "," + rhs + ")";
1877  }
1878  virtual std::string BinFuncName() const override { return "fsub"; }
1879 };
1880 
1881 class OpMul : public Reduction
1882 {
1883 public:
1884  explicit OpMul(int nResultSize) : Reduction(nResultSize) {}
1885 
1886  virtual std::string GetBottom() override { return "1"; }
1887  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1888  {
1889  return lhs + "*" + rhs;
1890  }
1891  virtual std::string BinFuncName() const override { return "fmul"; }
1892  virtual bool ZeroReturnZero() override { return true; }
1893 };
1894 
1896 class OpDiv : public Reduction
1897 {
1898 public:
1899  explicit OpDiv(int nResultSize) : Reduction(nResultSize) {}
1900 
1901  virtual std::string GetBottom() override { return "1.0"; }
1902  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1903  {
1904  return "(" + rhs + "==0 ? CreateDoubleError(DivisionByZero) : (" + lhs + "/" + rhs + ") )";
1905  }
1906  virtual std::string BinFuncName() const override { return "fdiv"; }
1907 
1908  virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const override
1909  {
1910  if (argno == 1)
1911  {
1912  ss <<
1913  "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
1914  " return CreateDoubleError(DivisionByZero);\n"
1915  "}\n";
1916  return true;
1917  }
1918  else if (argno == 0)
1919  {
1920  ss <<
1921  "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") &&\n"
1922  " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() << ") || " << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)) {\n"
1923  " return 0;\n"
1924  "}\n";
1925  }
1926  return false;
1927  }
1928 
1929 };
1930 
1931 class OpMin : public Reduction
1932 {
1933 public:
1934  explicit OpMin(int nResultSize) : Reduction(nResultSize) {}
1935 
1936  virtual std::string GetBottom() override { return "NAN"; }
1937  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1938  {
1939  return "fmin_count(" + lhs + "," + rhs + ", &nCount)";
1940  }
1941  virtual std::string BinFuncName() const override { return "min"; }
1942  virtual bool isMinOrMax() const override { return true; }
1943  virtual bool canHandleMultiVector() const override { return true; }
1944 };
1945 
1946 class OpMax : public Reduction
1947 {
1948 public:
1949  explicit OpMax(int nResultSize) : Reduction(nResultSize) {}
1950 
1951  virtual std::string GetBottom() override { return "NAN"; }
1952  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1953  {
1954  return "fmax_count(" + lhs + "," + rhs + ", &nCount)";
1955  }
1956  virtual std::string BinFuncName() const override { return "max"; }
1957  virtual bool isMinOrMax() const override { return true; }
1958  virtual bool canHandleMultiVector() const override { return true; }
1959 };
1960 
1961 class OpSumProduct : public SumOfProduct
1962 {
1963 public:
1964  virtual std::string GetBottom() override { return "0"; }
1965  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1966  {
1967  return lhs + "*" + rhs;
1968  }
1969  virtual std::string BinFuncName() const override { return "fsop"; }
1970 };
1971 
1972 template<class Base>
1973 void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
1974 {
1975  if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1976  {
1977  std::string name = Base::GetName();
1978  ss << "__kernel void " << name;
1979  ss << "_reduction(__global double* A, "
1980  "__global double *result,int arrayLength,int windowSize){\n";
1981  ss << " double tmp, current_result =" <<
1982  mpCodeGen->GetBottom();
1983  ss << ";\n";
1984  ss << " int writePos = get_group_id(1);\n";
1985  ss << " int lidx = get_local_id(0);\n";
1986  ss << " __local double shm_buf[256];\n";
1987  if (mpDVR->IsStartFixed())
1988  ss << " int offset = 0;\n";
1989  else // if (!mpDVR->IsStartFixed())
1990  ss << " int offset = get_group_id(1);\n";
1991  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1992  ss << " int end = windowSize;\n";
1993  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1994  ss << " int end = offset + windowSize;\n";
1995  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1996  ss << " int end = windowSize + get_group_id(1);\n";
1997  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1998  ss << " int end = windowSize;\n";
1999  ss << " end = min(end, arrayLength);\n";
2000 
2001  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2002  ss << " int loop = arrayLength/512 + 1;\n";
2003  ss << " for (int l=0; l<loop; l++){\n";
2004  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2005  ss << " int loopOffset = l*512;\n";
2006  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2007  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2008  "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
2009  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2010  "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
2011  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2012  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2013  "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
2014  ss << " shm_buf[lidx] = tmp;\n";
2015  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2016  ss << " for (int i = 128; i >0; i/=2) {\n";
2017  ss << " if (lidx < i)\n";
2018  ss << " shm_buf[lidx] = ";
2019  // Special case count
2020  if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2021  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2022  else
2023  ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
2024  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2025  ss << " }\n";
2026  ss << " if (lidx == 0)\n";
2027  ss << " current_result =";
2028  if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2029  ss << "current_result + shm_buf[0]";
2030  else
2031  ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
2032  ss << ";\n";
2033  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2034  ss << " }\n";
2035  ss << " if (lidx == 0)\n";
2036  ss << " result[writePos] = current_result;\n";
2037  ss << "}\n";
2038  }
2039  else
2040  {
2041  std::string name = Base::GetName();
2042  /*sum reduction*/
2043  ss << "__kernel void " << name << "_sum";
2044  ss << "_reduction(__global double* A, "
2045  "__global double *result,int arrayLength,int windowSize){\n";
2046  ss << " double tmp, current_result =" <<
2047  mpCodeGen->GetBottom();
2048  ss << ";\n";
2049  ss << " int writePos = get_group_id(1);\n";
2050  ss << " int lidx = get_local_id(0);\n";
2051  ss << " __local double shm_buf[256];\n";
2052  if (mpDVR->IsStartFixed())
2053  ss << " int offset = 0;\n";
2054  else // if (!mpDVR->IsStartFixed())
2055  ss << " int offset = get_group_id(1);\n";
2056  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2057  ss << " int end = windowSize;\n";
2058  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2059  ss << " int end = offset + windowSize;\n";
2060  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2061  ss << " int end = windowSize + get_group_id(1);\n";
2062  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2063  ss << " int end = windowSize;\n";
2064  ss << " end = min(end, arrayLength);\n";
2065  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2066  ss << " int loop = arrayLength/512 + 1;\n";
2067  ss << " for (int l=0; l<loop; l++){\n";
2068  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2069  ss << " int loopOffset = l*512;\n";
2070  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2071  ss << " tmp = legalize(";
2072  ss << "(A[loopOffset + lidx + offset]+ tmp)";
2073  ss << ", tmp);\n";
2074  ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
2075  ss << ", tmp);\n";
2076  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2077  ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
2078  ss << ", tmp);\n";
2079  ss << " shm_buf[lidx] = tmp;\n";
2080  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2081  ss << " for (int i = 128; i >0; i/=2) {\n";
2082  ss << " if (lidx < i)\n";
2083  ss << " shm_buf[lidx] = ";
2084  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2085  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2086  ss << " }\n";
2087  ss << " if (lidx == 0)\n";
2088  ss << " current_result =";
2089  ss << "current_result + shm_buf[0]";
2090  ss << ";\n";
2091  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2092  ss << " }\n";
2093  ss << " if (lidx == 0)\n";
2094  ss << " result[writePos] = current_result;\n";
2095  ss << "}\n";
2096  /*count reduction*/
2097  ss << "__kernel void " << name << "_count";
2098  ss << "_reduction(__global double* A, "
2099  "__global double *result,int arrayLength,int windowSize){\n";
2100  ss << " double tmp, current_result =" <<
2101  mpCodeGen->GetBottom();
2102  ss << ";\n";
2103  ss << " int writePos = get_group_id(1);\n";
2104  ss << " int lidx = get_local_id(0);\n";
2105  ss << " __local double shm_buf[256];\n";
2106  if (mpDVR->IsStartFixed())
2107  ss << " int offset = 0;\n";
2108  else // if (!mpDVR->IsStartFixed())
2109  ss << " int offset = get_group_id(1);\n";
2110  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2111  ss << " int end = windowSize;\n";
2112  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2113  ss << " int end = offset + windowSize;\n";
2114  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2115  ss << " int end = windowSize + get_group_id(1);\n";
2116  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2117  ss << " int end = windowSize;\n";
2118  ss << " end = min(end, arrayLength);\n";
2119  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2120  ss << " int loop = arrayLength/512 + 1;\n";
2121  ss << " for (int l=0; l<loop; l++){\n";
2122  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2123  ss << " int loopOffset = l*512;\n";
2124  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2125  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2126  ss << ", tmp);\n";
2127  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
2128  ss << ", tmp);\n";
2129  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2130  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2131  ss << ", tmp);\n";
2132  ss << " shm_buf[lidx] = tmp;\n";
2133  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2134  ss << " for (int i = 128; i >0; i/=2) {\n";
2135  ss << " if (lidx < i)\n";
2136  ss << " shm_buf[lidx] = ";
2137  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2138  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2139  ss << " }\n";
2140  ss << " if (lidx == 0)\n";
2141  ss << " current_result =";
2142  ss << "current_result + shm_buf[0];";
2143  ss << ";\n";
2144  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2145  ss << " }\n";
2146  ss << " if (lidx == 0)\n";
2147  ss << " result[writePos] = current_result;\n";
2148  ss << "}\n";
2149  }
2150 
2151 }
2152 
2153 template<class Base>
2154 size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
2155  std::stringstream& ss, int nResultSize, bool& needBody )
2156 {
2157  assert(mpDVR);
2158  size_t nCurWindowSize = mpDVR->GetRefRowSize();
2159  std::string temp = Base::GetName() + "[gid0]";
2160  ss << "tmp = ";
2161  // Special case count
2162  if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
2163  {
2164  ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
2165  ss << "nCount = nCount-1;\n";
2166  ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
2167  ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
2168  }
2169  else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2170  ss << temp << "+ tmp";
2171  else
2172  ss << mpCodeGen->Gen2(temp, "tmp");
2173  ss << ";\n\t";
2174  needBody = false;
2175  return nCurWindowSize;
2176 }
2177 
2178 template<class Base>
2179 size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
2180 {
2181  assert(Base::mpClmem == nullptr);
2182 
2183  OpenCLZone zone;
2186  cl_int err;
2187  size_t nInput = mpDVR->GetArrayLength();
2188  size_t nCurWindowSize = mpDVR->GetRefRowSize();
2189  // create clmem buffer
2190  if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
2191  throw Unhandled(__FILE__, __LINE__);
2192  double* pHostBuffer = const_cast<double*>(
2193  mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
2194  size_t szHostBuffer = nInput * sizeof(double);
2195  Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
2196  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
2197  szHostBuffer,
2198  pHostBuffer, &err);
2199  SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
2200 
2201  mpClmem2 = clCreateBuffer(kEnv.mpkContext,
2202  CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
2203  sizeof(double) * w, nullptr, nullptr);
2204  if (CL_SUCCESS != err)
2205  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2206  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
2207 
2208  // reproduce the reduction function name
2209  std::string kernelName;
2210  if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
2211  kernelName = Base::GetName() + "_reduction";
2212  else
2213  kernelName = Base::GetName() + "_sum_reduction";
2214  cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2215  if (err != CL_SUCCESS)
2216  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2217  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
2218 
2219  // set kernel arg of reduction kernel
2220  // TODO(Wei Wei): use unique name for kernel
2221  cl_mem buf = Base::GetCLBuffer();
2222  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
2223  err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
2224  static_cast<void*>(&buf));
2225  if (CL_SUCCESS != err)
2226  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2227 
2228  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
2229  err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
2230  if (CL_SUCCESS != err)
2231  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2232 
2233  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
2234  err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
2235  if (CL_SUCCESS != err)
2236  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2237 
2238  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
2239  err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2240  if (CL_SUCCESS != err)
2241  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2242 
2243  // set work group size and execute
2244  size_t global_work_size[] = { 256, static_cast<size_t>(w) };
2245  size_t const local_work_size[] = { 256, 1 };
2246  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2247  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2248  global_work_size, local_work_size, 0, nullptr, nullptr);
2249  if (CL_SUCCESS != err)
2250  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2251  err = clFinish(kEnv.mpkCmdQueue);
2252  if (CL_SUCCESS != err)
2253  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2254  if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
2255  {
2256  /*average need more reduction kernel for count computing*/
2257  std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
2258  double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2259  mpClmem2,
2260  CL_TRUE, CL_MAP_READ, 0,
2261  sizeof(double) * w, 0, nullptr, nullptr,
2262  &err));
2263  if (err != CL_SUCCESS)
2264  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
2265 
2266  for (int i = 0; i < w; i++)
2267  pAllBuffer[i] = resbuf[i];
2268  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
2269  if (err != CL_SUCCESS)
2270  throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
2271 
2272  kernelName = Base::GetName() + "_count_reduction";
2273  redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2274  if (err != CL_SUCCESS)
2275  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2276  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
2277 
2278  // set kernel arg of reduction kernel
2279  buf = Base::GetCLBuffer();
2280  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
2281  err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
2282  static_cast<void*>(&buf));
2283  if (CL_SUCCESS != err)
2284  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2285 
2286  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
2287  err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
2288  if (CL_SUCCESS != err)
2289  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2290 
2291  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
2292  err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
2293  if (CL_SUCCESS != err)
2294  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2295 
2296  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
2297  err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2298  if (CL_SUCCESS != err)
2299  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2300 
2301  // set work group size and execute
2302  size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
2303  size_t const local_work_size1[] = { 256, 1 };
2304  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2305  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2306  global_work_size1, local_work_size1, 0, nullptr, nullptr);
2307  if (CL_SUCCESS != err)
2308  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2309  err = clFinish(kEnv.mpkCmdQueue);
2310  if (CL_SUCCESS != err)
2311  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2312  resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2313  mpClmem2,
2314  CL_TRUE, CL_MAP_READ, 0,
2315  sizeof(double) * w, 0, nullptr, nullptr,
2316  &err));
2317  if (err != CL_SUCCESS)
2318  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
2319  for (int i = 0; i < w; i++)
2320  pAllBuffer[i + w] = resbuf[i];
2321  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
2322  // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
2323  if (CL_SUCCESS != err)
2324  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
2325  if (mpClmem2)
2326  {
2327  err = clReleaseMemObject(mpClmem2);
2328  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2329  mpClmem2 = nullptr;
2330  }
2331  mpClmem2 = clCreateBuffer(kEnv.mpkContext,
2332  cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
2333  w * sizeof(double) * 2, pAllBuffer.get(), &err);
2334  if (CL_SUCCESS != err)
2335  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2336  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
2337  }
2338  // set kernel arg
2339  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2340  err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
2341  if (CL_SUCCESS != err)
2342  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2343  return 1;
2344 }
2345 
2346 struct SumIfsArgs
2347 {
2348  explicit SumIfsArgs(cl_mem x) : mCLMem(x), mConst(0.0) { }
2349  explicit SumIfsArgs(double x) : mCLMem(nullptr), mConst(x) { }
2350  cl_mem mCLMem;
2351  double mConst;
2352 };
2353 
    // Kernel argument composed of several sub-arguments (mvSubArguments) plus the
    // code generator (mpCodeGen) that turns them into a function. Most members
    // simply forward to every sub-argument in order.
    // NOTE(review): this listing carries embedded line numbers and skips some of
    // them (e.g. 2375-2376, 2435-2436, 2596, 2599); the statements on those lines
    // are not visible here.
2355 class DynamicKernelSoPArguments : public DynamicKernelArgument
2356 {
2357 public:
2358  typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
2359 
2360  DynamicKernelSoPArguments( const ScCalcConfig& config,
2361  const std::string& s, const FormulaTreeNodeRef& ft,
2362  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize );
2363 
    // Marshals every sub-argument starting at kernel argument index argno and
    // returns the total number of argument slots they reported. For OpGeoMean and
    // for OpSumIfs (when NeedReductionKernel() is true) an extra reduction kernel
    // is run first and its result buffer is bound to argument argno of k.
2365  virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) override
2366  {
2367  OpenCLZone zone;
2368  unsigned i = 0;
2369  for (const auto& rxSubArgument : mvSubArguments)
2370  {
2371  i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
2372  }
2373  if (dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
2374  {
    // NOTE(review): kEnv is used below but its declaration is not visible in this
    // listing (embedded lines 2375-2376 are missing).
2377  cl_int err;
2378  cl_mem pClmem2;
2379 
    // One entry per sub-argument: its CL buffer if it is a VectorRef, else null.
2380  std::vector<cl_mem> vclmem;
2381  for (const auto& rxSubArgument : mvSubArguments)
2382  {
2383  if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2384  vclmem.push_back(VR->GetCLBuffer());
2385  else
2386  vclmem.push_back(nullptr);
2387  }
2388  pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2389  sizeof(double) * nVectorWidth, nullptr, &err);
2390  if (CL_SUCCESS != err)
2391  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2392  SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2393 
2394  std::string kernelName = "GeoMean_reduction";
2395  cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2396  if (err != CL_SUCCESS)
2397  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2398  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2399 
2400  // set kernel arg of reduction kernel
    // NOTE(review): for a null entry the argument size is sizeof(double) but the
    // value pointer still addresses the (null) cl_mem slot of the vector.
2401  for (size_t j = 0; j < vclmem.size(); j++)
2402  {
2403  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
2404  err = clSetKernelArg(redKernel, j,
2405  vclmem[j] ? sizeof(cl_mem) : sizeof(double),
2406  static_cast<void*>(&vclmem[j]));
2407  if (CL_SUCCESS != err)
2408  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2409  }
2410  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
2411  err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2));
2412  if (CL_SUCCESS != err)
2413  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2414 
2415  // set work group size and execute
2416  size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
2417  size_t const local_work_size[] = { 256, 1 };
2418  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2419  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2420  global_work_size, local_work_size, 0, nullptr, nullptr);
2421  if (CL_SUCCESS != err)
2422  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2423  err = clFinish(kEnv.mpkCmdQueue);
2424  if (CL_SUCCESS != err)
2425  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2426 
    // NOTE(review): unlike the SumIfs branch below, neither redKernel nor pClmem2
    // is released anywhere in the visible code of this branch.
2427  // Pass pClmem2 to the "real" kernel
2428  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
2429  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2));
2430  if (CL_SUCCESS != err)
2431  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2432  }
2433  if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
2434  {
    // NOTE(review): kEnv is used below but its declaration is not visible in this
    // listing (embedded lines 2435-2436 are missing).
2437  cl_int err;
    // The first sub-argument is taken to be a sliding VectorRef argument; the
    // static_cast below is unchecked.
2438  DynamicKernelArgument* Arg = mvSubArguments[0].get();
2439  DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2440  static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
    // NOTE(review): mpClmem2 is reset without releasing a previously held buffer.
2441  mpClmem2 = nullptr;
2442 
2443  if (OpSumCodeGen->NeedReductionKernel())
2444  {
2445  size_t nInput = slidingArgPtr->GetArrayLength();
2446  size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
2447  std::vector<SumIfsArgs> vclmem;
2448 
    // One kernel argument per sub-argument: a buffer for VectorRef, a double for
    // a constant argument, otherwise a null buffer.
2449  for (const auto& rxSubArgument : mvSubArguments)
2450  {
2451  if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2452  vclmem.emplace_back(VR->GetCLBuffer());
2453  else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
2454  vclmem.emplace_back(CA->GetDouble());
2455  else
2456  vclmem.emplace_back(nullptr);
2457  }
2458  mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2459  sizeof(double) * nVectorWidth, nullptr, &err);
2460  if (CL_SUCCESS != err)
2461  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2462  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2463 
2464  std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
2465  cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2466  if (err != CL_SUCCESS)
2467  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2468  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2469 
2470  // set kernel arg of reduction kernel
2471  for (size_t j = 0; j < vclmem.size(); j++)
2472  {
2473  if (vclmem[j].mCLMem)
2474  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
2475  else
2476  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
2477  err = clSetKernelArg(redKernel, j,
2478  vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
2479  vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
2480  static_cast<void*>(&vclmem[j].mConst));
2481  if (CL_SUCCESS != err)
2482  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2483  }
2484  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
2485  err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2486  if (CL_SUCCESS != err)
2487  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2488 
    // NOTE(review): nInput and nCurWindowSize are size_t but are passed with
    // sizeof(cl_int); only the first sizeof(cl_int) bytes of each are read.
2489  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
2490  err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
2491  if (CL_SUCCESS != err)
2492  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2493 
2494  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
2495  err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2496  if (CL_SUCCESS != err)
2497  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2498  // set work group size and execute
2499  size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
2500  size_t const local_work_size[] = { 256, 1 };
2501  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2502  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2503  global_work_size, local_work_size, 0, nullptr, nullptr);
2504  if (CL_SUCCESS != err)
2505  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2506 
2507  err = clFinish(kEnv.mpkCmdQueue);
2508  if (CL_SUCCESS != err)
2509  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2510 
    // The temporary kernel is released once it has finished; a failure to
    // release is only logged.
2511  SAL_INFO("sc.opencl", "Releasing kernel " << redKernel);
2512  err = clReleaseKernel(redKernel);
2513  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
2514 
2515  // Pass mpClmem2 to the "real" kernel
2516  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2517  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2518  if (CL_SUCCESS != err)
2519  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2520  }
2521  }
2522  return i;
2523  }
2524 
    // Emits the helper functions of all sub-arguments first, then the function of
    // this node's own code generator.
2525  virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
2526  {
2527  for (DynamicKernelArgumentRef & rArg : mvSubArguments)
2528  rArg->GenSlidingWindowFunction(ss);
2529  mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2530  }
    // Writes the GenDeclRef() output of every sub-argument, separated by ",".
2531  virtual void GenDeclRef( std::stringstream& ss ) const override
2532  {
2533  for (size_t i = 0; i < mvSubArguments.size(); i++)
2534  {
2535  if (i)
2536  ss << ",";
2537  mvSubArguments[i]->GenDeclRef(ss);
2538  }
2539  }
    // Writes the GenDecl() output of every sub-argument, separated by ", ".
2540  virtual void GenDecl( std::stringstream& ss ) const override
2541  {
2542  for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2543  ++it)
2544  {
2545  if (it != mvSubArguments.begin())
2546  ss << ", ";
2547  (*it)->GenDecl(ss);
2548  }
2549  }
2550 
    // Returns the largest window size reported by any sub-argument (0 if there
    // are none).
2551  virtual size_t GetWindowSize() const override
2552  {
2553  size_t nCurWindowSize = 0;
2554  for (const auto & rSubArgument : mvSubArguments)
2555  {
2556  size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
2557  nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2558  nCurChildWindowSize : nCurWindowSize;
2559  }
2560  return nCurWindowSize;
2561  }
2562 
    // Writes the GenSlidingWindowDecl() output of every sub-argument, separated
    // by ", ".
2564  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
2565  {
2566  for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2567  ++it)
2568  {
2569  if (it != mvSubArguments.begin())
2570  ss << ", ";
2571  (*it)->GenSlidingWindowDecl(ss);
2572  }
2573  }
    // Not nested: returns the call "<mSymName>_<BinFuncName>(<sub-argument refs>)".
    // Nested: requires exactly two sub-arguments (otherwise throws Unhandled) and
    // returns "(" + Gen2(first, second) + ")".
2576  virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const override
2577  {
2578  std::stringstream ss;
2579  if (!nested)
2580  {
2581  ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
2582  for (size_t i = 0; i < mvSubArguments.size(); i++)
2583  {
2584  if (i)
2585  ss << ", ";
2586  mvSubArguments[i]->GenDeclRef(ss);
2587  }
2588  ss << ")";
2589  }
2590  else
2591  {
2592  if (mvSubArguments.size() != 2)
2593  throw Unhandled(__FILE__, __LINE__);
    // NOTE(review): the two initializers below are cut off in this listing
    // (embedded lines 2596 and 2599 are missing); the value GetType() is compared
    // against is not visible here.
2594  bool bArgument1_NeedNested =
2595  mvSubArguments[0]->GetFormulaToken()->GetType()
2597  bool bArgument2_NeedNested =
2598  mvSubArguments[1]->GetFormulaToken()->GetType()
2600  ss << "(";
2601  ss << mpCodeGen->
2602  Gen2(mvSubArguments[0]
2603  ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2604  mvSubArguments[1]
2605  ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2606  ss << ")";
2607  }
2608  return ss.str();
2609  }
    // Returns "_" + BinFuncName() followed by the DumpOpName() of every
    // sub-argument.
2610  virtual std::string DumpOpName() const override
2611  {
2612  std::string t = "_" + mpCodeGen->BinFuncName();
2613  for (const auto & rSubArgument : mvSubArguments)
2614  t += rSubArgument->DumpOpName();
2615  return t;
2616  }
    // Collects inline function declarations/definitions from the code generator
    // and then from every sub-argument.
2617  virtual void DumpInlineFun( std::set<std::string>& decls,
2618  std::set<std::string>& funs ) const override
2619  {
2620  mpCodeGen->BinInlineFun(decls, funs);
2621  for (const auto & rSubArgument : mvSubArguments)
2622  rSubArgument->DumpInlineFun(decls, funs);
2623  }
    // True only if every sub-argument reports IsEmpty() (also true when there are
    // no sub-arguments).
2624  virtual bool IsEmpty() const override
2625  {
2626  for (const auto & rSubArgument : mvSubArguments)
2627  if( !rSubArgument->IsEmpty())
2628  return false;
2629  return true;
2630  }
    // Releases mpClmem2 if one is held; a failure to release is only logged.
2631  virtual ~DynamicKernelSoPArguments() override
2632  {
2633  if (mpClmem2)
2634  {
2635  cl_int err;
2636  err = clReleaseMemObject(mpClmem2);
2637  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2638  mpClmem2 = nullptr;
2639  }
2640  }
2641 
2642 private:
    // Child arguments, in formula order.
2643  SubArgumentsType mvSubArguments;
    // Code generator for the operation applied to the children.
2644  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
    // Result buffer of the SumIfs reduction kernel; released in the destructor.
2645  cl_mem mpClmem2;
2646 };
2647 
2648 }
2649 
2651  const std::string& ts, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
2652  int nResultSize )
2653 {
    // Builds a DynamicKernelSoPArguments from the given arguments; pCodeGen is
    // moved into the new object.
    // NOTE(review): the first line of this function's signature is not visible in
    // this listing (embedded line 2650 is missing).
2654  return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
2655 }
2656 
2657 template<class Base>
2658 static std::shared_ptr<DynamicKernelArgument> VectorRefFactory( const ScCalcConfig& config, const std::string& s,
2659  const FormulaTreeNodeRef& ft,
2660  std::shared_ptr<SlidingFunctionBase>& pCodeGen,
2661  int index )
2662 {
2663  //Black lists ineligible classes here ..
2664  // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2665  if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2666  {
2667  // coverity[identical_branches] - only identical if Base happens to be VectorRef
2668  if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
2669  return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
2670  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2671  }
2672  // AVERAGE is not supported yet
2673  //Average has been supported by reduction kernel
2674  /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2675  {
2676  return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2677  }*/
2678  // MUL is not supported yet
2679  else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2680  {
2681  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2682  }
2683  // Sub is not a reduction per se
2684  else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2685  {
2686  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2687  }
2688  // Only child class of Reduction is supported
2689  else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2690  {
2691  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2692  }
2693 
2694  const formula::DoubleVectorRefToken* pDVR =
2695  static_cast<const formula::DoubleVectorRefToken*>(
2696  ft->GetFormulaToken());
2697  // Window being too small to justify a parallel reduction
2698  if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
2699  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2700  if (pDVR->IsStartFixed() == pDVR->IsEndFixed())
2701  return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
2702  else // Other cases are not supported as well
2703  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2704 }
2705 
2706 DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
2707  const std::string& s, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize ) :
2708  DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(nullptr)
2709 {
2710  size_t nChildren = ft->Children.size();
2711 
2712  for (size_t i = 0; i < nChildren; i++)
2713  {
2714  FormulaTreeNodeRef rChild = ft->Children[i];
2715  if (!rChild)
2716  throw Unhandled(__FILE__, __LINE__);
2717  FormulaToken* pChild = rChild->GetFormulaToken();
2718  if (!pChild)
2719  throw Unhandled(__FILE__, __LINE__);
2720  OpCode opc = pChild->GetOpCode();
2721  std::stringstream tmpname;
2722  tmpname << s << "_" << i;
2723  std::string ts = tmpname.str();
2724  switch (opc)
2725  {
2726  case ocPush:
2727  if (pChild->GetType() == formula::svDoubleVectorRef)
2728  {
2729  const formula::DoubleVectorRefToken* pDVR =
2730  static_cast<const formula::DoubleVectorRefToken*>(pChild);
2731 
2732  // The code below will split one svDoubleVectorRef into one subargument
2733  // for each column of data, and then all these subarguments will be later
2734  // passed to the code generating the function. Most of the code then
2735  // simply treats each subargument as one argument to the function, and thus
2736  // could break in this case.
2737  // As a simple solution, simply prevent this case, unless the code in question
2738  // explicitly claims it will handle this situation properly.
2739  if( pDVR->GetArrays().size() > 1 )
2740  {
2741  if( !pCodeGen->canHandleMultiVector())
2742  throw UnhandledToken(("Function '" + pCodeGen->BinFuncName()
2743  + "' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
2744 
2745  SAL_INFO("sc.opencl", "multi-column DoubleRef");
2746 
2747  }
2748 
2749  // FIXME: The Right Thing to do would be to compare the accumulated kernel
2750  // parameter size against the CL_DEVICE_MAX_PARAMETER_SIZE of the device, but
2751  // let's just do this sanity check for now. The kernel compilation will
2752  // hopefully fail anyway if the size of parameters exceeds the limit and this
2753  // sanity check is just to make us bail out a bit earlier.
2754 
2755  // The number 50 comes from the fact that the minimum size of
2756  // CL_DEVICE_MAX_PARAMETER_SIZE is 256, which for 32-bit code probably means 64
2757  // of them. Round down a bit.
2758 
2759  if (pDVR->GetArrays().size() > 50)
2760  throw UnhandledToken(("Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->GetArrays().size()) + ")").c_str(), __FILE__, __LINE__);
2761 
2762  for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
2763  {
2764  SAL_INFO("sc.opencl", "i=" << i << " j=" << j <<
2765  " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
2766  " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
2767  " allStringsAreNull=" << (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())?"YES":"NO") <<
2768  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2769  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2770 
2771  if (pDVR->GetArrays()[j].mpNumericArray &&
2772  pCodeGen->takeNumeric() &&
2773  pDVR->GetArrays()[j].mpStringArray &&
2774  pCodeGen->takeString())
2775  {
2776  // Function takes numbers or strings, there are both
2777  SAL_INFO("sc.opencl", "Numbers and strings");
2778  mvSubArguments.push_back(
2779  std::make_shared<DynamicKernelMixedSlidingArgument>(mCalcConfig,
2780  ts, ft->Children[i], mpCodeGen, j));
2781  }
2782  else if (pDVR->GetArrays()[j].mpNumericArray &&
2783  pCodeGen->takeNumeric() &&
2784  (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
2785  {
2786  // Function takes numbers, and either there
2787  // are no strings, or there are strings but
2788  // they are to be treated as zero
2789  SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2790  mvSubArguments.push_back(
2791  VectorRefFactory<VectorRef>(mCalcConfig,
2792  ts, ft->Children[i], mpCodeGen, j));
2793  }
2794  else if (pDVR->GetArrays()[j].mpNumericArray == nullptr &&
2795  pCodeGen->takeNumeric() &&
2796  pDVR->GetArrays()[j].mpStringArray &&
2798  {
2799  // Function takes numbers, and there are only
2800  // strings, but they are to be treated as zero
2801  SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2802  mvSubArguments.push_back(
2803  VectorRefFactory<VectorRef>(mCalcConfig,
2804  ts, ft->Children[i], mpCodeGen, j));
2805  }
2806  else if (pDVR->GetArrays()[j].mpStringArray &&
2807  pCodeGen->takeString())
2808  {
2809  // There are strings, and the function takes strings.
2810  SAL_INFO("sc.opencl", "Strings only");
2811  mvSubArguments.push_back(
2813  <DynamicKernelStringArgument>(mCalcConfig,
2814  ts, ft->Children[i], mpCodeGen, j));
2815  }
2816  else if (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) &&
2817  pDVR->GetArrays()[j].mpNumericArray == nullptr)
2818  {
2819  // There are only empty cells. Push as an
2820  // array of NANs
2821  SAL_INFO("sc.opencl", "Only empty cells");
2822  mvSubArguments.push_back(
2823  VectorRefFactory<VectorRef>(mCalcConfig,
2824  ts, ft->Children[i], mpCodeGen, j));
2825  }
2826  else
2827  {
2828  SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2829  throw UnhandledToken(("Unhandled numbers/strings combination for '"
2830  + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2831  }
2832  }
2833  }
2834  else if (pChild->GetType() == formula::svSingleVectorRef)
2835  {
2836  const formula::SingleVectorRefToken* pSVR =
2837  static_cast<const formula::SingleVectorRefToken*>(pChild);
2838 
2839  SAL_INFO("sc.opencl", "i=" << i <<
2840  " mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
2841  " mpStringArray=" << pSVR->GetArray().mpStringArray <<
2842  " allStringsAreNull=" << (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())?"YES":"NO") <<
2843  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2844  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2845 
2846  if (pSVR->GetArray().mpNumericArray &&
2847  pCodeGen->takeNumeric() &&
2848  pSVR->GetArray().mpStringArray &&
2849  pCodeGen->takeString())
2850  {
2851  // Function takes numbers or strings, there are both
2852  SAL_INFO("sc.opencl", "Numbers and strings");
2853  mvSubArguments.push_back(
2854  std::make_shared<DynamicKernelMixedArgument>(mCalcConfig,
2855  ts, ft->Children[i]));
2856  }
2857  else if (pSVR->GetArray().mpNumericArray &&
2858  pCodeGen->takeNumeric() &&
2860  {
2861  // Function takes numbers, and either there
2862  // are no strings, or there are strings but
2863  // they are to be treated as zero
2864  SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2865  mvSubArguments.push_back(
2866  std::make_shared<VectorRef>(mCalcConfig, ts,
2867  ft->Children[i]));
2868  }
2869  else if (pSVR->GetArray().mpNumericArray == nullptr &&
2870  pCodeGen->takeNumeric() &&
2871  pSVR->GetArray().mpStringArray &&
2873  {
2874  // Function takes numbers, and there are only
2875  // strings, but they are to be treated as zero
2876  SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2877  mvSubArguments.push_back(
2878  std::make_shared<VectorRef>(mCalcConfig, ts,
2879  ft->Children[i]));
2880  }
2881  else if (pSVR->GetArray().mpStringArray &&
2882  pCodeGen->takeString())
2883  {
2884  // There are strings, and the function takes strings.
2885  SAL_INFO("sc.opencl", "Strings only");
2886  mvSubArguments.push_back(
2887  std::make_shared<DynamicKernelStringArgument>(mCalcConfig,
2888  ts, ft->Children[i]));
2889  }
2890  else if (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) &&
2891  pSVR->GetArray().mpNumericArray == nullptr)
2892  {
2893  // There are only empty cells. Push as an
2894  // array of NANs
2895  SAL_INFO("sc.opencl", "Only empty cells");
2896  mvSubArguments.push_back(
2897  std::make_shared<VectorRef>(mCalcConfig, ts,
2898  ft->Children[i]));
2899  }
2900  else
2901  {
2902  SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2903  throw UnhandledToken(("Unhandled numbers/strings combination for '"
2904  + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2905  }
2906  }
2907  else if (pChild->GetType() == formula::svDouble)
2908  {
2909  SAL_INFO("sc.opencl", "Constant number case");
2910  mvSubArguments.push_back(
2911  std::make_shared<DynamicKernelConstantArgument>(mCalcConfig, ts,
2912  ft->Children[i]));
2913  }
2914  else if (pChild->GetType() == formula::svString
2915  && pCodeGen->takeString())
2916  {
2917  SAL_INFO("sc.opencl", "Constant string case");
2918  mvSubArguments.push_back(
2919  std::make_shared<ConstStringArgument>(mCalcConfig, ts,
2920  ft->Children[i]));
2921  }
2922  else
2923  {
2924  SAL_INFO("sc.opencl", "Unhandled operand, rejecting for OpenCL");
2925  throw UnhandledToken(("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str(), __FILE__, __LINE__);
2926  }
2927  break;
2928  case ocDiv:
2929  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDiv>(nResultSize), nResultSize));
2930  break;
2931  case ocMul:
2932  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMul>(nResultSize), nResultSize));
2933  break;
2934  case ocSub:
2935  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSub>(nResultSize), nResultSize));
2936  break;
2937  case ocAdd:
2938  case ocSum:
2939  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSum>(nResultSize), nResultSize));
2940  break;
2941  case ocAverage:
2942  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAverage>(nResultSize), nResultSize));
2943  break;
2944  case ocMin:
2945  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMin>(nResultSize), nResultSize));
2946  break;
2947  case ocMax:
2948  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMax>(nResultSize), nResultSize));
2949  break;
2950  case ocCount:
2951  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCount>(nResultSize), nResultSize));
2952  break;
2953  case ocSumProduct:
2954  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSumProduct>(), nResultSize));
2955  break;
2956  case ocIRR:
2957  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIRR>(), nResultSize));
2958  break;
2959  case ocMIRR:
2960  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMIRR>(), nResultSize));
2961  break;
2962  case ocPMT:
2963  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPMT>(), nResultSize));
2964  break;
2965  case ocRate:
2966  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIntrate>(), nResultSize));
2967  break;
2968  case ocRRI:
2969  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRRI>(), nResultSize));
2970  break;
2971  case ocPpmt:
2972  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPPMT>(), nResultSize));
2973  break;
2974  case ocFisher:
2975  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisher>(), nResultSize));
2976  break;
2977  case ocFisherInv:
2978  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisherInv>(), nResultSize));
2979  break;
2980  case ocGamma:
2981  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGamma>(), nResultSize));
2982  break;
2983  case ocSLN:
2984  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSLN>(), nResultSize));
2985  break;
2986  case ocGammaLn:
2987  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGammaLn>(), nResultSize));
2988  break;
2989  case ocGauss:
2990  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGauss>(), nResultSize));
2991  break;
2992  /*case ocGeoMean:
2993  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGeoMean));
2994  break;*/
2995  case ocHarMean:
2996  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpHarMean>(), nResultSize));
2997  break;
2998  case ocLessEqual:
2999  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLessEqual>(), nResultSize));
3000  break;
3001  case ocLess:
3002  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLess>(), nResultSize));
3003  break;
3004  case ocEqual:
3005  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEqual>(), nResultSize));
3006  break;
3007  case ocGreater:
3008  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGreater>(), nResultSize));
3009  break;
3010  case ocSYD:
3011  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSYD>(), nResultSize));
3012  break;
3013  case ocCorrel:
3014  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCorrel>(), nResultSize));
3015  break;
3016  case ocCos:
3017  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCos>(), nResultSize));
3018  break;
3019  case ocNegBinomVert :
3020  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNegbinomdist>(), nResultSize));
3021  break;
3022  case ocPearson:
3023  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPearson>(), nResultSize));
3024  break;
3025  case ocRSQ:
3026  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRsq>(), nResultSize));
3027  break;
3028  case ocCosecant:
3029  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCsc>(), nResultSize));
3030  break;
3031  case ocISPMT:
3032  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpISPMT>(), nResultSize));
3033  break;
3034  case ocPDuration:
3035  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3036  ft->Children[i], std::make_shared<OpPDuration>(), nResultSize));
3037  break;
3038  case ocSinHyp:
3039  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3040  ft->Children[i], std::make_shared<OpSinh>(), nResultSize));
3041  break;
3042  case ocAbs:
3043  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3044  ft->Children[i], std::make_shared<OpAbs>(), nResultSize));
3045  break;
3046  case ocPV:
3047  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3048  ft->Children[i], std::make_shared<OpPV>(), nResultSize));
3049  break;
3050  case ocSin:
3051  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3052  ft->Children[i], std::make_shared<OpSin>(), nResultSize));
3053  break;
3054  case ocTan:
3055  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3056  ft->Children[i], std::make_shared<OpTan>(), nResultSize));
3057  break;
3058  case ocTanHyp:
3059  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3060  ft->Children[i], std::make_shared<OpTanH>(), nResultSize));
3061  break;
3062  case ocStandard:
3063  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3064  ft->Children[i], std::make_shared<OpStandard>(), nResultSize));
3065  break;
3066  case ocWeibull:
3067  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3068  ft->Children[i], std::make_shared<OpWeibull>(), nResultSize));
3069  break;
3070  /*case ocMedian:
3071  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3072  ft->Children[i],std::make_shared<OpMedian));
3073  break;*/
3074  case ocDDB:
3075  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3076  ft->Children[i], std::make_shared<OpDDB>(), nResultSize));
3077  break;
3078  case ocFV:
3079  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3080  ft->Children[i], std::make_shared<OpFV>(), nResultSize));
3081  break;
3082  case ocSumIfs:
3083  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3084  ft->Children[i], std::make_shared<OpSumIfs>(), nResultSize));
3085  break;
3086  /*case ocVBD:
3087  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3088  ft->Children[i],std::make_shared<OpVDB));
3089  break;*/
3090  case ocKurt:
3091  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3092  ft->Children[i], std::make_shared<OpKurt>(), nResultSize));
3093  break;
3094  /*case ocNper:
3095  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3096  ft->Children[i], std::make_shared<OpNper));
3097  break;*/
3098  case ocNormDist:
3099  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3100  ft->Children[i], std::make_shared<OpNormdist>(), nResultSize));
3101  break;
3102  case ocArcCos:
3103  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3104  ft->Children[i], std::make_shared<OpArcCos>(), nResultSize));
3105  break;
3106  case ocSqrt:
3107  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3108  ft->Children[i], std::make_shared<OpSqrt>(), nResultSize));
3109  break;
3110  case ocArcCosHyp:
3111  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3112  ft->Children[i], std::make_shared<OpArcCosHyp>(), nResultSize));
3113  break;
3114  case ocNPV:
3115  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3116  ft->Children[i], std::make_shared<OpNPV>(), nResultSize));
3117  break;
3118  case ocStdNormDist:
3119  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3120  ft->Children[i], std::make_shared<OpNormsdist>(), nResultSize));
3121  break;
3122  case ocNormInv:
3123  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3124  ft->Children[i], std::make_shared<OpNorminv>(), nResultSize));
3125  break;
3126  case ocSNormInv:
3127  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3128  ft->Children[i], std::make_shared<OpNormsinv>(), nResultSize));
3129  break;
3130  case ocPermut:
3131  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3132  ft->Children[i], std::make_shared<OpPermut>(), nResultSize));
3133  break;
3134  case ocPermutationA:
3135  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3136  ft->Children[i], std::make_shared<OpPermutationA>(), nResultSize));
3137  break;
3138  case ocPhi:
3139  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3140  ft->Children[i], std::make_shared<OpPhi>(), nResultSize));
3141  break;
3142  case ocIpmt:
3143  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3144  ft->Children[i], std::make_shared<OpIPMT>(), nResultSize));
3145  break;
3146  case ocConfidence:
3147  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3148  ft->Children[i], std::make_shared<OpConfidence>(), nResultSize));
3149  break;
3150  case ocIntercept:
3151  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3152  ft->Children[i], std::make_shared<OpIntercept>(), nResultSize));
3153  break;
3154  case ocDB:
3155  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3156  std::make_shared<OpDB>(), nResultSize));
3157  break;
3158  case ocLogInv:
3159  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3160  ft->Children[i], std::make_shared<OpLogInv>(), nResultSize));
3161  break;
3162  case ocArcCot:
3163  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3164  ft->Children[i], std::make_shared<OpArcCot>(), nResultSize));
3165  break;
3166  case ocCosHyp:
3167  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3168  ft->Children[i], std::make_shared<OpCosh>(), nResultSize));
3169  break;
3170  case ocCritBinom:
3171  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3172  ft->Children[i], std::make_shared<OpCritBinom>(), nResultSize));
3173  break;
3174  case ocArcCotHyp:
3175  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3176  ft->Children[i], std::make_shared<OpArcCotHyp>(), nResultSize));
3177  break;
3178  case ocArcSin:
3179  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3180  ft->Children[i], std::make_shared<OpArcSin>(), nResultSize));
3181  break;
3182  case ocArcSinHyp:
3183  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3184  ft->Children[i], std::make_shared<OpArcSinHyp>(), nResultSize));
3185  break;
3186  case ocArcTan:
3187  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3188  ft->Children[i], std::make_shared<OpArcTan>(), nResultSize));
3189  break;
3190  case ocArcTanHyp:
3191  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3192  ft->Children[i], std::make_shared<OpArcTanH>(), nResultSize));
3193  break;
3194  case ocBitAnd:
3195  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3196  ft->Children[i], std::make_shared<OpBitAnd>(), nResultSize));
3197  break;
3198  case ocForecast:
3199  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3200  ft->Children[i], std::make_shared<OpForecast>(), nResultSize));
3201  break;
3202  case ocLogNormDist:
3203  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3204  ft->Children[i], std::make_shared<OpLogNormDist>(), nResultSize));
3205  break;
3206  /*case ocGammaDist:
3207  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3208  ft->Children[i], std::make_shared<OpGammaDist));
3209  break;*/
3210  case ocLn:
3211  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3212  ft->Children[i], std::make_shared<OpLn>(), nResultSize));
3213  break;
3214  case ocRound:
3215  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3216  ft->Children[i], std::make_shared<OpRound>(), nResultSize));
3217  break;
3218  case ocCot:
3219  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3220  ft->Children[i], std::make_shared<OpCot>(), nResultSize));
3221  break;
3222  case ocCotHyp:
3223  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3224  ft->Children[i], std::make_shared<OpCoth>(), nResultSize));
3225  break;
3226  case ocFDist:
3227  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3228  ft->Children[i], std::make_shared<OpFdist>(), nResultSize));
3229  break;
3230  case ocVar:
3231  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3232  ft->Children[i], std::make_shared<OpVar>(), nResultSize));
3233  break;
3234  /*case ocChiDist:
3235  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3236  ft->Children[i],std::make_shared<OpChiDist));
3237  break;*/
3238  case ocPow:
3239  case ocPower:
3240  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3241  ft->Children[i], std::make_shared<OpPower>(), nResultSize));
3242  break;
3243  case ocOdd:
3244  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3245  ft->Children[i], std::make_shared<OpOdd>(), nResultSize));
3246  break;
3247  /*case ocChiSqDist:
3248  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3249  ft->Children[i],std::make_shared<OpChiSqDist));
3250  break;
3251  case ocChiSqInv:
3252  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3253  ft->Children[i],std::make_shared<OpChiSqInv));
3254  break;
3255  case ocGammaInv:
3256  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3257  ft->Children[i], std::make_shared<OpGammaInv));
3258  break;*/
3259  case ocFloor:
3260  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3261  ft->Children[i], std::make_shared<OpFloor>(), nResultSize));
3262  break;
3263  /*case ocFInv:
3264  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3265  ft->Children[i], std::make_shared<OpFInv));
3266  break;*/
3267  case ocFTest:
3268  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3269  ft->Children[i], std::make_shared<OpFTest>(), nResultSize));
3270  break;
3271  case ocB:
3272  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3273  ft->Children[i], std::make_shared<OpB>(), nResultSize));
3274  break;
3275  case ocBetaDist:
3276  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3277  ft->Children[i], std::make_shared<OpBetaDist>(), nResultSize));
3278  break;
3279  case ocCosecantHyp:
3280  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3281  ft->Children[i], std::make_shared<OpCscH>(), nResultSize));
3282  break;
3283  case ocExp:
3284  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3285  ft->Children[i], std::make_shared<OpExp>(), nResultSize));
3286  break;
3287  case ocLog10:
3288  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3289  ft->Children[i], std::make_shared<OpLog10>(), nResultSize));
3290  break;
3291  case ocExpDist:
3292  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3293  ft->Children[i], std::make_shared<OpExponDist>(), nResultSize));
3294  break;
3295  case ocAverageIfs:
3296  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3297  ft->Children[i], std::make_shared<OpAverageIfs>(), nResultSize));
3298  break;
3299  case ocCountIfs:
3300  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3301  ft->Children[i], std::make_shared<OpCountIfs>(), nResultSize));
3302  break;
3303  case ocCombinA:
3304  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3305  ft->Children[i], std::make_shared<OpCombinA>(), nResultSize));
3306  break;
3307  case ocEven:
3308  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3309  ft->Children[i], std::make_shared<OpEven>(), nResultSize));
3310  break;
3311  case ocLog:
3312  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3313  ft->Children[i], std::make_shared<OpLog>(), nResultSize));
3314  break;
3315  case ocMod:
3316  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3317  ft->Children[i], std::make_shared<OpMod>(), nResultSize));
3318  break;
3319  case ocTrunc:
3320  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3321  ft->Children[i], std::make_shared<OpTrunc>(), nResultSize));
3322  break;
3323  case ocSkew:
3324  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3325  ft->Children[i], std::make_shared<OpSkew>(), nResultSize));
3326  break;
3327  case ocArcTan2:
3328  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3329  ft->Children[i], std::make_shared<OpArcTan2>(), nResultSize));
3330  break;
3331  case ocBitOr:
3332  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3333  ft->Children[i], std::make_shared<OpBitOr>(), nResultSize));
3334  break;
3335  case ocBitLshift:
3336  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3337  ft->Children[i], std::make_shared<OpBitLshift>(), nResultSize));
3338  break;
3339  case ocBitRshift:
3340  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3341  ft->Children[i], std::make_shared<OpBitRshift>(), nResultSize));
3342  break;
3343  case ocBitXor:
3344  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3345  ft->Children[i], std::make_shared<OpBitXor>(), nResultSize));
3346  break;
3347  /*case ocChiInv:
3348  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3349  ft->Children[i],std::make_shared<OpChiInv));
3350  break;*/
3351  case ocPoissonDist:
3352  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3353  ft->Children[i], std::make_shared<OpPoisson>(), nResultSize));
3354  break;
3355  case ocSumSQ:
3356  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3357  ft->Children[i], std::make_shared<OpSumSQ>(), nResultSize));
3358  break;
3359  case ocSkewp:
3360  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3361  ft->Children[i], std::make_shared<OpSkewp>(), nResultSize));
3362  break;
3363  case ocBinomDist:
3364  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3365  ft->Children[i], std::make_shared<OpBinomdist>(), nResultSize));
3366  break;
3367  case ocVarP:
3368  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3369  ft->Children[i], std::make_shared<OpVarP>(), nResultSize));
3370  break;
3371  case ocCeil:
3372  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3373  ft->Children[i], std::make_shared<OpCeil>(), nResultSize));
3374  break;
3375  case ocCombin:
3376  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3377  ft->Children[i], std::make_shared<OpCombin>(), nResultSize));
3378  break;
3379  case ocDevSq:
3380  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3381  ft->Children[i], std::make_shared<OpDevSq>(), nResultSize));
3382  break;
3383  case ocStDev:
3384  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3385  ft->Children[i], std::make_shared<OpStDev>(), nResultSize));
3386  break;
3387  case ocSlope:
3388  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3389  ft->Children[i], std::make_shared<OpSlope>(), nResultSize));
3390  break;
3391  case ocSTEYX:
3392  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3393  ft->Children[i], std::make_shared<OpSTEYX>(), nResultSize));
3394  break;
3395  case ocZTest:
3396  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3397  ft->Children[i], std::make_shared<OpZTest>(), nResultSize));
3398  break;
3399  case ocPi:
3400  mvSubArguments.push_back(
3401  std::make_shared<DynamicKernelPiArgument>(mCalcConfig, ts,
3402  ft->Children[i]));
3403  break;
3404  case ocRandom:
3405  mvSubArguments.push_back(
3406  std::make_shared<DynamicKernelRandomArgument>(mCalcConfig, ts,
3407  ft->Children[i]));
3408  break;
3409  case ocProduct:
3410  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3411  ft->Children[i], std::make_shared<OpProduct>(), nResultSize));
3412  break;
3413  /*case ocHypGeomDist:
3414  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3415  ft->Children[i],std::make_shared<OpHypGeomDist));
3416  break;*/
3417  case ocSumX2MY2:
3418  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3419  ft->Children[i], std::make_shared<OpSumX2MY2>(), nResultSize));
3420  break;
3421  case ocSumX2DY2:
3422  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3423  ft->Children[i], std::make_shared<OpSumX2PY2>(), nResultSize));
3424  break;
3425  /*case ocBetaInv:
3426  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3427  ft->Children[i],std::make_shared<OpBetainv));
3428  break;*/
3429  case ocTTest:
3430  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3431  ft->Children[i], std::make_shared<OpTTest>(), nResultSize));
3432  break;
3433  case ocTDist:
3434  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3435  ft->Children[i], std::make_shared<OpTDist>(), nResultSize));
3436  break;
3437  /*case ocTInv:
3438  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3439  ft->Children[i], std::make_shared<OpTInv));
3440  break;*/
3441  case ocSumXMY2:
3442  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3443  ft->Children[i], std::make_shared<OpSumXMY2>(), nResultSize));
3444  break;
3445  case ocStDevP:
3446  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3447  ft->Children[i], std::make_shared<OpStDevP>(), nResultSize));
3448  break;
3449  case ocCovar:
3450  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3451  ft->Children[i], std::make_shared<OpCovar>(), nResultSize));
3452  break;
3453  case ocAnd:
3454  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3455  ft->Children[i], std::make_shared<OpAnd>(), nResultSize));
3456  break;
3457  case ocVLookup:
3458  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3459  ft->Children[i], std::make_shared<OpVLookup>(), nResultSize));
3460  break;
3461  case ocOr:
3462  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3463  ft->Children[i], std::make_shared<OpOr>(), nResultSize));
3464  break;
3465  case ocNot:
3466  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3467  ft->Children[i], std::make_shared<OpNot>(), nResultSize));
3468  break;
3469  case ocXor:
3470  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3471  ft->Children[i], std::make_shared<OpXor>(), nResultSize));
3472  break;
3473  case ocDBMax:
3474  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3475  ft->Children[i], std::make_shared<OpDmax>(), nResultSize));
3476  break;
3477  case ocDBMin:
3478  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3479  ft->Children[i], std::make_shared<OpDmin>(), nResultSize));
3480  break;
3481  case ocDBProduct:
3482  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3483  ft->Children[i], std::make_shared<OpDproduct>(), nResultSize));
3484  break;
3485  case ocDBAverage:
3486  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3487  ft->Children[i], std::make_shared<OpDaverage>(), nResultSize));
3488  break;
3489  case ocDBStdDev:
3490  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3491  ft->Children[i], std::make_shared<OpDstdev>(), nResultSize));
3492  break;
3493  case ocDBStdDevP:
3494  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3495  ft->Children[i], std::make_shared<OpDstdevp>(), nResultSize));
3496  break;
3497  case ocDBSum:
3498  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3499  ft->Children[i], std::make_shared<OpDsum>(), nResultSize));
3500  break;
3501  case ocDBVar:
3502  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3503  ft->Children[i], std::make_shared<OpDvar>(), nResultSize));
3504  break;
3505  case ocDBVarP:
3506  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3507  ft->Children[i], std::make_shared<OpDvarp>(), nResultSize));
3508  break;
3509  case ocAverageIf:
3510  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3511  ft->Children[i], std::make_shared<OpAverageIf>(), nResultSize));
3512  break;
3513  case ocDBCount:
3514  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3515  ft->Children[i], std::make_shared<OpDcount>(), nResultSize));
3516  break;
3517  case ocDBCount2:
3518  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3519  ft->Children[i], std::make_shared<OpDcount2>(), nResultSize));
3520  break;
3521  case ocDeg:
3522  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3523  ft->Children[i], std::make_shared<OpDeg>(), nResultSize));
3524  break;
3525  case ocRoundUp:
3526  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3527  ft->Children[i], std::make_shared<OpRoundUp>(), nResultSize));
3528  break;
3529  case ocRoundDown:
3530  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3531  ft->Children[i], std::make_shared<OpRoundDown>(), nResultSize));
3532  break;
3533  case ocInt:
3534  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3535  ft->Children[i], std::make_shared<OpInt>(), nResultSize));
3536  break;
3537  case ocRad:
3538  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3539  ft->Children[i], std::make_shared<OpRadians>(), nResultSize));
3540  break;
3541  case ocCountIf:
3542  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3543  ft->Children[i], std::make_shared<OpCountIf>(), nResultSize));
3544  break;
3545  case ocIsEven:
3546  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3547  ft->Children[i], std::make_shared<OpIsEven>(), nResultSize));
3548  break;
3549  case ocIsOdd:
3550  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3551  ft->Children[i], std::make_shared<OpIsOdd>(), nResultSize));
3552  break;
3553  case ocFact:
3554  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3555  ft->Children[i], std::make_shared<OpFact>(), nResultSize));
3556  break;
3557  case ocMinA:
3558  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3559  ft->Children[i], std::make_shared<OpMinA>(), nResultSize));
3560  break;
3561  case ocCount2:
3562  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3563  ft->Children[i], std::make_shared<OpCountA>(), nResultSize));
3564  break;
3565  case ocMaxA:
3566  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3567  ft->Children[i], std::make_shared<OpMaxA>(), nResultSize));
3568  break;
3569  case ocAverageA:
3570  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3571  ft->Children[i], std::make_shared<OpAverageA>(), nResultSize));
3572  break;
3573  case ocVarA:
3574  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3575  ft->Children[i], std::make_shared<OpVarA>(), nResultSize));
3576  break;
3577  case ocVarPA:
3578  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3579  ft->Children[i], std::make_shared<OpVarPA>(), nResultSize));
3580  break;
3581  case ocStDevA:
3582  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3583  ft->Children[i], std::make_shared<OpStDevA>(), nResultSize));
3584  break;
3585  case ocStDevPA:
3586  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3587  ft->Children[i], std::make_shared<OpStDevPA>(), nResultSize));
3588  break;
3589  case ocSecant:
3590  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3591  ft->Children[i], std::make_shared<OpSec>(), nResultSize));
3592  break;
3593  case ocSecantHyp:
3594  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3595  ft->Children[i], std::make_shared<OpSecH>(), nResultSize));
3596  break;
3597  case ocSumIf:
3598  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3599  ft->Children[i], std::make_shared<OpSumIf>(), nResultSize));
3600  break;
3601  case ocNegSub:
3602  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3603  ft->Children[i], std::make_shared<OpNegSub>(), nResultSize));
3604  break;
3605  case ocAveDev:
3606  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3607  ft->Children[i], std::make_shared<OpAveDev>(), nResultSize));
3608  break;
3609  case ocIf:
3610  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3611  ft->Children[i], std::make_shared<OpIf>(), nResultSize));
3612  break;
3613  case ocExternal:
3614  if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getEffect")
3615  {
3616  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEffective>(), nResultSize));
3617  }
3618  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumipmt")
3619  {
3620  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumipmt>(), nResultSize));
3621  }
3622  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getNominal")
3623  {
3624  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNominal>(), nResultSize));
3625  }
3626  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumprinc")
3627  {
3628  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumprinc>(), nResultSize));
3629  }
3630  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXnpv")
3631  {
3632  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpXNPV>(), nResultSize));
3633  }
3634  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricemat")
3635  {
3636  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPriceMat>(), nResultSize));
3637  }
3638  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getReceived")
3639  {
3640  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpReceived>(), nResultSize));
3641  }
3642  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbilleq")
3643  {
3644  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbilleq>(), nResultSize));
3645  }
3646  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillprice")
3647  {
3648  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillprice>(), nResultSize));
3649  }
3650  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillyield")
3651  {
3652  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillyield>(), nResultSize));
3653  }
3654  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getFvschedule")
3655  {
3656  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFvschedule>(), nResultSize));
3657  }
3658  /*else if ( pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYield")
3659  {
3660  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYield));
3661  }*/
3662  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYielddisc")
3663  {
3664  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYielddisc>(), nResultSize));
3665  }
3666  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYieldmat")
3667  {
3668  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYieldmat>(), nResultSize));
3669  }
3670  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrintm")
3671  {
3672  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAccrintm>(), nResultSize));
3673  }
3674  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaybs")
3675  {
3676  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaybs>(), nResultSize));
3677  }
3678  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarde")
3679  {
3680  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarde>(), nResultSize));
3681  }
3682  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarfr")
3683  {
3684  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarfr>(), nResultSize));
3685  }
3686  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdays")
3687  {
3688  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdays>(), nResultSize));
3689  }
3690  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaysnc")
3691  {
3692  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaysnc>(), nResultSize));
3693  }
3694  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDisc")
3695  {
3696  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDISC>(), nResultSize));
3697  }
3698  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIntrate")
3699  {
3700  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpINTRATE>(), nResultSize));
3701  }
3702  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPrice")
3703  {
3704  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3705  ft->Children[i], std::make_shared<OpPrice>(), nResultSize));
3706  }
3707  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupnum")
3708  {
3709  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3710  std::make_shared<OpCoupnum>(), nResultSize));
3711  }
3712  /*else if pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDuration"))
3713  {
3714  mvSubArguments.push_back(
3715  SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDuration_ADD));
3716  }*/
3717  /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmordegrc")
3718  {
3719  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3720  std::make_shared<OpAmordegrc, nResultSize));
3721  }*/
3722  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmorlinc")
3723  {
3724  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3725  std::make_shared<OpAmorlinc>(), nResultSize));
3726  }
3727  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMduration")
3728  {
3729  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3730  std::make_shared<OpMDuration>(), nResultSize));
3731  }
3732  /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXirr")
3733  {
3734  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3735  std::make_shared<OpXirr, nResultSize));
3736  }*/
3737  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlprice")
3738  {
3739  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3740  ft->Children[i], std::make_shared<OpOddlprice>(), nResultSize));
3741  }
3742  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlyield")
3743  {
3744  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3745  std::make_shared<OpOddlyield>(), nResultSize));
3746  }
3747  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricedisc")
3748  {
3749  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3750  ft->Children[i], std::make_shared<OpPriceDisc>(), nResultSize));
3751  }
3752  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCouppcd")
3753  {
3754  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3755  std::make_shared<OpCouppcd>(), nResultSize));
3756  }
3757  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupncd")
3758  {
3759  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3760  std::make_shared<OpCoupncd>(), nResultSize));
3761  }
3762  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrint")
3763  {
3764  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3765  std::make_shared<OpAccrint>(), nResultSize));
3766  }
3767  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSqrtpi")
3768  {
3769  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3770  std::make_shared<OpSqrtPi>(), nResultSize));
3771  }
3772  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getConvert")
3773  {
3774  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3775  std::make_shared<OpConvert>(), nResultSize));
3776  }
3777  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIseven")
3778  {
3779  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3780  std::make_shared<OpIsEven>(), nResultSize));
3781  }
3782  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIsodd")
3783  {
3784  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3785  std::make_shared<OpIsOdd>(), nResultSize));
3786  }
3787  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMround")
3788  {
3789  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3790  std::make_shared<OpMROUND>(), nResultSize));
3791  }
3792  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getQuotient")
3793  {
3794  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3795  std::make_shared<OpQuotient>(), nResultSize));
3796  }
3797  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSeriessum")
3798  {
3799  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3800  std::make_shared<OpSeriesSum>(), nResultSize));
3801  }
3802  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getBesselj")
3803  {
3804  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3805  std::make_shared<OpBesselj>(), nResultSize));
3806  }
3807  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getGestep")
3808  {
3809  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3810  std::make_shared<OpGestep>(), nResultSize));
3811  }
3812  else
3813  throw UnhandledToken(OUString("unhandled external " + pChild->GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
3814  break;
3815 
3816  default:
3817  throw UnhandledToken(OUString("unhandled opcode "
3818  + formula::FormulaCompiler().GetOpCodeMap(com::sun::star::sheet::FormulaLanguage::ENGLISH)->getSymbol(opc)
3819  + "(" + OUString::number(opc) + ")").toUtf8().getStr(), __FILE__, __LINE__);
3820  }
3821  }
3822 }
3823 
3824 namespace {
3825 
// Holds the generated OpenCL program source for one formula together with the
// OpenCL program, kernel and result buffer handles used to run it.
// NOTE(review): the member function definitions in this file also use
// mCalcConfig, mpRoot, mKernelSignature and mKernelHash; their declarations are
// not visible in this listing -- confirm against the original source.
3826 class DynamicKernel : public CompiledFormula
3827 {
3828 public:
3829  DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
3830  virtual ~DynamicKernel() override;
3831 
// Factory: builds the formula tree from rCode, then runs CodeGen() and
// CreateKernel(); returns nullptr when any of those steps fails.
3832  static std::shared_ptr<DynamicKernel> create( const ScCalcConfig& config, const ScTokenArray& rCode, int nResultSize );
3833 
// Generates the OpenCL program text into mFullProgramSrc.
3835  void CodeGen();
3836 
// Returns a hex string derived from the MD5 digest of mFullProgramSrc; the
// string is computed on the first call and cached.
3838  std::string const & GetMD5();
3839 
// Obtains a program for the generated source and creates mpKernel from it.
// Does nothing when mpKernel is already set.
3843  void CreateKernel();
3844 
// Allocates the result buffer for nr doubles, binds the kernel arguments and
// enqueues the kernel with a global work size of nr.
3847  void Launch( size_t nr );
3848 
// Buffer the kernel writes its results to; set by Launch().
3849  cl_mem GetResultBuffer() const { return mpResClmem; }
3850 
3851 private:
// Symbol table of the kernel arguments declared during CodeGen().
3854  SymbolTable mSyms;
// Complete program text produced by CodeGen().
3856  std::string mFullProgramSrc;
// Not released by the destructor (the program is cached in CreateKernel()).
3857  cl_program mpProgram;
3858  cl_kernel mpKernel;
3859  cl_mem mpResClmem; // Results
// Inline declarations / inline function bodies collected in CodeGen().
3860  std::set<std::string> inlineDecl;
3861  std::set<std::string> inlineFun;
3862 
3863  int mnResultSize;
3864 };
3865 
3866 }
3867 
3868 DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
3869  mCalcConfig(config),
3870  mpRoot(r),
3871  mpProgram(nullptr),
3872  mpKernel(nullptr),
3873  mpResClmem(nullptr),
3874  mnResultSize(nResultSize) {}
3875 
3876 DynamicKernel::~DynamicKernel()
3877 {
3878  cl_int err;
3879  if (mpResClmem)
3880  {
3881  err = clReleaseMemObject(mpResClmem);
3882  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
3883  }
3884  if (mpKernel)
3885  {
3886  SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
3887  err = clReleaseKernel(mpKernel);
3888  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
3889  }
3890  // mpProgram is not going to be released here -- it's cached.
3891 }
3892 
3893 void DynamicKernel::CodeGen()
3894 {
3895  // Traverse the tree of expression and declare symbols used
3896  const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, std::make_shared<OpNop>(mnResultSize), mnResultSize);
3897 
3898  std::stringstream decl;
3899  if (openclwrapper::gpuEnv.mnKhrFp64Flag)
3900  {
3901  decl << "#if __OPENCL_VERSION__ < 120\n";
3902  decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3903  decl << "#endif\n";
3904  }
3905  else if (openclwrapper::gpuEnv.mnAmdFp64Flag)
3906  {
3907  decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3908  }
3909  // preambles
3910  decl << publicFunc;
3911  DK->DumpInlineFun(inlineDecl, inlineFun);
3912  for (const auto& rItem : inlineDecl)
3913  {
3914  decl << rItem;
3915  }
3916 
3917  for (const auto& rItem : inlineFun)
3918  {
3919  decl << rItem;
3920  }
3921  mSyms.DumpSlidingWindowFunctions(decl);
3922  mKernelSignature = DK->DumpOpName();
3923  decl << "__kernel void DynamicKernel" << mKernelSignature;
3924  decl << "(__global double *result";
3925  if( !DK->IsEmpty())
3926  {
3927  decl << ", ";
3928  DK->GenSlidingWindowDecl(decl);
3929  }
3930  decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3931  DK->GenSlidingWindowDeclRef() << ";\n}\n";
3932  mFullProgramSrc = decl.str();
3933  SAL_INFO(
3934  "sc.opencl.source",
3935  (mKernelSignature[0] == '_'
3936  ? mKernelSignature.substr(1, std::string::npos) : mKernelSignature)
3937  << " program to be compiled:\n" << linenumberify(mFullProgramSrc));
3938 }
3939 
3940 std::string const & DynamicKernel::GetMD5()
3941 {
3942  if (mKernelHash.empty())
3943  {
3944  std::stringstream md5s;
3945  // Compute MD5SUM of kernel body to obtain the name
3946  sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
3947  rtl_digest_MD5(
3948  mFullProgramSrc.c_str(),
3949  mFullProgramSrc.length(), result,
3950  RTL_DIGEST_LENGTH_MD5);
3951  for (sal_uInt8 i : result)
3952  {
3953  md5s << std::hex << static_cast<int>(i);
3954  }
3955  mKernelHash = md5s.str();
3956  }
3957  return mKernelHash;
3958 }
3959 
// Create mpKernel for the program text generated by CodeGen().
// The two most recently used programs are kept in function-local statics,
// keyed by mKernelSignature + GetMD5(); on a key match the cached program is
// reused. Otherwise the older cached program is released, a program is
// obtained (the path visible here builds it from source), and it becomes the
// newest cache entry.
// Throws OpenCLError when creating the program or the kernel fails; a build
// failure aborts instead when DBG_UTIL is defined.
// NOTE(review): several statements of this function are missing from this
// listing (the declaration of kEnv and the opening lines of two calls, see
// below) -- confirm against the original source.
3961 void DynamicKernel::CreateKernel()
3962 {
3963  if (mpKernel)
3964  // already created.
3965  return;
3966 
3967  cl_int err;
// Name of the __kernel function emitted by CodeGen().
3968  std::string kname = "DynamicKernel" + mKernelSignature;
3969  // Compile kernel here!!!
3970 
3971  OpenCLZone zone;
3974  const char* src = mFullProgramSrc.c_str();
// Two-entry cache of the most recent programs, shared by all instances
// because the variables are function-local statics.
3975  static std::string lastOneKernelHash;
3976  static std::string lastSecondKernelHash;
3977  static cl_program lastOneProgram = nullptr;
3978  static cl_program lastSecondProgram = nullptr;
3979  std::string KernelHash = mKernelSignature + GetMD5();
3980  if (lastOneKernelHash == KernelHash && lastOneProgram)
3981  {
3982  mpProgram = lastOneProgram;
3983  }
3984  else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3985  {
3986  mpProgram = lastSecondProgram;
3987  }
3988  else
3989  { // doesn't match the last compiled formula.
3990 
// The older cache entry is about to be replaced, so release its program.
3991  if (lastSecondProgram)
3992  {
3993  SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
3994  err = clReleaseProgram(lastSecondProgram);
3995  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << openclwrapper::errorString(err));
3996  lastSecondProgram = nullptr;
3997  }
// NOTE(review): the line that opens this condition is missing from the
// listing; presumably it tries to load a prebuilt binary for KernelHash --
// confirm.
3999  &openclwrapper::gpuEnv, KernelHash.c_str(), 0))
4000  {
4001  mpProgram = openclwrapper::gpuEnv.mpArryPrograms[0];
4003  }
4004  else
4005  {
// Build the program from the generated source text.
4006  mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
4007  &src, nullptr, &err);
4008  if (err != CL_SUCCESS)
4009  throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
4010  SAL_INFO("sc.opencl", "Created program " << mpProgram);
4011 
4012  err = clBuildProgram(mpProgram, 1,
4013  &openclwrapper::gpuEnv.mpDevID, "", nullptr, nullptr);
4014  if (err != CL_SUCCESS)
4015  {
// Debug builds only: fetch the build status and build log and emit them as
// warnings before failing.
4016 #if OSL_DEBUG_LEVEL > 0
4017  if (err == CL_BUILD_PROGRAM_FAILURE)
4018  {
4019  cl_build_status stat;
4020  cl_int e = clGetProgramBuildInfo(
4021  mpProgram, openclwrapper::gpuEnv.mpDevID,
4022  CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
4023  &stat, nullptr);
4024  SAL_WARN_IF(
4025  e != CL_SUCCESS, "sc.opencl",
4026  "after CL_BUILD_PROGRAM_FAILURE,"
4027  " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
4028  " fails with " << openclwrapper::errorString(e));
4029  if (e == CL_SUCCESS)
4030  {
// First query only asks for the size of the log ...
4031  size_t n;
4032  e = clGetProgramBuildInfo(
4033  mpProgram, openclwrapper::gpuEnv.mpDevID,
4034  CL_PROGRAM_BUILD_LOG, 0, nullptr, &n);
4035  SAL_WARN_IF(
4036  e != CL_SUCCESS || n == 0, "sc.opencl",
4037  "after CL_BUILD_PROGRAM_FAILURE,"
4038  " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
4039  " fails with " << openclwrapper::errorString(e) << ", n=" << n);
4040  if (e == CL_SUCCESS && n != 0)
4041  {
// ... second query reads the log into a buffer of that size.
4042  std::vector<char> log(n);
4043  e = clGetProgramBuildInfo(
4044  mpProgram, openclwrapper::gpuEnv.mpDevID,
4045  CL_PROGRAM_BUILD_LOG, n, log.data(), nullptr);
4046  SAL_WARN_IF(
4047  e != CL_SUCCESS || n == 0, "sc.opencl",
4048  "after CL_BUILD_PROGRAM_FAILURE,"
4049  " clGetProgramBuildInfo("
4050  "CL_PROGRAM_BUILD_LOG) fails with " << openclwrapper::errorString(e));
4051  if (e == CL_SUCCESS)
4052  SAL_WARN(
4053  "sc.opencl",
4054  "CL_BUILD_PROGRAM_FAILURE, status " << stat
4055  << ", log \"" << log.data() << "\"");
4056  }
4057  }
4058  }
4059 #endif
4060 #ifdef DBG_UTIL
4061  SAL_WARN("sc.opencl", "Program failed to build, aborting.");
4062  abort(); // make sure errors such as typos don't accidentally go unnoticed
4063 #else
4064  throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
4065 #endif
4066  }
4067  SAL_INFO("sc.opencl", "Built program " << mpProgram);
4068 
4069  // Generate binary out of compiled kernel.
// NOTE(review): the line that opens this call is missing from the listing;
// only its last argument (the cache key) is visible.
4071  (mKernelSignature + GetMD5()).c_str());
4072  }
// Shift the cache: the previous newest entry becomes the older one and the
// program obtained above becomes the newest.
4073  lastSecondKernelHash = lastOneKernelHash;
4074  lastSecondProgram = lastOneProgram;
4075  lastOneKernelHash = KernelHash;
4076  lastOneProgram = mpProgram;
4077  }
4078  mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
4079  if (err != CL_SUCCESS)
4080  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
4081  SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
4082 }
4083 
// Run the kernel over nr work items.
// Creates the result buffer (nr doubles), binds it as kernel argument 0, lets
// the symbol table bind the remaining arguments, enqueues the kernel with a
// one-dimensional global work size of nr and flushes the command queue.
// Throws OpenCLError when any of the OpenCL calls made here fails.
// NOTE(review): kEnv is used below but its declaration is not visible in this
// listing -- confirm against the original source.
4084 void DynamicKernel::Launch( size_t nr )
4085 {
4086  OpenCLZone zone;
4089  cl_int err;
4090  // The results
// NOTE(review): a previously stored mpResClmem is overwritten here without
// being released; whether Launch() can run more than once per instance is not
// visible from this function.
4091  mpResClmem = clCreateBuffer(kEnv.mpkContext,
4092  cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
4093  nr * sizeof(double), nullptr, &err);
4094  if (CL_SUCCESS != err)
4095  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
4096  SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
4097 
// Argument 0 is always the result buffer (see the signature emitted by CodeGen()).
4098  SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem << " (result)");
4099  err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
4100  if (CL_SUCCESS != err)
4101  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
4102  // The rest of buffers
4103  mSyms.Marshal(mpKernel, nr, mpProgram);
4104  size_t global_work_size[] = { nr };
4105  SAL_INFO("sc.opencl", "Enqueuing kernel " << mpKernel);
4106  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, nullptr,
4107  global_work_size, nullptr, 0, nullptr, nullptr);
4108  if (CL_SUCCESS != err)
4109  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
// Only flushes the queue; waiting for completion is done elsewhere
// (waitForResults() in this file calls clFinish).
4110  err = clFlush(kEnv.mpkCmdQueue);
4111  if (CL_SUCCESS != err)
4112  throw OpenCLError("clFlush", err, __FILE__, __LINE__);
4113 }
4114 
4115 // Symbol lookup. If there is no such symbol created, allocate one
4116 // kernel with argument with unique name and return so.
4117 // The template argument T must be a subclass of DynamicKernelArgument
4118 template <typename T>
4119 const DynamicKernelArgument* SymbolTable::DeclRefArg(const ScCalcConfig& config,
4120  const FormulaTreeNodeRef& t,
4121  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize)
4122 {
4123  FormulaToken* ref = t->GetFormulaToken();
4124  ArgumentMap::iterator it = mSymbols.find(ref);
4125  if (it == mSymbols.end())
4126  {
4127  // Allocate new symbols
4128  std::stringstream ss;
4129  ss << "tmp" << mCurId++;
4130  DynamicKernelArgumentRef new_arg = std::make_shared<T>(config, ss.str(), t, std::move(pCodeGen), nResultSize);
4131  mSymbols[ref] = new_arg;
4132  mParams.push_back(new_arg);
4133  return new_arg.get();
4134  }
4135  else
4136  {
4137  return it->second.get();
4138  }
4139 }
4140 
4141 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() {}
4142 
4143 FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
4144 
4145 ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
4146 {
4147  return nullptr;
4148 }
4149 
// Build a DynamicKernel for the token array rCode.
// The tokens are first arranged into a tree of FormulaTreeNode objects, then
// the OpenCL source is generated (CodeGen) and the kernel is created
// (CreateKernel).
// Returns nullptr when the tree cannot be built (operand stack empty, or an
// operator operand without a tree node) or when CodeGen()/CreateKernel()
// throws; every exception is caught and logged here.
// NOTE(review): nParamCount is used in the loop below but its declaration is
// not visible in this listing -- confirm against the original source.
4150 std::shared_ptr<DynamicKernel> DynamicKernel::create( const ScCalcConfig& rConfig, const ScTokenArray& rCode, int nResultSize )
4151 {
4152  // Constructing "AST"
4153  FormulaTokenIterator aCode(rCode);
// aTokenVector is used as a stack of tokens seen so far; aHashMap maps each
// operator token to the tree node built for it.
4154  std::vector<FormulaToken*> aTokenVector;
4155  std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
4156  FormulaToken* pCur;
4157  while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != nullptr)
4158  {
4159  OpCode eOp = pCur->GetOpCode();
4160  if (eOp != ocPush)
4161  {
// Operator token: pop its operands off the stack and attach them as children.
4162  FormulaTreeNodeRef pCurNode = std::make_shared<FormulaTreeNode>(pCur);
4164  for (sal_uInt8 i = 0; i < nParamCount; i++)
4165  {
4166  if( aTokenVector.empty())
4167  return nullptr;
4168  FormulaToken* pTempFormula = aTokenVector.back();
4169  aTokenVector.pop_back();
4170  if (pTempFormula->GetOpCode() != ocPush)
4171  {
// Operand is itself an operator: reuse the subtree built for it earlier.
4172  if (aHashMap.find(pTempFormula) == aHashMap.end())
4173  return nullptr;
4174  pCurNode->Children.push_back(aHashMap[pTempFormula]);
4175  }
4176  else
4177  {
// Operand is a pushed value: wrap it in a new leaf node.
4178  FormulaTreeNodeRef pChildTreeNode =
4179  std::make_shared<FormulaTreeNode>(pTempFormula);
4180  pCurNode->Children.push_back(pChildTreeNode);
4181  }
4182  }
// Operands were popped last-to-first; restore their original order.
4183  std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4184  aHashMap[pCur] = pCurNode;
4185  }
4186  aTokenVector.push_back(pCur);
4187  }
4188 
// The last token pushed is the top-level operation; hang it under an
// artificial root node that has no token of its own.
// NOTE(review): back() requires a non-empty vector, and operator[] inserts a
// null node if the last token has no entry in aHashMap (e.g. a lone ocPush).
// The caller interpret() in this file returns early when GetCodeLen() is 0;
// verify that this covers these cases.
4189  FormulaTreeNodeRef Root = std::make_shared<FormulaTreeNode>(nullptr);
4190  Root->Children.push_back(aHashMap[aTokenVector.back()]);
4191 
4192  auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
4193 
4194  // OpenCL source code generation and kernel compilation
4195  try
4196  {
4197  pDynamicKernel->CodeGen();
4198  pDynamicKernel->CreateKernel();
4199  }
4200  catch (const UnhandledToken& ut)
4201  {
4202  SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4203  return nullptr;
4204  }
4205  catch (const InvalidParameterCount& ipc)
4206  {
4207  SAL_INFO("sc.opencl", "Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
4208  << " at " << ipc.mFile << ":" << ipc.mLineNumber);
4209  return nullptr;
4210  }
4211  catch (const OpenCLError& oce)
4212  {
4213  // I think OpenCLError exceptions are actually exceptional (unexpected), so do use SAL_WARN
4214  // here.
4215  SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4216 
4217  // OpenCLError used to go to the catch-all below, and not delete pDynamicKernel. Was that
4218  // intentional, should we not do it here then either?
4220  return nullptr;
4221  }
4222  catch (const Unhandled& uh)
4223  {
4224  SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
4225 
4226  // Unhandled used to go to the catch-all below, and not delete pDynamicKernel. Was that
4227  // intentional, should we not do it here then either?
4229  return nullptr;
4230  }
4231  catch (...)
4232  {
4233  // FIXME: Do we really want to catch random exceptions here?
4234  SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
4236  return nullptr;
4237  }
4238  return pDynamicKernel;
4239 }
4240 
4241 namespace {
4242 
// Result of one kernel launch: maps the kernel's result buffer into host memory
// and copies the values into the document.
// An instance is "valid" only when it refers to a kernel.
// NOTE(review): mnGroupLength and kEnv are used below but their declarations
// are not visible in this listing -- confirm against the original source.
4243 class CLInterpreterResult
4244 {
// Non-owning pointer to the kernel that produced the result; null for an
// invalid result.
4245  DynamicKernel* mpKernel;
4246 
4248 
// Device buffer holding the results, and its host mapping (null until
// fetchResultFromKernel() succeeds).
4249  cl_mem mpCLResBuf;
4250  double* mpResBuf;
4251 
4252 public:
// Default construction yields an invalid result (no kernel).
4253  CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4254  CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
4255  mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4256 
4257  bool isValid() const { return mpKernel != nullptr; }
4258 
// Map the kernel's result buffer for reading (blocking map of mnGroupLength
// doubles). On failure a warning is logged and mpResBuf stays null.
4259  void fetchResultFromKernel()
4260  {
4261  if (!isValid())
4262  return;
4263 
4264  OpenCLZone zone;
4265 
4266  // Map results back
4267  mpCLResBuf = mpKernel->GetResultBuffer();
4268 
4271 
4272  cl_int err;
4273  mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
4274  mpCLResBuf,
4275  CL_TRUE, CL_MAP_READ, 0,
4276  mnGroupLength * sizeof(double), 0, nullptr, nullptr,
4277  &err));
4278 
4279  if (err != CL_SUCCESS)
4280  {
4281  SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << openclwrapper::errorString(err));
4282  mpResBuf = nullptr;
4283  return;
4284  }
4285  SAL_INFO("sc.opencl", "Kernel results: cl_mem: " << mpResBuf << " (" << DebugPeekDoubles(mpResBuf, mnGroupLength) << ")");
4286  }
4287 
// Write the mapped results into the document starting at rTopPos, then unmap
// the buffer. Returns false when no results were mapped or unmapping fails;
// in the latter case the document has already been updated.
4288  bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
4289  {
4290  if (!mpResBuf)
4291  return false;
4292 
4293  OpenCLZone zone;
4294 
4295  rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
4296 
4299 
4300  cl_int err;
4301  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, nullptr, nullptr);
4302 
4303  if (err != CL_SUCCESS)
4304  {
4305  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
4306  return false;
4307  }
4308 
4309  return true;
4310  }
4311 };
4312 
// Owns the compiled kernel for one formula group and launches it.
// The context is "valid" only when it holds a kernel.
// NOTE(review): mnGroupLength is used below but its declaration is not visible
// in this listing -- confirm against the original source.
4313 class CLInterpreterContext
4314 {
// mpKernelStore keeps the kernel alive; mpKernel is the raw pointer to the
// same object used for the calls below.
4315  std::shared_ptr<DynamicKernel> mpKernelStore;
4316  DynamicKernel* mpKernel;
4317 
4319 
4320 public:
4321  explicit CLInterpreterContext(SCROW nGroupLength)
4322  : mpKernel(nullptr)
4323  , mnGroupLength(nGroupLength) {}
4324 
4325  bool isValid() const
4326  {
4327  return mpKernel != nullptr;
4328  }
4329 
// Take shared ownership of pKernel. Passing a null pointer leaves the context
// invalid.
4330  void setManagedKernel( std::shared_ptr<DynamicKernel> pKernel )
4331  {
4332  mpKernelStore = std::move(pKernel);
4333  mpKernel = mpKernelStore.get();
4334  }
4335 
// Launch the kernel over mnGroupLength work items. Returns an invalid
// (default-constructed) result when the context is invalid or Launch() throws;
// every exception is caught and logged here.
4336  CLInterpreterResult launchKernel()
4337  {
4338  if (!isValid())
4339  return CLInterpreterResult();
4340 
4341  try
4342  {
4343  // Run the kernel.
4344  mpKernel->Launch(mnGroupLength);
4345  }
4346  catch (const UnhandledToken& ut)
4347  {
4348  SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4350  return CLInterpreterResult();
4351  }
4352  catch (const OpenCLError& oce)
4353  {
4354  SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4356  return CLInterpreterResult();
4357  }
4358  catch (const Unhandled& uh)
4359  {
4360  SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
4362  return CLInterpreterResult();
4363  }
4364  catch (...)
4365  {
4366  SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
4368  return CLInterpreterResult();
4369  }
4370 
4371  return CLInterpreterResult(mpKernel, mnGroupLength);
4372  }
4373 };
4374 
4375 
4376 CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
4377  const ScFormulaCellGroupRef& xGroup, const ScTokenArray& rCode )
4378 {
4379  CLInterpreterContext aCxt(xGroup->mnLength);
4380 
4381  aCxt.setManagedKernel(DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
4382 
4383  return aCxt;
4384 }
4385 
4386 void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
4387 {
4388  ScCompiler aComp(rDoc, rTopPos, rCode, rDoc.GetGrammar());
4389  // Disable special ordering for jump commands for the OpenCL interpreter.
4390  aComp.EnableJumpCommandReorder(false);
4391  aComp.CompileTokenArray(); // Regenerate RPN tokens.
4392 }
4393 
// Block until the commands queued on the OpenCL command queue have finished
// (clFinish). Returns true on success; a failure is logged as a warning and
// reported as false.
// NOTE(review): kEnv is used below but its declaration is not visible in this
// listing -- confirm against the original source.
4394 bool waitForResults()
4395 {
4396  OpenCLZone zone;
4399 
4400  cl_int err = clFinish(kEnv.mpkCmdQueue);
4401  if (err != CL_SUCCESS)
4402  SAL_WARN("sc.opencl", "clFinish failed: " << openclwrapper::errorString(err));
4403 
4404  return err == CL_SUCCESS;
4405 }
4406 
4407 }
4408 
4409 bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
4410  const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
4411  ScTokenArray& rCode )
4412 {
4413  MergeCalcConfig(rDoc);
4414 
4415  genRPNTokens(rDoc, rTopPos, rCode);
4416 
4417  if( rCode.GetCodeLen() == 0 )
4418  return false;
4419 
4420  CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
4421  if (!aCxt.isValid())
4422  return false;
4423 
4424  CLInterpreterResult aRes = aCxt.launchKernel();
4425  if (!aRes.isValid())
4426  return false;
4427 
4428  if (!waitForResults())
4429  return false;
4430 
4431  aRes.fetchResultFromKernel();
4432 
4433  return aRes.pushResultToDocument(rDoc, rTopPos);
4434 }
4435 
4436 } // namespace sc::opencl
4437 
4438 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Matrix data type that can store values of mixed types.
Definition: scmatrix.hxx:112
ocDBCount
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
ocStDevP
ocVarPA
ocSqrt
ocBitAnd
ocPermutationA
ocBinomDist
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
Definition: types.hxx:43
sal_uInt64 kernelFailures
ocEven
DynamicKernelSlidingArgument< VectorRef > mDoubleArgument
OUString getString() const
std::string mFullProgramSrc
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
Definition: opbase.hxx:151
std::string mKernelSignature
ocSumSQ
double mConst
ocArcCotHyp
ocCosecantHyp
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells. ...
Definition: documen8.cxx:404
ocVar
const VectorRefArray & GetArray() const
ocPi
ocDevSq
Failed in marshaling.
Definition: opbase.hxx:41
Inconsistent state.
Definition: opbase.hxx:53
ocCos
ocPDuration
cl_program mpArryPrograms[MAX_CLFILE_NUM]
std::set< std::string > inlineDecl
ocPpmt
ocDBMin
ocIntercept
SCROW mnGroupLength
ocDBAverage
ocB
unsigned int mCurId
sal_Int64 n
cl_mem mpClmem2
ocCosecant
const std::vector< VectorRefArray > & GetArrays() const
ocDBSum
ocExp
ocSLN
void setKernelEnv(KernelEnv *envInfo)
ocLogNormDist
sal_uInt16 GetCodeLen() const
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
Definition: opbase.hxx:81
virtual double GetDouble() const
ocPV
ocProduct
OpCode GetOpCode() const
SymbolTable mSyms
ocPearson
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
Definition: document.hxx:984
(Partially) abstract base class for an operand
Definition: opbase.hxx:101
ocCount
ocRad
ocMin
ocGauss
ocSinHyp
ocCount2
cl_kernel mpKernel
for managed kernel instance.
ocSTEYX
ocIf
ocLogInv
ocSYD
ocFisherInv
tuple log
#define SAL_MAX_UINT32
ocPhi
char sal_uInt16 & nParamCount
Definition: callform.cxx:53
ocDBProduct
ocNegSub
#define REDUCE_THRESHOLD
=1+"1" or =1+"x" give 1
ocRSQ
virtual const OUString & GetExternal() const
ocBetaDist
ocRound
OpCode
ocSumProduct
ocAbs
ocStDevA
ocForecast
ocWeibull
ocGreater
ocSNormInv
ocIsOdd
ocEqual
::boost::intrusive_ptr< ScMatrix > ScMatrixRef
Definition: types.hxx:25
ocNot
StringConversion meStringConversion
Definition: calcconfig.hxx:54
Reference< deployment::XPackageRegistry > create(Reference< deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, Reference< XComponentContext > const &xComponentContext)
std::string StackVarEnumToString(StackVar const e)
ocCot
ocCorrel
ocBitLshift
ScCalcConfig mCalcConfig
ocDDB
ocVarP
std::shared_ptr< DynamicKernel > mpKernelStore
ocSumIfs
ocArcTanHyp
err
ocFisher
ocSumX2MY2
ocDBMax
ocAverageA
ocPower
ocNormInv
ocBitOr
const formula::DoubleVectorRefToken * mpDVR
Base
ocFTest
ocArcCos
ocLog
int mnResultSize
ocFV
int i
ocSkew
ocNPV
cl_mem mCLMem
ocMul
ocCosHyp
ocArcCosHyp
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
ocDBVar
ocPush
std::set< std::string > inlineFun
std::shared_ptr< SlidingFunctionBase > mpCodeGen
std::string mKernelHash
ocSub
ocTDist
#define VR
Definition: xlformula.cxx:62
void EnableJumpCommandReorder(bool bEnable)
ocSecantHyp
ocFloor
ocPoissonDist
ocMIRR
ocSlope
ocExternal
ocAverageIfs
ocMaxA
ocLog10
size
ocCeil
Arguments that are actually compile-time constant string Currently, only the hash is passed...
::boost::spirit::classic::rule< ScannerT > argument
ocMax
SubArgumentsType mvSubArguments
double * mpResBuf
ocInt
bool bIsEndFixed
ocDBStdDevP
ocSkewp
ocSin
tuple index
ArgumentMap mSymbols
ocVLookup
ocCritBinom
ocRate
ocLess
sal_uInt8 GetParamCount() const
exports com.sun.star.chart2. data
XPropertyListType t
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
ocNegBinomVert
const double * mpNumericArray
FormulaTreeNodeRef mpRoot
ocSumIf
Configuration options for formula interpreter.
Definition: calcconfig.hxx:43
ocVarA
ocISPMT
ocDBCount2
ocOdd
ocOr
ocBitRshift
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
DynamicKernelStringArgument mStringArgument
ocCotHyp
sal_Int32 SCROW
Definition: types.hxx:17
ocFact
ocCombin
ocAdd
ocRoundUp
ocStDevPA
ocSumX2DY2
ocArcTan
bool bIsStartFixed
ocDB
ocAverage
ocHarMean
#define UNROLLING_FACTOR
ocMod
#define SAL_WARN_IF(condition, area, stream)
rtl_uString ** mpStringArray
unsigned char sal_uInt8
ocRandom
ocDBVarP
virtual const svl::SharedString & GetString() const
ocArcSin
cl_mem mpResClmem
ocAnd
#define SAL_INFO(area, stream)
ocArcCot
ocLn
cl_command_queue mpkCmdQueue
ocStdNormDist
int uniform_int_distribution(int a, int b)
tuple config
ocCountIfs
cl_mem mpCLResBuf
const char * name
ocGammaLn
ocRoundDown
ocRRI
ocBitXor
ocSumXMY2
std::vector< DynamicKernelArgumentRef > mParams
ocStandard
const char *const publicFunc
ocDiv
ocAveDev
ocGamma
ocPow
ocTrunc
cl_program mpProgram
Any result
ocIRR
#define SAL_WARN(area, stream)
ocNormDist
ocSecant
const char * errorString(cl_int nError)
ocPMT
ocDeg
ocFDist
ocPermut
ocTanHyp
sal_Int32 nLength
ocLessEqual
ocConfidence
size_t GetArrayLength() const
size_t GetArrayLength() const
ocIpmt
int mnIndex
ocCovar
ocTTest
ocAverageIf
ocCountIf
ocIsEven
ocKurt
ocXor
ocTan
StackVar GetType() const
ocZTest
ocDBStdDev