LibreOffice Module sc (master)  1
formulagroupcl.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  */
9 
10 #include <formulagroup.hxx>
11 #include <formulagroupcl.hxx>
12 #include <document.hxx>
13 #include <formulacell.hxx>
14 #include <tokenarray.hxx>
15 #include <compiler.hxx>
16 #include <comphelper/random.hxx>
17 #include <formula/vectortoken.hxx>
18 #include <scmatrix.hxx>
19 #include <sal/log.hxx>
20 #include <rtl/math.hxx>
21 
22 #include <opencl/openclwrapper.hxx>
23 #include <opencl/OpenCLZone.hxx>
24 
25 #include "op_financial.hxx"
26 #include "op_database.hxx"
27 #include "op_math.hxx"
28 #include "op_logical.hxx"
29 #include "op_statistical.hxx"
30 #include "op_array.hxx"
31 #include "op_spreadsheet.hxx"
32 #include "op_addin.hxx"
33 
34 #include <com/sun/star/sheet/FormulaLanguage.hpp>
35 
36 // FIXME: The idea that somebody would bother to (now and then? once a year? once a month?) manually
37 // edit a source file and change the value of some #defined constant and run some ill-defined
38 // "correctness test" is of course ludicrous. Either things are checked in normal unit tests, in
39 // every 'make check', or not at all. The below comments are ridiculous.
40 
41 #define REDUCE_THRESHOLD 201 // set to 4 for correctness testing. priority 1
42 #define UNROLLING_FACTOR 16 // set to 4 for correctness testing (if no reduce)
43 
// OpenCL C source text with the error-code macros and small numeric helpers
// (error NaN construction/decoding, NaN-aware sum/min/max, approximate
// equality) defined below.
// NOTE(review): presumably prepended to the generated kernel programs; the use
// site is not visible here - confirm.
//
// is_representable_integer() converts between long and double by value
// ((double)x / (long)x). The as_double()/as_long() built-ins must not be used
// there: they reinterpret the bit pattern instead of converting the value.
// The helper is only called with non-negative arguments (see approx_equal()).
const char* const publicFunc =
    "\n"
    "#define IllegalArgument 502\n"
    "#define IllegalFPOperation 503 // #NUM!\n"
    "#define NoValue 519 // #VALUE!\n"
    "#define NoConvergence 523\n"
    "#define DivisionByZero 532 // #DIV/0!\n"
    "#define NOTAVAILABLE 0x7fff // #N/A\n"
    "\n"
    "double CreateDoubleError(ulong nErr)\n"
    "{\n"
    // At least nVidia on Linux and Intel on Windows seem to ignore the argument to nan(),
    // so using that would not propagate the type of error, work that around
    // by directly constructing the proper IEEE double NaN value
    // TODO: maybe use a better way to detect such systems?
    " return as_double(0x7FF8000000000000+nErr);\n"
//  " return nan(nErr);\n"
    "}\n"
    "\n"
    "uint GetDoubleErrorValue(double fVal)\n"
    "{\n"
    " if (isfinite(fVal))\n"
    " return 0;\n"
    " if (isinf(fVal))\n"
    " return IllegalFPOperation; // normal INF\n"
    " if (as_ulong(fVal) & 0XFFFF0000u)\n"
    " return NoValue; // just a normal NAN\n"
    " return (as_ulong(fVal) & 0XFFFF); // any other error\n"
    "}\n"
    "\n"
    "double fsum_count(double a, double b, __private int *p) {\n"
    " bool t = isnan(a);\n"
    " (*p) += t?0:1;\n"
    " return t?b:a+b;\n"
    "}\n"
    "double fmin_count(double a, double b, __private int *p) {\n"
    " double result = fmin(a, b);\n"
    " bool t = isnan(result);\n"
    " (*p) += t?0:1;\n"
    " return result;\n"
    "}\n"
    "double fmax_count(double a, double b, __private int *p) {\n"
    " double result = fmax(a, b);\n"
    " bool t = isnan(result);\n"
    " (*p) += t?0:1;\n"
    " return result;\n"
    "}\n"
    "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
    "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
    "double fsub(double a, double b) { return a-b; }\n"
    "double fdiv(double a, double b) { return a/b; }\n"
    "double strequal(unsigned a, unsigned b) { return (a==b)?1.0:0; }\n"
    "int is_representable_integer(double a) {\n"
    " long kMaxInt = (1L << 53) - 1;\n"
    " if (a <= (double)kMaxInt)\n"
    " {\n"
    " long nInt = (long)a;\n"
    " double fInt;\n"
    " return (nInt <= kMaxInt &&\n"
    " (!((fInt = (double)nInt) < a) && !(fInt > a)));\n"
    " }\n"
    " return 0;\n"
    "}\n"
    "int approx_equal(double a, double b) {\n"
    " double e48 = 1.0 / (16777216.0 * 16777216.0);\n"
    " double e44 = e48 * 16.0;\n"
    " if (a == b)\n"
    " return 1;\n"
    " if (a == 0.0 || b == 0.0)\n"
    " return 0;\n"
    " double d = fabs(a - b);\n"
    " if (!isfinite(d))\n"
    " return 0; // Nan or Inf involved\n"
    " if (d > ((a = fabs(a)) * e44) || d > ((b = fabs(b)) * e44))\n"
    " return 0;\n"
    " if (is_representable_integer(d) && is_representable_integer(a) && is_representable_integer(b))\n"
    " return 0; // special case for representable integers.\n"
    " return (d < a * e48 && d < b * e48);\n"
    "}\n"
    "double fsum_approx(double a, double b) {\n"
    " if ( ((a < 0.0 && b > 0.0) || (b < 0.0 && a > 0.0))\n"
    " && approx_equal( a, -b ) )\n"
    " return 0.0;\n"
    " return a + b;\n"
    "}\n"
    "double fsub_approx(double a, double b) {\n"
    " if ( ((a < 0.0 && b < 0.0) || (a > 0.0 && b > 0.0)) && approx_equal( a, b ) )\n"
    " return 0.0;\n"
    " return a - b;\n"
    "}\n"
    ;
135 
136 #include <vector>
137 #include <map>
138 #include <iostream>
139 #include <algorithm>
140 
141 #include <rtl/digest.h>
142 
143 #include <memory>
144 
145 using namespace formula;
146 
147 namespace sc::opencl {
148 
149 namespace {
150 
// Returns a copy of s in which every line is prefixed with its 1-based line
// number in the form "/*NNNN*/ " (number right-aligned to a width of four).
// Line terminators are preserved; a final line without a trailing newline is
// numbered as well. An empty input yields an empty string.
std::string linenumberify(const std::string& s)
{
    std::stringstream numbered;
    int nLine = 0;
    size_t pos = 0;
    while (pos < s.size())
    {
        const size_t eol = s.find('\n', pos);
        const bool bLastUnterminated = (eol == std::string::npos);
        // Take the line including its '\n', or everything that is left.
        const size_t nTake = bLastUnterminated ? std::string::npos : eol - pos + 1;
        numbered << "/*" << std::setw(4) << ++nLine << "*/ " << s.substr(pos, nTake);
        if (bLastUnterminated)
            break;
        pos = eol + 1;
    }
    return numbered.str();
}
166 
167 bool AllStringsAreNull(const rtl_uString* const* pStringArray, size_t nLength)
168 {
169  if (pStringArray == nullptr)
170  return true;
171 
172  for (size_t i = 0; i < nLength; i++)
173  if (pStringArray[i] != nullptr)
174  return false;
175 
176  return true;
177 }
178 
179 OUString LimitedString( const OUString& str )
180 {
181  if( str.getLength() < 20 )
182  return "\"" + str + "\"";
183  else
184  return "\"" + str.copy( 0, 20 ) + "\"...";
185 }
186 
187 // Returns formatted contents of the data (possibly shortened), to be used in debug output.
188 OUString DebugPeekData(const FormulaToken* ref, int doubleRefIndex = 0)
189 {
190  if (ref->GetType() == formula::svSingleVectorRef)
191  {
192  const formula::SingleVectorRefToken* pSVR =
193  static_cast<const formula::SingleVectorRefToken*>(ref);
194  OUStringBuffer buf = "SingleRef {";
195  for( size_t i = 0; i < std::min< size_t >( 4, pSVR->GetArrayLength()); ++i )
196  {
197  if( i != 0 )
198  buf.append( "," );
199  if( pSVR->GetArray().mpNumericArray != nullptr )
200  buf.append( pSVR->GetArray().mpNumericArray[ i ] );
201  else if( pSVR->GetArray().mpStringArray != nullptr )
202  buf.append( LimitedString( OUString( pSVR->GetArray().mpStringArray[ i ] )));
203  }
204  if( pSVR->GetArrayLength() > 4 )
205  buf.append( ",..." );
206  buf.append( "}" );
207  return buf.makeStringAndClear();
208  }
209  else if (ref->GetType() == formula::svDoubleVectorRef)
210  {
211  const formula::DoubleVectorRefToken* pDVR =
212  static_cast<const formula::DoubleVectorRefToken*>(ref);
213  OUStringBuffer buf = "DoubleRef {";
214  for( size_t i = 0; i < std::min< size_t >( 4, pDVR->GetArrayLength()); ++i )
215  {
216  if( i != 0 )
217  buf.append( "," );
218  if( pDVR->GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
219  buf.append( pDVR->GetArrays()[doubleRefIndex].mpNumericArray[ i ] );
220  else if( pDVR->GetArrays()[doubleRefIndex].mpStringArray != nullptr )
221  buf.append( LimitedString( OUString( pDVR->GetArrays()[doubleRefIndex].mpStringArray[ i ] )));
222  }
223  if( pDVR->GetArrayLength() > 4 )
224  buf.append( ",..." );
225  buf.append( "}" );
226  return buf.makeStringAndClear();
227  }
228  else if (ref->GetType() == formula::svString)
229  {
230  return "String " + LimitedString( ref->GetString().getString());
231  }
232  else if (ref->GetType() == formula::svDouble)
233  {
234  return OUString::number(ref->GetDouble());
235  }
236  else
237  {
238  return "?";
239  }
240 }
241 
242 // Returns formatted contents of a doubles buffer, to be used in debug output.
243 OUString DebugPeekDoubles(const double* data, int size)
244 {
245  OUStringBuffer buf = "{";
246  for( int i = 0; i < std::min( 4, size ); ++i )
247  {
248  if( i != 0 )
249  buf.append( "," );
250  buf.append( data[ i ] );
251  }
252  if( size > 4 )
253  buf.append( ",..." );
254  buf.append( "}" );
255  return buf.makeStringAndClear();
256 }
257 
258 } // anonymous namespace
259 
261 size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
262 {
263  OpenCLZone zone;
264  FormulaToken* ref = mFormulaTree->GetFormulaToken();
265  double* pHostBuffer = nullptr;
266  size_t szHostBuffer = 0;
267  if (ref->GetType() == formula::svSingleVectorRef)
268  {
269  const formula::SingleVectorRefToken* pSVR =
270  static_cast<const formula::SingleVectorRefToken*>(ref);
271 
272  SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
273 
274  pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
275  szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
276  }
277  else if (ref->GetType() == formula::svDoubleVectorRef)
278  {
279  const formula::DoubleVectorRefToken* pDVR =
280  static_cast<const formula::DoubleVectorRefToken*>(ref);
281 
282  SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
283 
284  pHostBuffer = const_cast<double*>(
285  pDVR->GetArrays()[mnIndex].mpNumericArray);
286  szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
287  }
288  else
289  {
290  throw Unhandled(__FILE__, __LINE__);
291  }
292 
295  cl_int err;
296  if (pHostBuffer)
297  {
298  mpClmem = clCreateBuffer(kEnv.mpkContext,
299  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
300  szHostBuffer,
301  pHostBuffer, &err);
302  if (CL_SUCCESS != err)
303  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
304  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
305  }
306  else
307  {
308  if (szHostBuffer == 0)
309  szHostBuffer = sizeof(double); // a dummy small value
310  // Marshal as a buffer of NANs
311  mpClmem = clCreateBuffer(kEnv.mpkContext,
312  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
313  szHostBuffer, nullptr, &err);
314  if (CL_SUCCESS != err)
315  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
316  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
317 
318  double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
319  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
320  szHostBuffer, 0, nullptr, nullptr, &err));
321  if (CL_SUCCESS != err)
322  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
323 
324  for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
325  rtl::math::setNan(&pNanBuffer[i]);
326  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
327  pNanBuffer, 0, nullptr, nullptr);
328  // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
329  if (CL_SUCCESS != err)
330  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
331  }
332 
333  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref, mnIndex) << ")");
334  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
335  if (CL_SUCCESS != err)
336  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
337  return 1;
338 }
339 
344 
349 
350 namespace {
351 
352 class ConstStringArgument : public DynamicKernelArgument
353 {
354 public:
355  ConstStringArgument( const ScCalcConfig& config, const std::string& s,
356  const FormulaTreeNodeRef& ft ) :
357  DynamicKernelArgument(config, s, ft) { }
359  virtual void GenDecl( std::stringstream& ss ) const override
360  {
361  ss << "unsigned " << mSymName;
362  }
363  virtual void GenDeclRef( std::stringstream& ss ) const override
364  {
365  ss << GenSlidingWindowDeclRef();
366  }
367  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
368  {
369  GenDecl(ss);
370  }
371  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
372  {
373  std::stringstream ss;
374  if (GetFormulaToken()->GetType() != formula::svString)
375  throw Unhandled(__FILE__, __LINE__);
376  FormulaToken* Tok = GetFormulaToken();
377  ss << Tok->GetString().getString().toAsciiUpperCase().hashCode() << "U";
378  return ss.str();
379  }
380  virtual size_t GetWindowSize() const override
381  {
382  return 1;
383  }
385  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
386  {
387  OpenCLZone zone;
388  FormulaToken* ref = mFormulaTree->GetFormulaToken();
389  cl_uint hashCode = 0;
390  if (ref->GetType() != formula::svString)
391  {
392  throw Unhandled(__FILE__, __LINE__);
393  }
394 
395  const OUString s = ref->GetString().getString().toAsciiUpperCase();
396  hashCode = s.hashCode();
397 
398  // Pass the scalar result back to the rest of the formula kernel
399  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_uint: " << hashCode << "(" << DebugPeekData(ref) << ")" );
400  cl_int err = clSetKernelArg(k, argno, sizeof(cl_uint), static_cast<void*>(&hashCode));
401  if (CL_SUCCESS != err)
402  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
403  return 1;
404  }
405 };
406 
408 class DynamicKernelConstantArgument : public DynamicKernelArgument
409 {
410 public:
411  DynamicKernelConstantArgument( const ScCalcConfig& config, const std::string& s,
412  const FormulaTreeNodeRef& ft ) :
413  DynamicKernelArgument(config, s, ft) { }
415  virtual void GenDecl( std::stringstream& ss ) const override
416  {
417  ss << "double " << mSymName;
418  }
419  virtual void GenDeclRef( std::stringstream& ss ) const override
420  {
421  ss << mSymName;
422  }
423  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
424  {
425  GenDecl(ss);
426  }
427  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
428  {
429  if (GetFormulaToken()->GetType() != formula::svDouble)
430  throw Unhandled(__FILE__, __LINE__);
431  return mSymName;
432  }
433  virtual size_t GetWindowSize() const override
434  {
435  return 1;
436  }
437  double GetDouble() const
438  {
439  FormulaToken* Tok = GetFormulaToken();
440  if (Tok->GetType() != formula::svDouble)
441  throw Unhandled(__FILE__, __LINE__);
442  return Tok->GetDouble();
443  }
445  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
446  {
447  OpenCLZone zone;
448  double tmp = GetDouble();
449  // Pass the scalar result back to the rest of the formula kernel
450  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp);
451  cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
452  if (CL_SUCCESS != err)
453  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
454  return 1;
455  }
456 };
457 
458 class DynamicKernelPiArgument : public DynamicKernelArgument
459 {
460 public:
461  DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
462  const FormulaTreeNodeRef& ft ) :
463  DynamicKernelArgument(config, s, ft) { }
465  virtual void GenDecl( std::stringstream& ss ) const override
466  {
467  ss << "double " << mSymName;
468  }
469  virtual void GenDeclRef( std::stringstream& ss ) const override
470  {
471  ss << "3.14159265358979";
472  }
473  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
474  {
475  GenDecl(ss);
476  }
477  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
478  {
479  return mSymName;
480  }
481  virtual size_t GetWindowSize() const override
482  {
483  return 1;
484  }
486  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
487  {
488  OpenCLZone zone;
489  double tmp = 0.0;
490  // Pass the scalar result back to the rest of the formula kernel
491  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << tmp << " (PI)");
492  cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
493  if (CL_SUCCESS != err)
494  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
495  return 1;
496  }
497 };
498 
// Kernel argument that yields a random number inside the kernel.
// The host passes an integer seed as the kernel argument (see Marshal()); the
// generated OpenCL function "<symbol>_Random(int seed)" turns that seed and
// the work-item id into a double using the embedded Random123 threefry2x32
// code.
499 class DynamicKernelRandomArgument : public DynamicKernelArgument
500 {
501 public:
502  DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
503  const FormulaTreeNodeRef& ft ) :
504  DynamicKernelArgument(config, s, ft) { }
// Emits "double <symbol>".
506  virtual void GenDecl( std::stringstream& ss ) const override
507  {
508  ss << "double " << mSymName;
509  }
// Emits the symbol name.
510  virtual void GenDeclRef( std::stringstream& ss ) const override
511  {
512  ss << mSymName;
513  }
// Emits "int <symbol>": in the sliding-window kernel the parameter is the
// integer seed, not a double.
514  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
515  {
516  ss << "int " << mSymName;
517  }
// Returns the call expression "<symbol>_Random(<symbol>)", i.e. the generated
// helper function applied to the seed parameter.
518  virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
519  {
520  return mSymName + "_Random(" + mSymName + ")";
521  }
// Emits the OpenCL source needed to compute the random number:
//  - the Random123 threefry2x32 definitions, wrapped in an
//    "#ifndef DEFINED_RANDOM123_STUFF" guard so that repeated emission
//    defines them only once;
//  - the per-argument function "<symbol>_Random(int seed)", which uses
//    get_global_id(0) as part of the key and the seed as part of the counter,
//    and scales the first output word by 1/(SAL_MAX_UINT32 + 1) plus half
//    that factor.
522  virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
523  {
524  // This string is from the pi_opencl_kernel.i file as
525  // generated when building the Random123 examples. Unused
526  // stuff has been removed, and the actual kernel is not the
527  // same as in the totally different use case of that example,
528  // of course. Only the code that calculates the counter-based
529  // random number and what it needs is left.
530  ss << "\
531 \n\
532 #ifndef DEFINED_RANDOM123_STUFF\n\
533 #define DEFINED_RANDOM123_STUFF\n\
534 \n\
535 /*\n\
536 Copyright 2010-2011, D. E. Shaw Research.\n\
537 All rights reserved.\n\
538 \n\
539 Redistribution and use in source and binary forms, with or without\n\
540 modification, are permitted provided that the following conditions are\n\
541 met:\n\
542 \n\
543 * Redistributions of source code must retain the above copyright\n\
544  notice, this list of conditions, and the following disclaimer.\n\
545 \n\
546 * Redistributions in binary form must reproduce the above copyright\n\
547  notice, this list of conditions, and the following disclaimer in the\n\
548  documentation and/or other materials provided with the distribution.\n\
549 \n\
550 * Neither the name of D. E. Shaw Research nor the names of its\n\
551  contributors may be used to endorse or promote products derived from\n\
552  this software without specific prior written permission.\n\
553 \n\
554 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
555 \"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
556 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
557 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
558 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
559 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
560 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
561 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
562 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
563 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
564 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
565 */\n\
566 \n\
567 typedef uint uint32_t;\n\
568 struct r123array2x32\n\
569 {\n\
570  uint32_t v[2];\n\
571 };\n\
572 enum r123_enum_threefry32x2\n\
573 {\n\
574  R_32x2_0_0 = 13,\n\
575  R_32x2_1_0 = 15,\n\
576  R_32x2_2_0 = 26,\n\
577  R_32x2_3_0 = 6,\n\
578  R_32x2_4_0 = 17,\n\
579  R_32x2_5_0 = 29,\n\
580  R_32x2_6_0 = 16,\n\
581  R_32x2_7_0 = 24\n\
582 };\n\
583 inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
584  __attribute__ ((always_inline));\n\
585 inline uint32_t\n\
586 RotL_32 (uint32_t x, unsigned int N)\n\
587 {\n\
588  return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
589 }\n\
590 \n\
591 typedef struct r123array2x32 threefry2x32_ctr_t;\n\
592 typedef struct r123array2x32 threefry2x32_key_t;\n\
593 typedef struct r123array2x32 threefry2x32_ukey_t;\n\
594 inline threefry2x32_key_t\n\
595 threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
596 {\n\
597  return uk;\n\
598 }\n\
599 \n\
600 inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
601  threefry2x32_ctr_t in,\n\
602  threefry2x32_key_t k)\n\
603  __attribute__ ((always_inline));\n\
604 inline threefry2x32_ctr_t\n\
605 threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
606  threefry2x32_key_t k)\n\
607 {\n\
608  threefry2x32_ctr_t X;\n\
609  uint32_t ks[2 + 1];\n\
610  int i;\n\
611  ks[2] = 0x1BD11BDA;\n\
612  for (i = 0; i < 2; i++) {\n\
613  ks[i] = k.v[i];\n\
614  X.v[i] = in.v[i];\n\
615  ks[2] ^= k.v[i];\n\
616  }\n\
617  X.v[0] += ks[0];\n\
618  X.v[1] += ks[1];\n\
619  if (Nrounds > 0) {\n\
620  X.v[0] += X.v[1];\n\
621  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
622  X.v[1] ^= X.v[0];\n\
623  }\n\
624  if (Nrounds > 1) {\n\
625  X.v[0] += X.v[1];\n\
626  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
627  X.v[1] ^= X.v[0];\n\
628  }\n\
629  if (Nrounds > 2) {\n\
630  X.v[0] += X.v[1];\n\
631  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
632  X.v[1] ^= X.v[0];\n\
633  }\n\
634  if (Nrounds > 3) {\n\
635  X.v[0] += X.v[1];\n\
636  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
637  X.v[1] ^= X.v[0];\n\
638  }\n\
639  if (Nrounds > 3) {\n\
640  X.v[0] += ks[1];\n\
641  X.v[1] += ks[2];\n\
642  X.v[1] += 1;\n\
643  }\n\
644  if (Nrounds > 4) {\n\
645  X.v[0] += X.v[1];\n\
646  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
647  X.v[1] ^= X.v[0];\n\
648  }\n\
649  if (Nrounds > 5) {\n\
650  X.v[0] += X.v[1];\n\
651  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
652  X.v[1] ^= X.v[0];\n\
653  }\n\
654  if (Nrounds > 6) {\n\
655  X.v[0] += X.v[1];\n\
656  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
657  X.v[1] ^= X.v[0];\n\
658  }\n\
659  if (Nrounds > 7) {\n\
660  X.v[0] += X.v[1];\n\
661  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
662  X.v[1] ^= X.v[0];\n\
663  }\n\
664  if (Nrounds > 7) {\n\
665  X.v[0] += ks[2];\n\
666  X.v[1] += ks[0];\n\
667  X.v[1] += 2;\n\
668  }\n\
669  if (Nrounds > 8) {\n\
670  X.v[0] += X.v[1];\n\
671  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
672  X.v[1] ^= X.v[0];\n\
673  }\n\
674  if (Nrounds > 9) {\n\
675  X.v[0] += X.v[1];\n\
676  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
677  X.v[1] ^= X.v[0];\n\
678  }\n\
679  if (Nrounds > 10) {\n\
680  X.v[0] += X.v[1];\n\
681  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
682  X.v[1] ^= X.v[0];\n\
683  }\n\
684  if (Nrounds > 11) {\n\
685  X.v[0] += X.v[1];\n\
686  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
687  X.v[1] ^= X.v[0];\n\
688  }\n\
689  if (Nrounds > 11) {\n\
690  X.v[0] += ks[0];\n\
691  X.v[1] += ks[1];\n\
692  X.v[1] += 3;\n\
693  }\n\
694  if (Nrounds > 12) {\n\
695  X.v[0] += X.v[1];\n\
696  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
697  X.v[1] ^= X.v[0];\n\
698  }\n\
699  if (Nrounds > 13) {\n\
700  X.v[0] += X.v[1];\n\
701  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
702  X.v[1] ^= X.v[0];\n\
703  }\n\
704  if (Nrounds > 14) {\n\
705  X.v[0] += X.v[1];\n\
706  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
707  X.v[1] ^= X.v[0];\n\
708  }\n\
709  if (Nrounds > 15) {\n\
710  X.v[0] += X.v[1];\n\
711  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
712  X.v[1] ^= X.v[0];\n\
713  }\n\
714  if (Nrounds > 15) {\n\
715  X.v[0] += ks[1];\n\
716  X.v[1] += ks[2];\n\
717  X.v[1] += 4;\n\
718  }\n\
719  if (Nrounds > 16) {\n\
720  X.v[0] += X.v[1];\n\
721  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
722  X.v[1] ^= X.v[0];\n\
723  }\n\
724  if (Nrounds > 17) {\n\
725  X.v[0] += X.v[1];\n\
726  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
727  X.v[1] ^= X.v[0];\n\
728  }\n\
729  if (Nrounds > 18) {\n\
730  X.v[0] += X.v[1];\n\
731  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
732  X.v[1] ^= X.v[0];\n\
733  }\n\
734  if (Nrounds > 19) {\n\
735  X.v[0] += X.v[1];\n\
736  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
737  X.v[1] ^= X.v[0];\n\
738  }\n\
739  if (Nrounds > 19) {\n\
740  X.v[0] += ks[2];\n\
741  X.v[1] += ks[0];\n\
742  X.v[1] += 5;\n\
743  }\n\
744  if (Nrounds > 20) {\n\
745  X.v[0] += X.v[1];\n\
746  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
747  X.v[1] ^= X.v[0];\n\
748  }\n\
749  if (Nrounds > 21) {\n\
750  X.v[0] += X.v[1];\n\
751  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
752  X.v[1] ^= X.v[0];\n\
753  }\n\
754  if (Nrounds > 22) {\n\
755  X.v[0] += X.v[1];\n\
756  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
757  X.v[1] ^= X.v[0];\n\
758  }\n\
759  if (Nrounds > 23) {\n\
760  X.v[0] += X.v[1];\n\
761  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
762  X.v[1] ^= X.v[0];\n\
763  }\n\
764  if (Nrounds > 23) {\n\
765  X.v[0] += ks[0];\n\
766  X.v[1] += ks[1];\n\
767  X.v[1] += 6;\n\
768  }\n\
769  if (Nrounds > 24) {\n\
770  X.v[0] += X.v[1];\n\
771  X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
772  X.v[1] ^= X.v[0];\n\
773  }\n\
774  if (Nrounds > 25) {\n\
775  X.v[0] += X.v[1];\n\
776  X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
777  X.v[1] ^= X.v[0];\n\
778  }\n\
779  if (Nrounds > 26) {\n\
780  X.v[0] += X.v[1];\n\
781  X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
782  X.v[1] ^= X.v[0];\n\
783  }\n\
784  if (Nrounds > 27) {\n\
785  X.v[0] += X.v[1];\n\
786  X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
787  X.v[1] ^= X.v[0];\n\
788  }\n\
789  if (Nrounds > 27) {\n\
790  X.v[0] += ks[1];\n\
791  X.v[1] += ks[2];\n\
792  X.v[1] += 7;\n\
793  }\n\
794  if (Nrounds > 28) {\n\
795  X.v[0] += X.v[1];\n\
796  X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
797  X.v[1] ^= X.v[0];\n\
798  }\n\
799  if (Nrounds > 29) {\n\
800  X.v[0] += X.v[1];\n\
801  X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
802  X.v[1] ^= X.v[0];\n\
803  }\n\
804  if (Nrounds > 30) {\n\
805  X.v[0] += X.v[1];\n\
806  X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
807  X.v[1] ^= X.v[0];\n\
808  }\n\
809  if (Nrounds > 31) {\n\
810  X.v[0] += X.v[1];\n\
811  X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
812  X.v[1] ^= X.v[0];\n\
813  }\n\
814  if (Nrounds > 31) {\n\
815  X.v[0] += ks[2];\n\
816  X.v[1] += ks[0];\n\
817  X.v[1] += 8;\n\
818  }\n\
819  return X;\n\
820 }\n\
821 \n\
822 enum r123_enum_threefry2x32\n\
823 { threefry2x32_rounds = 20 };\n\
824 inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
825  threefry2x32_key_t k)\n\
826  __attribute__ ((always_inline));\n\
827 inline threefry2x32_ctr_t\n\
828 threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
829 {\n\
830  return threefry2x32_R (threefry2x32_rounds, in, k);\n\
831 }\n\
832 #endif\n\
833 \n\
834 ";
835  ss << "double " << mSymName << "_Random (int seed)\n\
836 {\n\
837  unsigned tid = get_global_id(0);\n\
838  threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
839  threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
840  c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
841  const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
842  const double halffactor = 0.5*factor;\n\
843  return c.v[0] * factor + halffactor;\n\
844 }\n\
845 ";
846  }
847  virtual size_t GetWindowSize() const override
848  {
849  return 1;
850  }
// Draws a seed from comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32)
// and sets it as kernel argument argno (a cl_int). Throws OpenCLError when
// clSetKernelArg fails. Returns 1, the number of kernel arguments set.
852  virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
853  {
854  OpenCLZone zone;
855  cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
856  // Pass the scalar result back to the rest of the formula kernel
857  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed << "(RANDOM)");
858  cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
859  if (CL_SUCCESS != err)
860  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
861  return 1;
862  }
863 };
864 
866 class DynamicKernelStringArgument : public VectorRef
867 {
868 public:
869  DynamicKernelStringArgument( const ScCalcConfig& config, const std::string& s,
870  const FormulaTreeNodeRef& ft, int index = 0 ) :
871  VectorRef(config, s, ft, index) { }
872 
873  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
875  virtual void GenDecl( std::stringstream& ss ) const override
876  {
877  ss << "__global unsigned int *" << mSymName;
878  }
879  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
880  {
881  DynamicKernelStringArgument::GenDecl(ss);
882  }
883  virtual size_t Marshal( cl_kernel, int, int, cl_program ) override;
884 };
885 
886 }
887 
889 size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
890 {
891  OpenCLZone zone;
892  FormulaToken* ref = mFormulaTree->GetFormulaToken();
893 
896  cl_int err;
898  size_t nStrings = 0;
899  if (ref->GetType() == formula::svSingleVectorRef)
900  {
901  const formula::SingleVectorRefToken* pSVR =
902  static_cast<const formula::SingleVectorRefToken*>(ref);
903  nStrings = pSVR->GetArrayLength();
904  vRef = pSVR->GetArray();
905  }
906  else if (ref->GetType() == formula::svDoubleVectorRef)
907  {
908  const formula::DoubleVectorRefToken* pDVR =
909  static_cast<const formula::DoubleVectorRefToken*>(ref);
910  nStrings = pDVR->GetArrayLength();
911  vRef = pDVR->GetArrays()[mnIndex];
912  }
913  size_t szHostBuffer = nStrings * sizeof(cl_int);
914  cl_uint* pHashBuffer = nullptr;
915 
916  if (vRef.mpStringArray != nullptr)
917  {
918  // Marshal strings. Right now we pass hashes of these string
919  mpClmem = clCreateBuffer(kEnv.mpkContext,
920  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
921  szHostBuffer, nullptr, &err);
922  if (CL_SUCCESS != err)
923  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
924  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
925 
926  pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
927  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
928  szHostBuffer, 0, nullptr, nullptr, &err));
929  if (CL_SUCCESS != err)
930  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
931 
932  for (size_t i = 0; i < nStrings; i++)
933  {
934  if (vRef.mpStringArray[i])
935  {
936  const OUString tmp(vRef.mpStringArray[i]);
937  pHashBuffer[i] = tmp.hashCode();
938  }
939  else
940  {
941  pHashBuffer[i] = 0;
942  }
943  }
944  }
945  else
946  {
947  if (nStrings == 0)
948  szHostBuffer = sizeof(cl_int); // a dummy small value
949  // Marshal as a buffer of NANs
950  mpClmem = clCreateBuffer(kEnv.mpkContext,
951  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
952  szHostBuffer, nullptr, &err);
953  if (CL_SUCCESS != err)
954  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
955  SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
956 
957  pHashBuffer = static_cast<cl_uint*>(clEnqueueMapBuffer(
958  kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
959  szHostBuffer, 0, nullptr, nullptr, &err));
960  if (CL_SUCCESS != err)
961  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
962 
963  for (size_t i = 0; i < szHostBuffer / sizeof(cl_int); i++)
964  pHashBuffer[i] = 0;
965  }
966  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
967  pHashBuffer, 0, nullptr, nullptr);
968  if (CL_SUCCESS != err)
969  throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
970 
971  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref,mnIndex) << ")");
972  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
973  if (CL_SUCCESS != err)
974  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
975  return 1;
976 }
977 
978 namespace {
979 
/// Kernel argument composed of two parts built from the same formula tree node: the numeric
/// VectorRef base (named s) and a DynamicKernelStringArgument member (named s + "s").
981 class DynamicKernelMixedArgument : public VectorRef
982 {
983 public:
984  DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
985  const FormulaTreeNodeRef& ft ) :
986  VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
     /// Writes the numeric part's sliding-window declaration, then ", ", then the string part's.
987  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
988  {
989  VectorRef::GenSlidingWindowDecl(ss);
990  ss << ", ";
991  mStringArgument.GenSlidingWindowDecl(ss);
992  }
     /// Emits nothing for this argument type.
993  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
     /// Writes the numeric part's declaration, then ", ", then the string part's.
995  virtual void GenDecl( std::stringstream& ss ) const override
996  {
997  VectorRef::GenDecl(ss);
998  ss << ", ";
999  mStringArgument.GenDecl(ss);
1000  }
      /// Writes the numeric part's reference, then "," (no space), then the string part's.
1001  virtual void GenDeclRef( std::stringstream& ss ) const override
1002  {
1003  VectorRef::GenDeclRef(ss);
1004  ss << ",";
1005  mStringArgument.GenDeclRef(ss);
1006  }
      /// Returns the expression "(!isnan(NUM)?NUM:STR)": the numeric element is used unless it
      /// is NaN, in which case the string part's element is used. Only the string part
      /// receives the nested flag.
1007  virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1008  {
1009  std::stringstream ss;
1010  ss << "(!isnan(" << VectorRef::GenSlidingWindowDeclRef();
1011  ss << ")?" << VectorRef::GenSlidingWindowDeclRef();
1012  ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1013  ss << ")";
1014  return ss.str();
1015  }
      /// Returns the element reference of the numeric part only; the flag is ignored.
1016  virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1017  {
1018  std::stringstream ss;
1019  ss << VectorRef::GenSlidingWindowDeclRef();
1020  return ss.str();
1021  }
      /// Returns the element reference of the string part only; the flag is ignored.
1022  virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1023  {
1024  std::stringstream ss;
1025  ss << mStringArgument.GenSlidingWindowDeclRef();
1026  return ss.str();
1027  }
      /// Marshals the numeric part starting at kernel argument index argno, then the string
      /// part at the index following whatever the numeric part reported as consumed.
      /// @return the sum of both parts' return values (used by callers to advance argno).
1028  virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1029  {
1030  int i = VectorRef::Marshal(k, argno, vw, p);
1031  i += mStringArgument.Marshal(k, argno + i, vw, p);
1032  return i;
1033  }
1034 
1035 protected:
      // String half of the argument; the numeric half is the VectorRef base itself.
1036  DynamicKernelStringArgument mStringArgument;
1037 };
1038 
/// Wraps an argument class (Base) whose formula token is a double vector reference and
/// generates the OpenCL code that walks the reference's sliding window.
/// NOTE(review): mpDVR, bIsStartFixed and bIsEndFixed are used below, but their declarations
/// and the initialisation of the two flags are not visible in this listing -- confirm against
/// the full source.
1042 template<class Base>
1043 class DynamicKernelSlidingArgument : public Base
1044 {
1045 public:
      /// @throws Unhandled when the node's token is not of type formula::svDoubleVectorRef.
1046  DynamicKernelSlidingArgument(const ScCalcConfig& config, const std::string& s,
1047  const FormulaTreeNodeRef& ft,
1048  const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
1049  : Base(config, s, ft, index)
1050  , mpCodeGen(CodeGen)
1051  {
1052  FormulaToken* t = ft->GetFormulaToken();
1053  if (t->GetType() != formula::svDoubleVectorRef)
1054  throw Unhandled(__FILE__, __LINE__);
1055  mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1058  }
1059 
1060  // Should only be called by SumIfs. Yikes!
      // Returns true when the window holds more than 100 rows and the start and end of the
      // reference are either both fixed or both not fixed. Asserts that the code generator
      // is an OpSumIfs.
1061  virtual bool NeedParallelReduction() const
1062  {
1063  assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
1064  return GetWindowSize() > 100 &&
1065  ((GetStartFixed() && GetEndFixed()) ||
1066  (!GetStartFixed() && !GetEndFixed()));
1067  }
1068 
      /// Emits nothing for this argument type.
1069  virtual void GenSlidingWindowFunction( std::stringstream& ) { }
1070 
      /// Returns the element access used inside generated code: NAME[i + gid0] when neither
      /// end of the reference is fixed, NAME[i] otherwise. With nested set, the access is
      /// wrapped in a ternary that yields NAN when the index is not below the array length.
1071  std::string GenSlidingWindowDeclRef( bool nested = false ) const
1072  {
1073  size_t nArrayLength = mpDVR->GetArrayLength();
1074  std::stringstream ss;
1075  if (!bIsStartFixed && !bIsEndFixed)
1076  {
1077  if (nested)
1078  ss << "((i+gid0) <" << nArrayLength << "?";
1079  ss << Base::GetName() << "[i + gid0]";
1080  if (nested)
1081  ss << ":NAN)";
1082  }
1083  else
1084  {
1085  if (nested)
1086  ss << "(i <" << nArrayLength << "?";
1087  ss << Base::GetName() << "[i]";
1088  if (nested)
1089  ss << ":NAN)";
1090  }
1091  return ss.str();
1092  }
      /// Writes the code that iterates over the window into ss.
      /// @param ss       stream receiving the generated code
      /// @param needBody set to true when only an open "for (...) {" header was written (the
      ///                 caller must supply the loop body and the closing brace); set to false
      ///                 when a complete, unrolled reduction was written
      /// @return the reference's row window size (mpDVR->GetRefRowSize())
1094  size_t GenReductionLoopHeader(
1095  std::stringstream& ss, bool& needBody )
1096  {
1097  assert(mpDVR);
1098  size_t nCurWindowSize = mpDVR->GetRefRowSize();
1099 
1100  {
      // Start not fixed, end fixed: loop from gid0 up to min(array length, window size).
1101  if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1102  {
1103  ss << "for (int i = ";
1104  ss << "gid0; i < " << mpDVR->GetArrayLength();
1105  ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
1106  needBody = true;
1107  return nCurWindowSize;
1108  }
      // Start fixed, end not fixed: loop from 0 up to min(array length, gid0 + window size).
1109  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1110  {
1111  ss << "for (int i = ";
1112  ss << "0; i < " << mpDVR->GetArrayLength();
1113  ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
1114  needBody = true;
1115  return nCurWindowSize;
1116  }
      // Neither end fixed: write the whole reduction, guarded by "i + gid0 < array length".
1117  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1118  {
1119  ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1120  ss << "{int i;\n\t";
1121  std::stringstream temp1, temp2;
1122  int outLoopSize = UNROLLING_FACTOR;
      // Full groups: an outer loop of (window size / UNROLLING_FACTOR) iterations whose body
      // holds UNROLLING_FACTOR copies of the guarded update.
1123  if (nCurWindowSize / outLoopSize != 0)
1124  {
1125  ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1126  for (int count = 0; count < outLoopSize; count++)
1127  {
1128  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
      // The update text is identical for every copy, so it is built once and reused.
1129  if (count == 0)
1130  {
1131  temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1132  temp1 << "){\n\t\t";
1133  temp1 << "tmp = legalize(";
1134  temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1135  temp1 << ", tmp);\n\t\t\t";
1136  temp1 << "}\n\t";
1137  }
1138  ss << temp1.str();
1139  }
1140  ss << "}\n\t";
1141  }
1142  // The residual of mod outLoopSize
      // Remaining elements are written as straight-line code with a literal value for i.
1143  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1144  {
1145  ss << "i = " << count << ";\n\t";
1146  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1147  {
1148  temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1149  temp2 << "){\n\t\t";
1150  temp2 << "tmp = legalize(";
1151  temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1152  temp2 << ", tmp);\n\t\t\t";
1153  temp2 << "}\n\t";
1154  }
1155  ss << temp2.str();
1156  }
1157  ss << "}\n";
1158  needBody = false;
1159  return nCurWindowSize;
1160  }
1161  // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
      // Both ends fixed: same unrolled layout as the previous branch, but the guard is
      // "i < array length" (no gid0 offset).
1162  else
1163  {
1164  ss << "\n\t";
1165  ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1166  ss << "{int i;\n\t";
1167  std::stringstream temp1, temp2;
1168  int outLoopSize = UNROLLING_FACTOR;
1169  if (nCurWindowSize / outLoopSize != 0)
1170  {
1171  ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1172  for (int count = 0; count < outLoopSize; count++)
1173  {
1174  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1175  if (count == 0)
1176  {
1177  temp1 << "if(i < " << mpDVR->GetArrayLength();
1178  temp1 << "){\n\t\t";
1179  temp1 << "tmp = legalize(";
1180  temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1181  temp1 << ", tmp);\n\t\t\t";
1182  temp1 << "}\n\t";
1183  }
1184  ss << temp1.str();
1185  }
1186  ss << "}\n\t";
1187  }
1188  // The residual of mod outLoopSize
1189  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1190  {
1191  ss << "i = " << count << ";\n\t";
1192  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1193  {
1194  temp2 << "if(i < " << mpDVR->GetArrayLength();
1195  temp2 << "){\n\t\t";
1196  temp2 << "tmp = legalize(";
1197  temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1198  temp2 << ", tmp);\n\t\t\t";
1199  temp2 << "}\n\t";
1200  }
1201  ss << temp2.str();
1202  }
1203  ss << "}\n";
1204  needBody = false;
1205  return nCurWindowSize;
1206  }
1207  }
1208  }
1209 
      /// Array length reported by the double vector reference token.
1210  size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1211 
      /// Row window size reported by the double vector reference token.
1212  size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1213 
1214  bool GetStartFixed() const { return bIsStartFixed; }
1215 
1216  bool GetEndFixed() const { return bIsEndFixed; }
1217 
1218 protected:
1221  // from parent nodes
      // Code generator supplying GetBottom() and Gen2() for the emitted reduction.
1222  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1223 };
1224 
/// Sliding-window counterpart of a mixed argument: holds a numeric sliding argument (named s)
/// and a string sliding argument (named s + "s") and forwards every operation to them.
1226 class DynamicKernelMixedSlidingArgument : public VectorRef
1227 {
1228 public:
      // NOTE(review): both members are constructed from mCalcConfig, which is not declared in
      // this class -- presumably a base-class member set by VectorRef(config, ...); confirm.
1229  DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
1230  const FormulaTreeNodeRef& ft, const std::shared_ptr<SlidingFunctionBase>& CodeGen,
1231  int index ) :
1232  VectorRef(config, s, ft),
1233  mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
1234  mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
      /// Writes the numeric member's sliding-window declaration, ", ", then the string member's.
1235  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
1236  {
1237  mDoubleArgument.GenSlidingWindowDecl(ss);
1238  ss << ", ";
1239  mStringArgument.GenSlidingWindowDecl(ss);
1240  }
      /// Emits nothing for this argument type.
1241  virtual void GenSlidingWindowFunction( std::stringstream& ) override { }
      /// Writes the numeric member's declaration, ", ", then the string member's.
1243  virtual void GenDecl( std::stringstream& ss ) const override
1244  {
1245  mDoubleArgument.GenDecl(ss);
1246  ss << ", ";
1247  mStringArgument.GenDecl(ss);
1248  }
      /// Writes the numeric member's reference, "," (no space), then the string member's.
1249  virtual void GenDeclRef( std::stringstream& ss ) const override
1250  {
1251  mDoubleArgument.GenDeclRef(ss);
1252  ss << ",";
1253  mStringArgument.GenDeclRef(ss);
1254  }
      /// Returns "(!isnan(NUM)?NUM:STR)": the numeric element unless it is NaN, otherwise the
      /// string member's element. Only the string member receives the nested flag.
1255  virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1256  {
1257  std::stringstream ss;
1258  ss << "(!isnan(" << mDoubleArgument.GenSlidingWindowDeclRef();
1259  ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef();
1260  ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1261  ss << ")";
1262  return ss.str();
1263  }
      /// Returns the numeric member's element reference only; the flag is ignored.
1264  virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1265  {
1266  std::stringstream ss;
1267  ss << mDoubleArgument.GenSlidingWindowDeclRef();
1268  return ss.str();
1269  }
      /// Returns the string member's element reference only; the flag is ignored.
1270  virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1271  {
1272  std::stringstream ss;
1273  ss << mStringArgument.GenSlidingWindowDeclRef();
1274  return ss.str();
1275  }
      /// Marshals the numeric member at argno, then the string member at the index following
      /// whatever the numeric member reported as consumed.
      /// @return the sum of both members' return values
1276  virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1277  {
1278  int i = mDoubleArgument.Marshal(k, argno, vw, p);
1279  i += mStringArgument.Marshal(k, argno + i, vw, p);
1280  return i;
1281  }
1282 
1283 protected:
1284  DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
1285  DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
1286 };
1287 
/// Holds the kernel arguments of a generated kernel: a map from formula token to argument
/// (mSymbols) and a vector of arguments (mParams) that is iterated when dumping helper
/// functions and when marshalling.
1289 class SymbolTable
1290 {
1291 public:
1292  typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1293  // This avoids instability caused by using pointer as the key type
      // NOTE(review): it is not clear from this listing what "This" refers to (the map above
      // is itself keyed by pointer); possibly the separately kept mParams vector -- confirm.
1294  SymbolTable() : mCurId(0) { }
      // Declared here; the definition is not part of this listing excerpt.
1295  template <class T>
1296  const DynamicKernelArgument* DeclRefArg(const ScCalcConfig& config, const FormulaTreeNodeRef&,
1297  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize);
      /// Lets every argument in mParams write its helper function to ss, each followed by a
      /// newline.
1299  void DumpSlidingWindowFunctions( std::stringstream& ss )
1300  {
1301  for (auto const& argument : mParams)
1302  {
1303  argument->GenSlidingWindowFunction(ss);
1304  ss << "\n";
1305  }
1306  }
      /// Marshals all arguments in mParams to the given kernel (defined outside the class).
1309  void Marshal( cl_kernel, int, cl_program );
1310 
1311 private:
      // Initialised to 0 by the constructor; its use is not visible in this excerpt.
1312  unsigned int mCurId;
1313  ArgumentMap mSymbols;
1314  std::vector<DynamicKernelArgumentRef> mParams;
1315 };
1316 
1317 }
1318 
/// Marshals every argument in mParams to kernel k in order. Kernel argument indices start at
/// 1, and each argument's Marshal() return value advances the index for the next one.
/// @param k            kernel receiving the arguments
/// @param nVectorWidth passed through unchanged to each argument's Marshal()
/// @param pProgram     passed through unchanged to each argument's Marshal()
1319 void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
1320 {
1321  int i = 1; //The first argument is reserved for results
1322  for (auto const& argument : mParams)
1323  {
1324  i += argument->Marshal(k, i, nVectorWidth, pProgram);
1325  }
1326 }
1327 
1328 namespace {
1329 
/// Argument wrapper for a double vector reference that owns a second OpenCL buffer
/// (mpClmem2), released in the destructor. GenSlidingWindowFunction, GenReductionLoopHeader
/// and Marshal are only declared here; their definitions are outside the class body.
/// NOTE(review): mpDVR is used below but its declaration and the initialisation of
/// bIsStartFixed / bIsEndFixed are not visible in this listing -- confirm against the full
/// source.
1332 template<class Base>
1333 class ParallelReductionVectorRef : public Base
1334 {
1335 public:
      /// @throws Unhandled when the node's token is not of type formula::svDoubleVectorRef.
1336  ParallelReductionVectorRef(const ScCalcConfig& config, const std::string& s,
1337  const FormulaTreeNodeRef& ft,
1338  const std::shared_ptr<SlidingFunctionBase>& CodeGen, int index)
1339  : Base(config, s, ft, index)
1340  , mpCodeGen(CodeGen)
1341  , mpClmem2(nullptr)
1342  {
1343  FormulaToken* t = ft->GetFormulaToken();
1344  if (t->GetType() != formula::svDoubleVectorRef)
1345  throw Unhandled(__FILE__, __LINE__);
1346  mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1349  }
1350 
1352  virtual void GenSlidingWindowFunction( std::stringstream& ss );
1353 
      /// Returns NAME[i + gid0] when neither end of the reference is fixed, NAME[i] otherwise.
      /// The bool parameter is ignored.
1354  virtual std::string GenSlidingWindowDeclRef( bool ) const
1355  {
1356  std::stringstream ss;
1357  if (!bIsStartFixed && !bIsEndFixed)
1358  ss << Base::GetName() << "[i + gid0]";
1359  else
1360  ss << Base::GetName() << "[i]";
1361  return ss.str();
1362  }
1363 
1365  size_t GenReductionLoopHeader(
1366  std::stringstream& ss, int nResultSize, bool& needBody );
1367 
1368  virtual size_t Marshal( cl_kernel k, int argno, int w, cl_program mpProgram );
1369 
      /// Releases mpClmem2 if it was set; a failed release is only logged as a warning.
1370  ~ParallelReductionVectorRef()
1371  {
1372  if (mpClmem2)
1373  {
1374  cl_int err;
1375  err = clReleaseMemObject(mpClmem2);
1376  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
1377  mpClmem2 = nullptr;
1378  }
1379  }
1380 
      /// Array length reported by the double vector reference token.
1381  size_t GetArrayLength() const { return mpDVR->GetArrayLength(); }
1382 
      /// Row window size reported by the double vector reference token.
1383  size_t GetWindowSize() const { return mpDVR->GetRefRowSize(); }
1384 
1385  bool GetStartFixed() const { return bIsStartFixed; }
1386 
1387  bool GetEndFixed() const { return bIsEndFixed; }
1388 
1389 protected:
1390  bool bIsStartFixed, bIsEndFixed;
1392  // from parent nodes
1393  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1394  // controls whether to invoke the reduction kernel during marshaling or not
      // Second OpenCL buffer owned by this object; nullptr until set, released in the destructor.
1395  cl_mem mpClmem2;
1396 };
1397 
/// Base for operators that fold all their arguments into one running value "tmp". Subclasses
/// supply the start value (GetBottom), the combining expression (Gen2) and the function name
/// suffix (BinFuncName).
/// NOTE(review): mnResultSize is initialised by the constructor and used below, but its
/// declaration is not visible in this listing -- confirm against the full source.
1398 class Reduction : public SlidingFunctionBase
1399 {
1401 public:
1402  explicit Reduction(int nResultSize) : mnResultSize(nResultSize) {}
1403 
1404  typedef DynamicKernelSlidingArgument<VectorRef> NumericRange;
1405  typedef DynamicKernelSlidingArgument<DynamicKernelStringArgument> StringRange;
1406  typedef ParallelReductionVectorRef<VectorRef> ParallelNumericRange;
1407 
      /// Hook letting a subclass write its own NaN handling for argument number argno.
      /// @return true when NaN handling was fully written, so the generic isnan() test is
      ///         skipped; this base implementation writes nothing and returns false
1408  virtual bool HandleNaNArgument( std::stringstream&, unsigned, SubArguments& ) const
1409  {
1410  return false;
1411  }
1412 
      /// Writes the OpenCL function "double <sSymName>_<BinFuncName()>(...)" to ss.
      /// The function starts tmp at GetBottom(), folds in every argument (processed from the
      /// last argument to the first) via Gen2(), and returns tmp.
1413  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1414  const std::string& sSymName, SubArguments& vSubArguments ) override
1415  {
1416  ss << "\ndouble " << sSymName;
1417  ss << "_" << BinFuncName() << "(";
1418  for (size_t i = 0; i < vSubArguments.size(); i++)
1419  {
1420  if (i)
1421  ss << ", ";
1422  vSubArguments[i]->GenSlidingWindowDecl(ss);
1423  }
1424  ss << ") {\n";
1425  ss << "double tmp = " << GetBottom() << ";\n";
1426  ss << "int gid0 = get_global_id(0);\n";
      // nCount is only declared for operators whose Gen2() expression references it.
1427  if (isAverage() || isMinOrMax())
1428  ss << "int nCount = 0;\n";
1429  ss << "double tmpBottom;\n";
1430  unsigned i = vSubArguments.size();
1431  while (i--)
1432  {
      // Range arguments write their own loop header. When the header routine reports that
      // it already wrote the complete reduction (needBody == false), nothing more is
      // written for this argument.
1433  if (NumericRange* NR =
1434  dynamic_cast<NumericRange*>(vSubArguments[i].get()))
1435  {
1436  bool needBody;
1437  NR->GenReductionLoopHeader(ss, needBody);
1438  if (!needBody)
1439  continue;
1440  }
1441  else if (ParallelNumericRange* PNR =
1442  dynamic_cast<ParallelNumericRange*>(vSubArguments[i].get()))
1443  {
1444  //did not handle yet
1445  bool bNeedBody = false;
1446  PNR->GenReductionLoopHeader(ss, mnResultSize, bNeedBody);
1447  if (!bNeedBody)
1448  continue;
1449  }
1450  else if (StringRange* SR =
1451  dynamic_cast<StringRange*>(vSubArguments[i].get()))
1452  {
1453  //did not handle yet
1454  bool needBody;
1455  SR->GenReductionLoopHeader(ss, needBody);
1456  if (!needBody)
1457  continue;
1458  }
1459  else
1460  {
1461  FormulaToken* pCur = vSubArguments[i]->GetFormulaToken();
1462  assert(pCur);
1464 
      // Single values get an opening brace here; the ocPush branch below writes a closing
      // "}" that matches either this brace or a loop header written above.
1465  if (pCur->GetType() == formula::svSingleVectorRef ||
1466  pCur->GetType() == formula::svDouble)
1467  {
1468  ss << "{\n";
1469  }
1470  }
1471  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1472  {
1473  bool bNanHandled = HandleNaNArgument(ss, i, vSubArguments);
1474 
1475  ss << "tmpBottom = " << GetBottom() << ";\n";
1476 
      // Generic NaN handling: a NaN element either makes the function return 0
      // (ZeroReturnZero) or is replaced by the bottom value when folded into tmp.
1477  if (!bNanHandled)
1478  {
1479  ss << "if (isnan(";
1480  ss << vSubArguments[i]->GenSlidingWindowDeclRef();
1481  ss << "))\n";
1482  if (ZeroReturnZero())
1483  ss << " return 0;\n";
1484  else
1485  {
1486  ss << " tmp = ";
1487  ss << Gen2("tmpBottom", "tmp") << ";\n";
1488  }
1489  ss << "else\n";
1490  }
1491  ss << "{";
1492  ss << " tmp = ";
1493  ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1494  ss << ";\n";
1495  ss << " }\n";
1496  ss << "}\n";
1497  }
1498  else
1499  {
      // Any other opcode: fold the argument's reference into tmp without a NaN test or
      // braces.
1500  ss << "tmp = ";
1501  ss << Gen2(vSubArguments[i]->GenSlidingWindowDeclRef(), "tmp");
1502  ss << ";\n";
1503  }
1504  }
      // With nCount == 0 an average returns a DivisionByZero error and min/max return 0.
1505  if (isAverage())
1506  ss <<
1507  "if (nCount==0)\n"
1508  " return CreateDoubleError(DivisionByZero);\n";
1509  else if (isMinOrMax())
1510  ss <<
1511  "if (nCount==0)\n"
1512  " return 0;\n";
1513  ss << "return tmp";
      // An average returns tmp multiplied by nCount to the power -1.
1514  if (isAverage())
1515  ss << "*pow((double)nCount,-1.0)";
1516  ss << ";\n}";
1517  }
1518  virtual bool isAverage() const { return false; }
1519  virtual bool isMinOrMax() const { return false; }
1520  virtual bool takeString() const override { return false; }
1521  virtual bool takeNumeric() const override { return true; }
1522 };
1523 
1524 // Strictly binary operators
/// Base for operators with exactly two arguments. The generated function evaluates
/// Gen2(first argument, second argument) once and returns the result.
1525 class Binary : public SlidingFunctionBase
1526 {
1527 public:
      /// Writes the OpenCL function "double <sSymName>_<BinFuncName()>(...)" to ss.
      /// Asserts that exactly two sub-arguments are supplied.
1528  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1529  const std::string& sSymName, SubArguments& vSubArguments ) override
1530  {
1531  ss << "\ndouble " << sSymName;
1532  ss << "_" << BinFuncName() << "(";
1533  assert(vSubArguments.size() == 2);
1534  for (size_t i = 0; i < vSubArguments.size(); i++)
1535  {
1536  if (i)
1537  ss << ", ";
1538  vSubArguments[i]->GenSlidingWindowDecl(ss);
1539  }
1540  ss << ") {\n\t";
      // i is declared as 0 in the generated code alongside gid0.
1541  ss << "int gid0 = get_global_id(0), i = 0;\n\t";
1542  ss << "double tmp = ";
1543  ss << Gen2(vSubArguments[0]->GenSlidingWindowDeclRef(),
1544  vSubArguments[1]->GenSlidingWindowDeclRef()) << ";\n\t";
1545  ss << "return tmp;\n}";
1546  }
1547  virtual bool takeString() const override { return true; }
1548  virtual bool takeNumeric() const override { return true; }
1549 };
1550 
/// Generates a function that, for every index of the largest argument window, multiplies the
/// corresponding element of each argument and adds the product to a running sum via fsum().
/// Elements that are out of range or NaN contribute 0 to the product.
1551 class SumOfProduct : public SlidingFunctionBase
1552 {
1553 public:
      /// Writes the OpenCL function "double <sSymName>_<BinFuncName()>(...)" to ss.
      /// @throws Unhandled when an ocPush argument has exactly one of its start/end fixed
1554  virtual void GenSlidingWindowFunction( std::stringstream& ss,
1555  const std::string& sSymName, SubArguments& vSubArguments ) override
1556  {
1557  size_t nCurWindowSize = 0;
1558  FormulaToken* tmpCur = nullptr;
1559  const formula::DoubleVectorRefToken* pCurDVR = nullptr;
1560  ss << "\ndouble " << sSymName;
1561  ss << "_" << BinFuncName() << "(";
      // Write the parameter list and determine the largest window size of all arguments.
1562  for (size_t i = 0; i < vSubArguments.size(); i++)
1563  {
1564  if (i)
1565  ss << ",";
1566  vSubArguments[i]->GenSlidingWindowDecl(ss);
1567  size_t nCurChildWindowSize = vSubArguments[i]->GetWindowSize();
1568  nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1569  nCurChildWindowSize : nCurWindowSize;
1570  tmpCur = vSubArguments[i]->GetFormulaToken();
1571  if (ocPush == tmpCur->GetOpCode())
1572  {
1573 
      // NOTE(review): the token is cast to DoubleVectorRefToken based on the opcode alone;
      // its type is only tested further down. Confirm that ocPush tokens reaching this point
      // are always double vector references.
1574  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1575  if (pCurDVR->IsStartFixed() != pCurDVR->IsEndFixed())
1576  throw Unhandled(__FILE__, __LINE__);
1577  }
1578  }
1579  ss << ") {\n";
1580  ss << " double tmp = 0.0;\n";
1581  ss << " int gid0 = get_global_id(0);\n";
1582 
1583  ss << "\tint i;\n\t";
      // One currentCountN variable per argument.
      // NOTE(review): vSubArguments.size() - 1 wraps around if the argument list is empty --
      // confirm callers never pass an empty list.
1584  ss << "int currentCount0;\n";
1585  for (size_t i = 0; i < vSubArguments.size() - 1; i++)
1586  ss << "int currentCount" << i + 1 << ";\n";
1587  std::stringstream temp3, temp4;
1588  int outLoopSize = UNROLLING_FACTOR;
      // Full groups: an outer loop of (window size / UNROLLING_FACTOR) iterations, each
      // holding UNROLLING_FACTOR copies of the same statements (built once in temp3).
1589  if (nCurWindowSize / outLoopSize != 0)
1590  {
1591  ss << "for(int outLoop=0; outLoop<" <<
1592  nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1593  for (int count = 0; count < outLoopSize; count++)
1594  {
1595  ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n";
1596  if (count == 0)
1597  {
      // currentCountN is the 1-based position of the element being read: i+gid0+1 when
      // neither end of the reference is fixed, i+1 otherwise.
1598  for (size_t i = 0; i < vSubArguments.size(); i++)
1599  {
1600  tmpCur = vSubArguments[i]->GetFormulaToken();
1601  if (ocPush == tmpCur->GetOpCode())
1602  {
1603  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1604  if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1605  {
1606  temp3 << " currentCount";
1607  temp3 << i;
1608  temp3 << " =i+gid0+1;\n";
1609  }
1610  else
1611  {
1612  temp3 << " currentCount";
1613  temp3 << i;
1614  temp3 << " =i+1;\n";
1615  }
1616  }
1617  }
1618 
      // Build "tmp = fsum(f0*f1*..., tmp);" where each ocPush factor has the form
      // "((currentCountN>LEN)||isnan(REF)?0:REF)".
1619  temp3 << "tmp = fsum(";
1620  for (size_t i = 0; i < vSubArguments.size(); i++)
1621  {
1622  if (i)
1623  temp3 << "*";
1624  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1625  {
1626  temp3 << "(";
1627  temp3 << "(currentCount";
1628  temp3 << i;
1629  temp3 << ">";
      // NOTE(review): the right-hand side of this comparison is missing from this listing
      // (presumably formula::svSingleVectorRef, given the cast below) -- confirm.
1630  if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1632  {
1633  const formula::SingleVectorRefToken* pSVR =
1634  static_cast<const formula::SingleVectorRefToken*>
1635  (vSubArguments[i]->GetFormulaToken());
1636  temp3 << pSVR->GetArrayLength();
1637  temp3 << ")||isnan(" << vSubArguments[i]
1638  ->GenSlidingWindowDeclRef();
1639  temp3 << ")?0:";
1640  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef();
1641  temp3 << ")";
1642  }
      // NOTE(review): the right-hand side of this comparison is missing from this listing
      // (presumably formula::svDoubleVectorRef, given the cast below) -- confirm.
1643  else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1645  {
1646  const formula::DoubleVectorRefToken* pSVR =
1647  static_cast<const formula::DoubleVectorRefToken*>
1648  (vSubArguments[i]->GetFormulaToken());
1649  temp3 << pSVR->GetArrayLength();
1650  temp3 << ")||isnan(" << vSubArguments[i]
1651  ->GenSlidingWindowDeclRef(true);
1652  temp3 << ")?0:";
1653  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1654  temp3 << ")";
1655  }
1656 
1657  }
1658  else
1659  temp3 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1660  }
1661  temp3 << ", tmp);\n\t";
1662  }
1663  ss << temp3.str();
1664  }
1665  ss << "}\n\t";
1666  }
1667  //The residual of mod outLoopSize
      // Remaining elements are written as straight-line code with a literal value for i; the
      // statement text (temp4) is built the same way as temp3 above.
1668  for (size_t count = nCurWindowSize / outLoopSize * outLoopSize;
1669  count < nCurWindowSize; count++)
1670  {
1671  ss << "i =" << count << ";\n";
1672  if (count == nCurWindowSize / outLoopSize * outLoopSize)
1673  {
1674  for (size_t i = 0; i < vSubArguments.size(); i++)
1675  {
1676  tmpCur = vSubArguments[i]->GetFormulaToken();
1677  if (ocPush == tmpCur->GetOpCode())
1678  {
1679  pCurDVR = static_cast<const formula::DoubleVectorRefToken*>(tmpCur);
1680  if (!pCurDVR->IsStartFixed() && !pCurDVR->IsEndFixed())
1681  {
1682  temp4 << " currentCount";
1683  temp4 << i;
1684  temp4 << " =i+gid0+1;\n";
1685  }
1686  else
1687  {
1688  temp4 << " currentCount";
1689  temp4 << i;
1690  temp4 << " =i+1;\n";
1691  }
1692  }
1693  }
1694 
1695  temp4 << "tmp = fsum(";
1696  for (size_t i = 0; i < vSubArguments.size(); i++)
1697  {
1698  if (i)
1699  temp4 << "*";
1700  if (ocPush == vSubArguments[i]->GetFormulaToken()->GetOpCode())
1701  {
1702  temp4 << "(";
1703  temp4 << "(currentCount";
1704  temp4 << i;
1705  temp4 << ">";
      // NOTE(review): comparison operand missing from this listing, as in the unrolled part.
1706  if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1708  {
1709  const formula::SingleVectorRefToken* pSVR =
1710  static_cast<const formula::SingleVectorRefToken*>
1711  (vSubArguments[i]->GetFormulaToken());
1712  temp4 << pSVR->GetArrayLength();
1713  temp4 << ")||isnan(" << vSubArguments[i]
1714  ->GenSlidingWindowDeclRef();
1715  temp4 << ")?0:";
1716  temp4 << vSubArguments[i]->GenSlidingWindowDeclRef();
1717  temp4 << ")";
1718  }
      // NOTE(review): comparison operand missing from this listing, as in the unrolled part.
1719  else if (vSubArguments[i]->GetFormulaToken()->GetType() ==
1721  {
1722  const formula::DoubleVectorRefToken* pSVR =
1723  static_cast<const formula::DoubleVectorRefToken*>
1724  (vSubArguments[i]->GetFormulaToken());
1725  temp4 << pSVR->GetArrayLength();
1726  temp4 << ")||isnan(" << vSubArguments[i]
1727  ->GenSlidingWindowDeclRef(true);
1728  temp4 << ")?0:";
1729  temp4 << vSubArguments[i]->GenSlidingWindowDeclRef(true);
1730  temp4 << ")";
1731  }
1732 
1733  }
1734  else
1735  {
1736  temp4 << vSubArguments[i]
1737  ->GenSlidingWindowDeclRef(true);
1738  }
1739  }
1740  temp4 << ", tmp);\n\t";
1741  }
1742  ss << temp4.str();
1743  }
1744  ss << "return tmp;\n";
1745  ss << "}";
1746  }
1747  virtual bool takeString() const override { return false; }
1748  virtual bool takeNumeric() const override { return true; }
1749 };
1750 
/// Reduction whose combining step returns the left operand unchanged and ignores the right
/// one. Start value "0", function name suffix "nop".
1752 class OpNop : public Reduction
1753 {
1754 public:
1755  explicit OpNop(int nResultSize) : Reduction(nResultSize) {}
1756 
1757  virtual std::string GetBottom() override { return "0"; }
      /// @return lhs as is; the second operand is unused
1758  virtual std::string Gen2( const std::string& lhs, const std::string& ) const override
1759  {
1760  return lhs;
1761  }
1762  virtual std::string BinFuncName() const override { return "nop"; }
1763 };
1764 
/// Reduction that counts non-NaN elements. Start value "0", function name suffix "fcount".
1765 class OpCount : public Reduction
1766 {
1767 public:
1768  explicit OpCount(int nResultSize) : Reduction(nResultSize) {}
1769 
1770  virtual std::string GetBottom() override { return "0"; }
      /// @return "(isnan(lhs)?rhs:rhs+1.0)": rhs is the count so far and is incremented only
      ///         when lhs is not NaN
1771  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1772  {
1773  std::stringstream ss;
1774  ss << "(isnan(" << lhs << ")?" << rhs << ":" << rhs << "+1.0)";
1775  return ss.str();
1776  }
1777  virtual std::string BinFuncName() const override { return "fcount"; }
1778  virtual bool canHandleMultiVector() const override { return true; }
1779 };
1780 
/// Binary operator that emits a call to strequal() on its two operands. Function name
/// suffix "eq".
1781 class OpEqual : public Binary
1782 {
1783 public:
1784  virtual std::string GetBottom() override { return "0"; }
      /// @return "strequal(lhs,rhs)"
1785  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1786  {
1787  std::stringstream ss;
1788  ss << "strequal(" << lhs << "," << rhs << ")";
1789  return ss.str();
1790  }
1791  virtual std::string BinFuncName() const override { return "eq"; }
1792 };
1793 
/// Binary operator that emits the comparison lhs <= rhs. Function name suffix "leq".
1794 class OpLessEqual : public Binary
1795 {
1796 public:
1797  virtual std::string GetBottom() override { return "0"; }
      /// @return "(lhs<=rhs)"
1798  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1799  {
1800  std::stringstream ss;
1801  ss << "(" << lhs << "<=" << rhs << ")";
1802  return ss.str();
1803  }
1804  virtual std::string BinFuncName() const override { return "leq"; }
1805 };
1806 
/// Binary operator that emits the comparison lhs < rhs. Function name suffix "less".
1807 class OpLess : public Binary
1808 {
1809 public:
1810  virtual std::string GetBottom() override { return "0"; }
      /// @return "(lhs<rhs)"
1811  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1812  {
1813  std::stringstream ss;
1814  ss << "(" << lhs << "<" << rhs << ")";
1815  return ss.str();
1816  }
1817  virtual std::string BinFuncName() const override { return "less"; }
1818 };
1819 
/// Binary operator that emits the comparison lhs > rhs. Function name suffix "gt".
1820 class OpGreater : public Binary
1821 {
1822 public:
1823  virtual std::string GetBottom() override { return "0"; }
      /// @return "(lhs>rhs)"
1824  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1825  {
1826  std::stringstream ss;
1827  ss << "(" << lhs << ">" << rhs << ")";
1828  return ss.str();
1829  }
1830  virtual std::string BinFuncName() const override { return "gt"; }
1831 };
1832 
/// Reduction that adds elements with fsum_approx(). Start value "0", function name suffix
/// "fsum".
1833 class OpSum : public Reduction
1834 {
1835 public:
1836  explicit OpSum(int nResultSize) : Reduction(nResultSize) {}
1837 
1838  virtual std::string GetBottom() override { return "0"; }
      /// @return "fsum_approx((lhs),(rhs))"
1839  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1840  {
1841  std::stringstream ss;
1842  ss << "fsum_approx((" << lhs << "),(" << rhs << "))";
1843  return ss.str();
1844  }
1845  virtual std::string BinFuncName() const override { return "fsum"; }
1846  // All arguments are simply summed, so it doesn't matter if SvDoubleVector is split.
1847  virtual bool canHandleMultiVector() const override { return true; }
1848 };
1849 
/// Reduction that sums elements with fsum_count(), which is also handed the address of the
/// generated nCount variable. Because isAverage() is true, Reduction declares nCount and
/// returns tmp multiplied by nCount to the power -1. Function name suffix "average".
1850 class OpAverage : public Reduction
1851 {
1852 public:
1853  explicit OpAverage(int nResultSize) : Reduction(nResultSize) {}
1854 
1855  virtual std::string GetBottom() override { return "0"; }
      /// @return "fsum_count(lhs,rhs, &nCount)"
1856  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1857  {
1858  std::stringstream ss;
1859  ss << "fsum_count(" << lhs << "," << rhs << ", &nCount)";
1860  return ss.str();
1861  }
1862  virtual std::string BinFuncName() const override { return "average"; }
1863  virtual bool isAverage() const override { return true; }
1864  virtual bool canHandleMultiVector() const override { return true; }
1865 };
1866 
/// Reduction that combines operands with fsub_approx(). Start value "0", function name
/// suffix "fsub".
1867 class OpSub : public Reduction
1868 {
1869 public:
1870  explicit OpSub(int nResultSize) : Reduction(nResultSize) {}
1871 
1872  virtual std::string GetBottom() override { return "0"; }
      /// @return "fsub_approx(lhs,rhs)"
1873  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1874  {
1875  return "fsub_approx(" + lhs + "," + rhs + ")";
1876  }
1877  virtual std::string BinFuncName() const override { return "fsub"; }
1878 };
1879 
/// Reduction that multiplies operands. Start value "1", function name suffix "fmul".
/// ZeroReturnZero() is true, so Reduction emits "return 0;" for a NaN argument.
1880 class OpMul : public Reduction
1881 {
1882 public:
1883  explicit OpMul(int nResultSize) : Reduction(nResultSize) {}
1884 
1885  virtual std::string GetBottom() override { return "1"; }
      /// @return "lhs*rhs" (no parentheses are added around the operands)
1886  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1887  {
1888  return lhs + "*" + rhs;
1889  }
1890  virtual std::string BinFuncName() const override { return "fmul"; }
1891  virtual bool ZeroReturnZero() override { return true; }
1892 };
1893 
/// Reduction that divides, with explicit handling of a zero or NaN divisor. Start value
/// "1.0", function name suffix "fdiv".
1895 class OpDiv : public Reduction
1896 {
1897 public:
1898  explicit OpDiv(int nResultSize) : Reduction(nResultSize) {}
1899 
1900  virtual std::string GetBottom() override { return "1.0"; }
      /// @return "(rhs==0 ? CreateDoubleError(DivisionByZero) : (lhs/rhs) )"
1901  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1902  {
1903  return "(" + rhs + "==0 ? CreateDoubleError(DivisionByZero) : (" + lhs + "/" + rhs + ") )";
1904  }
1905  virtual std::string BinFuncName() const override { return "fdiv"; }
1906 
      /// Writes division-specific NaN handling for argument number argno.
      /// - argno 1: a NaN value makes the generated function return a DivisionByZero error;
      ///   returns true so the caller skips its generic NaN test.
      /// - argno 0: a NaN value makes the generated function return 0, provided argument 1 is
      ///   neither NaN nor 0; returns false so the caller still writes its generic NaN test.
      /// - any other argno: writes nothing and returns false.
1907  virtual bool HandleNaNArgument( std::stringstream& ss, unsigned argno, SubArguments& vSubArguments ) const override
1908  {
1909  if (argno == 1)
1910  {
1911  ss <<
1912  "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ")) {\n"
1913  " return CreateDoubleError(DivisionByZero);\n"
1914  "}\n";
1915  return true;
1916  }
1917  else if (argno == 0)
1918  {
      // NOTE(review): vSubArguments[1] is read without a size check -- presumably division
      // always has two arguments; confirm.
1919  ss <<
1920  "if (isnan(" << vSubArguments[argno]->GenSlidingWindowDeclRef() << ") &&\n"
1921  " !(isnan(" << vSubArguments[1]->GenSlidingWindowDeclRef() << ") || " << vSubArguments[1]->GenSlidingWindowDeclRef() << " == 0)) {\n"
1922  " return 0;\n"
1923  "}\n";
1924  }
1925  return false;
1926  }
1927 
1928 };
1929 
/// Reduction that combines operands with fmin_count(), which is also handed the address of
/// the generated nCount variable. Start value "NAN", function name suffix "min". Because
/// isMinOrMax() is true, Reduction declares nCount and returns 0 when it stays 0.
1930 class OpMin : public Reduction
1931 {
1932 public:
1933  explicit OpMin(int nResultSize) : Reduction(nResultSize) {}
1934 
1935  virtual std::string GetBottom() override { return "NAN"; }
      /// @return "fmin_count(lhs,rhs, &nCount)"
1936  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1937  {
1938  return "fmin_count(" + lhs + "," + rhs + ", &nCount)";
1939  }
1940  virtual std::string BinFuncName() const override { return "min"; }
1941  virtual bool isMinOrMax() const override { return true; }
1942  virtual bool canHandleMultiVector() const override { return true; }
1943 };
1944 
/// Reduction that combines operands with fmax_count(), which is also handed the address of
/// the generated nCount variable. Start value "NAN", function name suffix "max". Because
/// isMinOrMax() is true, Reduction declares nCount and returns 0 when it stays 0.
1945 class OpMax : public Reduction
1946 {
1947 public:
1948  explicit OpMax(int nResultSize) : Reduction(nResultSize) {}
1949 
1950  virtual std::string GetBottom() override { return "NAN"; }
      /// @return "fmax_count(lhs,rhs, &nCount)"
1951  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1952  {
1953  return "fmax_count(" + lhs + "," + rhs + ", &nCount)";
1954  }
1955  virtual std::string BinFuncName() const override { return "max"; }
1956  virtual bool isMinOrMax() const override { return true; }
1957  virtual bool canHandleMultiVector() const override { return true; }
1958 };
1959 
/// SumOfProduct operator with function name suffix "fsop". The function body is generated by
/// SumOfProduct::GenSlidingWindowFunction, which as visible in this file does not call
/// GetBottom() or Gen2().
1960 class OpSumProduct : public SumOfProduct
1961 {
1962 public:
1963  virtual std::string GetBottom() override { return "0"; }
      /// @return "lhs*rhs"
1964  virtual std::string Gen2( const std::string& lhs, const std::string& rhs ) const override
1965  {
1966  return lhs + "*" + rhs;
1967  }
1968  virtual std::string BinFuncName() const override { return "fsop"; }
1969 };
1970 
1971 template<class Base>
1972 void ParallelReductionVectorRef<Base>::GenSlidingWindowFunction( std::stringstream& ss )
1973 {
1974  if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1975  {
1976  std::string name = Base::GetName();
1977  ss << "__kernel void " << name;
1978  ss << "_reduction(__global double* A, "
1979  "__global double *result,int arrayLength,int windowSize){\n";
1980  ss << " double tmp, current_result =" <<
1981  mpCodeGen->GetBottom();
1982  ss << ";\n";
1983  ss << " int writePos = get_group_id(1);\n";
1984  ss << " int lidx = get_local_id(0);\n";
1985  ss << " __local double shm_buf[256];\n";
1986  if (mpDVR->IsStartFixed())
1987  ss << " int offset = 0;\n";
1988  else // if (!mpDVR->IsStartFixed())
1989  ss << " int offset = get_group_id(1);\n";
1990  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1991  ss << " int end = windowSize;\n";
1992  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1993  ss << " int end = offset + windowSize;\n";
1994  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1995  ss << " int end = windowSize + get_group_id(1);\n";
1996  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1997  ss << " int end = windowSize;\n";
1998  ss << " end = min(end, arrayLength);\n";
1999 
2000  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2001  ss << " int loop = arrayLength/512 + 1;\n";
2002  ss << " for (int l=0; l<loop; l++){\n";
2003  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2004  ss << " int loopOffset = l*512;\n";
2005  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2006  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2007  "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
2008  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2009  "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
2010  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2011  ss << " tmp = legalize(" << mpCodeGen->Gen2(
2012  "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
2013  ss << " shm_buf[lidx] = tmp;\n";
2014  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2015  ss << " for (int i = 128; i >0; i/=2) {\n";
2016  ss << " if (lidx < i)\n";
2017  ss << " shm_buf[lidx] = ";
2018  // Special case count
2019  if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2020  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2021  else
2022  ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
2023  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2024  ss << " }\n";
2025  ss << " if (lidx == 0)\n";
2026  ss << " current_result =";
2027  if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2028  ss << "current_result + shm_buf[0]";
2029  else
2030  ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
2031  ss << ";\n";
2032  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2033  ss << " }\n";
2034  ss << " if (lidx == 0)\n";
2035  ss << " result[writePos] = current_result;\n";
2036  ss << "}\n";
2037  }
2038  else
2039  {
2040  std::string name = Base::GetName();
2041  /*sum reduction*/
2042  ss << "__kernel void " << name << "_sum";
2043  ss << "_reduction(__global double* A, "
2044  "__global double *result,int arrayLength,int windowSize){\n";
2045  ss << " double tmp, current_result =" <<
2046  mpCodeGen->GetBottom();
2047  ss << ";\n";
2048  ss << " int writePos = get_group_id(1);\n";
2049  ss << " int lidx = get_local_id(0);\n";
2050  ss << " __local double shm_buf[256];\n";
2051  if (mpDVR->IsStartFixed())
2052  ss << " int offset = 0;\n";
2053  else // if (!mpDVR->IsStartFixed())
2054  ss << " int offset = get_group_id(1);\n";
2055  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2056  ss << " int end = windowSize;\n";
2057  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2058  ss << " int end = offset + windowSize;\n";
2059  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2060  ss << " int end = windowSize + get_group_id(1);\n";
2061  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2062  ss << " int end = windowSize;\n";
2063  ss << " end = min(end, arrayLength);\n";
2064  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2065  ss << " int loop = arrayLength/512 + 1;\n";
2066  ss << " for (int l=0; l<loop; l++){\n";
2067  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2068  ss << " int loopOffset = l*512;\n";
2069  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2070  ss << " tmp = legalize(";
2071  ss << "(A[loopOffset + lidx + offset]+ tmp)";
2072  ss << ", tmp);\n";
2073  ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
2074  ss << ", tmp);\n";
2075  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2076  ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
2077  ss << ", tmp);\n";
2078  ss << " shm_buf[lidx] = tmp;\n";
2079  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2080  ss << " for (int i = 128; i >0; i/=2) {\n";
2081  ss << " if (lidx < i)\n";
2082  ss << " shm_buf[lidx] = ";
2083  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2084  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2085  ss << " }\n";
2086  ss << " if (lidx == 0)\n";
2087  ss << " current_result =";
2088  ss << "current_result + shm_buf[0]";
2089  ss << ";\n";
2090  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2091  ss << " }\n";
2092  ss << " if (lidx == 0)\n";
2093  ss << " result[writePos] = current_result;\n";
2094  ss << "}\n";
2095  /*count reduction*/
2096  ss << "__kernel void " << name << "_count";
2097  ss << "_reduction(__global double* A, "
2098  "__global double *result,int arrayLength,int windowSize){\n";
2099  ss << " double tmp, current_result =" <<
2100  mpCodeGen->GetBottom();
2101  ss << ";\n";
2102  ss << " int writePos = get_group_id(1);\n";
2103  ss << " int lidx = get_local_id(0);\n";
2104  ss << " __local double shm_buf[256];\n";
2105  if (mpDVR->IsStartFixed())
2106  ss << " int offset = 0;\n";
2107  else // if (!mpDVR->IsStartFixed())
2108  ss << " int offset = get_group_id(1);\n";
2109  if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2110  ss << " int end = windowSize;\n";
2111  else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2112  ss << " int end = offset + windowSize;\n";
2113  else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
2114  ss << " int end = windowSize + get_group_id(1);\n";
2115  else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
2116  ss << " int end = windowSize;\n";
2117  ss << " end = min(end, arrayLength);\n";
2118  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2119  ss << " int loop = arrayLength/512 + 1;\n";
2120  ss << " for (int l=0; l<loop; l++){\n";
2121  ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
2122  ss << " int loopOffset = l*512;\n";
2123  ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
2124  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2125  ss << ", tmp);\n";
2126  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
2127  ss << ", tmp);\n";
2128  ss << " } else if ((loopOffset + lidx + offset) < end)\n";
2129  ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
2130  ss << ", tmp);\n";
2131  ss << " shm_buf[lidx] = tmp;\n";
2132  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2133  ss << " for (int i = 128; i >0; i/=2) {\n";
2134  ss << " if (lidx < i)\n";
2135  ss << " shm_buf[lidx] = ";
2136  ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
2137  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2138  ss << " }\n";
2139  ss << " if (lidx == 0)\n";
2140  ss << " current_result =";
2141  ss << "current_result + shm_buf[0];";
2142  ss << ";\n";
2143  ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
2144  ss << " }\n";
2145  ss << " if (lidx == 0)\n";
2146  ss << " result[writePos] = current_result;\n";
2147  ss << "}\n";
2148  }
2149 
2150 }
2151 
2152 template<class Base>
2153 size_t ParallelReductionVectorRef<Base>::GenReductionLoopHeader(
2154  std::stringstream& ss, int nResultSize, bool& needBody )
2155 {
2156  assert(mpDVR);
2157  size_t nCurWindowSize = mpDVR->GetRefRowSize();
2158  std::string temp = Base::GetName() + "[gid0]";
2159  ss << "tmp = ";
2160  // Special case count
2161  if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
2162  {
2163  ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
2164  ss << "nCount = nCount-1;\n";
2165  ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
2166  ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
2167  }
2168  else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
2169  ss << temp << "+ tmp";
2170  else
2171  ss << mpCodeGen->Gen2(temp, "tmp");
2172  ss << ";\n\t";
2173  needBody = false;
2174  return nCurWindowSize;
2175 }
2176 
2177 template<class Base>
2178 size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
2179 {
2180  assert(Base::mpClmem == nullptr);
2181 
2182  OpenCLZone zone;
2185  cl_int err;
2186  size_t nInput = mpDVR->GetArrayLength();
2187  size_t nCurWindowSize = mpDVR->GetRefRowSize();
2188  // create clmem buffer
2189  if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
2190  throw Unhandled(__FILE__, __LINE__);
2191  double* pHostBuffer = const_cast<double*>(
2192  mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
2193  size_t szHostBuffer = nInput * sizeof(double);
2194  Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
2195  cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
2196  szHostBuffer,
2197  pHostBuffer, &err);
2198  SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
2199 
2200  mpClmem2 = clCreateBuffer(kEnv.mpkContext,
2201  CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
2202  sizeof(double) * w, nullptr, nullptr);
2203  if (CL_SUCCESS != err)
2204  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2205  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
2206 
2207  // reproduce the reduction function name
2208  std::string kernelName;
2209  if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
2210  kernelName = Base::GetName() + "_reduction";
2211  else
2212  kernelName = Base::GetName() + "_sum_reduction";
2213  cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2214  if (err != CL_SUCCESS)
2215  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2216  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
2217 
2218  // set kernel arg of reduction kernel
2219  // TODO(Wei Wei): use unique name for kernel
2220  cl_mem buf = Base::GetCLBuffer();
2221  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
2222  err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
2223  static_cast<void*>(&buf));
2224  if (CL_SUCCESS != err)
2225  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2226 
2227  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
2228  err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
2229  if (CL_SUCCESS != err)
2230  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2231 
2232  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
2233  err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
2234  if (CL_SUCCESS != err)
2235  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2236 
2237  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
2238  err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2239  if (CL_SUCCESS != err)
2240  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2241 
2242  // set work group size and execute
2243  size_t global_work_size[] = { 256, static_cast<size_t>(w) };
2244  size_t const local_work_size[] = { 256, 1 };
2245  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2246  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2247  global_work_size, local_work_size, 0, nullptr, nullptr);
2248  if (CL_SUCCESS != err)
2249  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2250  err = clFinish(kEnv.mpkCmdQueue);
2251  if (CL_SUCCESS != err)
2252  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2253  if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
2254  {
2255  /*average need more reduction kernel for count computing*/
2256  std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
2257  double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2258  mpClmem2,
2259  CL_TRUE, CL_MAP_READ, 0,
2260  sizeof(double) * w, 0, nullptr, nullptr,
2261  &err));
2262  if (err != CL_SUCCESS)
2263  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
2264 
2265  for (int i = 0; i < w; i++)
2266  pAllBuffer[i] = resbuf[i];
2267  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
2268  if (err != CL_SUCCESS)
2269  throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
2270 
2271  kernelName = Base::GetName() + "_count_reduction";
2272  redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
2273  if (err != CL_SUCCESS)
2274  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2275  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
2276 
2277  // set kernel arg of reduction kernel
2278  buf = Base::GetCLBuffer();
2279  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
2280  err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
2281  static_cast<void*>(&buf));
2282  if (CL_SUCCESS != err)
2283  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2284 
2285  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
2286  err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
2287  if (CL_SUCCESS != err)
2288  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2289 
2290  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
2291  err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
2292  if (CL_SUCCESS != err)
2293  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2294 
2295  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
2296  err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2297  if (CL_SUCCESS != err)
2298  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2299 
2300  // set work group size and execute
2301  size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
2302  size_t const local_work_size1[] = { 256, 1 };
2303  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2304  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2305  global_work_size1, local_work_size1, 0, nullptr, nullptr);
2306  if (CL_SUCCESS != err)
2307  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2308  err = clFinish(kEnv.mpkCmdQueue);
2309  if (CL_SUCCESS != err)
2310  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2311  resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2312  mpClmem2,
2313  CL_TRUE, CL_MAP_READ, 0,
2314  sizeof(double) * w, 0, nullptr, nullptr,
2315  &err));
2316  if (err != CL_SUCCESS)
2317  throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
2318  for (int i = 0; i < w; i++)
2319  pAllBuffer[i + w] = resbuf[i];
2320  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
2321  // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
2322  if (CL_SUCCESS != err)
2323  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
2324  if (mpClmem2)
2325  {
2326  err = clReleaseMemObject(mpClmem2);
2327  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2328  mpClmem2 = nullptr;
2329  }
2330  mpClmem2 = clCreateBuffer(kEnv.mpkContext,
2331  cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
2332  w * sizeof(double) * 2, pAllBuffer.get(), &err);
2333  if (CL_SUCCESS != err)
2334  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2335  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
2336  }
2337  // set kernel arg
2338  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2339  err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
2340  if (CL_SUCCESS != err)
2341  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2342  return 1;
2343 }
2344 
2345 struct SumIfsArgs
2346 {
2347  explicit SumIfsArgs(cl_mem x) : mCLMem(x), mConst(0.0) { }
2348  explicit SumIfsArgs(double x) : mCLMem(nullptr), mConst(x) { }
2349  cl_mem mCLMem;
2350  double mConst;
2351 };
2352 
2354 class DynamicKernelSoPArguments : public DynamicKernelArgument
2355 {
2356 public:
2357  typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
2358 
2359  DynamicKernelSoPArguments( const ScCalcConfig& config,
2360  const std::string& s, const FormulaTreeNodeRef& ft,
2361  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize );
2362 
2364  virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) override
2365  {
2366  OpenCLZone zone;
2367  unsigned i = 0;
2368  for (const auto& rxSubArgument : mvSubArguments)
2369  {
2370  i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
2371  }
2372  if (dynamic_cast<OpGeoMean*>(mpCodeGen.get()))
2373  {
2376  cl_int err;
2377  cl_mem pClmem2;
2378 
2379  std::vector<cl_mem> vclmem;
2380  for (const auto& rxSubArgument : mvSubArguments)
2381  {
2382  if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2383  vclmem.push_back(VR->GetCLBuffer());
2384  else
2385  vclmem.push_back(nullptr);
2386  }
2387  pClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2388  sizeof(double) * nVectorWidth, nullptr, &err);
2389  if (CL_SUCCESS != err)
2390  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2391  SAL_INFO("sc.opencl", "Created buffer " << pClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2392 
2393  std::string kernelName = "GeoMean_reduction";
2394  cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2395  if (err != CL_SUCCESS)
2396  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2397  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2398 
2399  // set kernel arg of reduction kernel
2400  for (size_t j = 0; j < vclmem.size(); j++)
2401  {
2402  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": " << (vclmem[j] ? "cl_mem" : "double") << ": " << vclmem[j]);
2403  err = clSetKernelArg(redKernel, j,
2404  vclmem[j] ? sizeof(cl_mem) : sizeof(double),
2405  static_cast<void*>(&vclmem[j]));
2406  if (CL_SUCCESS != err)
2407  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2408  }
2409  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << pClmem2);
2410  err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&pClmem2));
2411  if (CL_SUCCESS != err)
2412  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2413 
2414  // set work group size and execute
2415  size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
2416  size_t const local_work_size[] = { 256, 1 };
2417  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2418  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2419  global_work_size, local_work_size, 0, nullptr, nullptr);
2420  if (CL_SUCCESS != err)
2421  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2422  err = clFinish(kEnv.mpkCmdQueue);
2423  if (CL_SUCCESS != err)
2424  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2425 
2426  // Pass pClmem2 to the "real" kernel
2427  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << pClmem2);
2428  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&pClmem2));
2429  if (CL_SUCCESS != err)
2430  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2431  }
2432  if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
2433  {
2436  cl_int err;
2437  DynamicKernelArgument* Arg = mvSubArguments[0].get();
2438  DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
2439  static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
2440  mpClmem2 = nullptr;
2441 
2442  if (OpSumCodeGen->NeedReductionKernel())
2443  {
2444  size_t nInput = slidingArgPtr->GetArrayLength();
2445  size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
2446  std::vector<SumIfsArgs> vclmem;
2447 
2448  for (const auto& rxSubArgument : mvSubArguments)
2449  {
2450  if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
2451  vclmem.emplace_back(VR->GetCLBuffer());
2452  else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
2453  vclmem.emplace_back(CA->GetDouble());
2454  else
2455  vclmem.emplace_back(nullptr);
2456  }
2457  mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
2458  sizeof(double) * nVectorWidth, nullptr, &err);
2459  if (CL_SUCCESS != err)
2460  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2461  SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
2462 
2463  std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
2464  cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
2465  if (err != CL_SUCCESS)
2466  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2467  SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
2468 
2469  // set kernel arg of reduction kernel
2470  for (size_t j = 0; j < vclmem.size(); j++)
2471  {
2472  if (vclmem[j].mCLMem)
2473  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
2474  else
2475  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << vclmem[j].mConst);
2476  err = clSetKernelArg(redKernel, j,
2477  vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
2478  vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
2479  static_cast<void*>(&vclmem[j].mConst));
2480  if (CL_SUCCESS != err)
2481  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2482  }
2483  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
2484  err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2485  if (CL_SUCCESS != err)
2486  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2487 
2488  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
2489  err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
2490  if (CL_SUCCESS != err)
2491  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2492 
2493  SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
2494  err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
2495  if (CL_SUCCESS != err)
2496  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2497  // set work group size and execute
2498  size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
2499  size_t const local_work_size[] = { 256, 1 };
2500  SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
2501  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
2502  global_work_size, local_work_size, 0, nullptr, nullptr);
2503  if (CL_SUCCESS != err)
2504  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2505 
2506  err = clFinish(kEnv.mpkCmdQueue);
2507  if (CL_SUCCESS != err)
2508  throw OpenCLError("clFinish", err, __FILE__, __LINE__);
2509 
2510  SAL_INFO("sc.opencl", "Releasing kernel " << redKernel);
2511  err = clReleaseKernel(redKernel);
2512  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
2513 
2514  // Pass mpClmem2 to the "real" kernel
2515  SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
2516  err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
2517  if (CL_SUCCESS != err)
2518  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2519  }
2520  }
2521  return i;
2522  }
2523 
2524  virtual void GenSlidingWindowFunction( std::stringstream& ss ) override
2525  {
2526  for (DynamicKernelArgumentRef & rArg : mvSubArguments)
2527  rArg->GenSlidingWindowFunction(ss);
2528  mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
2529  }
2530  virtual void GenDeclRef( std::stringstream& ss ) const override
2531  {
2532  for (size_t i = 0; i < mvSubArguments.size(); i++)
2533  {
2534  if (i)
2535  ss << ",";
2536  mvSubArguments[i]->GenDeclRef(ss);
2537  }
2538  }
2539  virtual void GenDecl( std::stringstream& ss ) const override
2540  {
2541  for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2542  ++it)
2543  {
2544  if (it != mvSubArguments.begin())
2545  ss << ", ";
2546  (*it)->GenDecl(ss);
2547  }
2548  }
2549 
2550  virtual size_t GetWindowSize() const override
2551  {
2552  size_t nCurWindowSize = 0;
2553  for (const auto & rSubArgument : mvSubArguments)
2554  {
2555  size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
2556  nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
2557  nCurChildWindowSize : nCurWindowSize;
2558  }
2559  return nCurWindowSize;
2560  }
2561 
2563  virtual void GenSlidingWindowDecl( std::stringstream& ss ) const override
2564  {
2565  for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
2566  ++it)
2567  {
2568  if (it != mvSubArguments.begin())
2569  ss << ", ";
2570  (*it)->GenSlidingWindowDecl(ss);
2571  }
2572  }
// Builds the expression through which generated code refers to the value of this argument.
//
// nested == false: returns "<mSymName>_<BinFuncName()>(" followed by the GenDeclRef() output of
// all sub-arguments separated by ", " and a closing ")".
// nested == true: requires exactly two sub-arguments (throws Unhandled otherwise) and returns
// "(" + Gen2(first, second) + ")", where each operand is the GenSlidingWindowDeclRef() result of
// the corresponding sub-argument.
2575  virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const override
2576  {
2577  std::stringstream ss;
2578  if (!nested)
2579  {
2580  ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
2581  for (size_t i = 0; i < mvSubArguments.size(); i++)
2582  {
// separator before every sub-argument except the first
2583  if (i)
2584  ss << ", ";
2585  mvSubArguments[i]->GenDeclRef(ss);
2586  }
2587  ss << ")";
2588  }
2589  else
2590  {
2591  if (mvSubArguments.size() != 2)
2592  throw Unhandled(__FILE__, __LINE__);
// NOTE(review): both initialisers below look truncated in this copy of the file - nothing
// follows GetType(), neither a comparison nor a terminating ';'. The flags are derived from the
// formula token type of each sub-argument; the exact condition must be confirmed against the
// original source.
2593  bool bArgument1_NeedNested =
2594  mvSubArguments[0]->GetFormulaToken()->GetType()
2596  bool bArgument2_NeedNested =
2597  mvSubArguments[1]->GetFormulaToken()->GetType()
2599  ss << "(";
// each flag is forwarded as the 'nested' parameter of the respective sub-argument
2600  ss << mpCodeGen->
2601  Gen2(mvSubArguments[0]
2602  ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
2603  mvSubArguments[1]
2604  ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
2605  ss << ")";
2606  }
2607  return ss.str();
2608  }
2609  virtual std::string DumpOpName() const override
2610  {
2611  std::string t = "_" + mpCodeGen->BinFuncName();
2612  for (const auto & rSubArgument : mvSubArguments)
2613  t += rSubArgument->DumpOpName();
2614  return t;
2615  }
2616  virtual void DumpInlineFun( std::set<std::string>& decls,
2617  std::set<std::string>& funs ) const override
2618  {
2619  mpCodeGen->BinInlineFun(decls, funs);
2620  for (const auto & rSubArgument : mvSubArguments)
2621  rSubArgument->DumpInlineFun(decls, funs);
2622  }
2623  virtual bool IsEmpty() const override
2624  {
2625  for (const auto & rSubArgument : mvSubArguments)
2626  if( !rSubArgument->IsEmpty())
2627  return false;
2628  return true;
2629  }
2630  virtual ~DynamicKernelSoPArguments() override
2631  {
2632  if (mpClmem2)
2633  {
2634  cl_int err;
2635  err = clReleaseMemObject(mpClmem2);
2636  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2637  mpClmem2 = nullptr;
2638  }
2639  }
2640 
2641 private:
2642  SubArgumentsType mvSubArguments;
2643  std::shared_ptr<SlidingFunctionBase> mpCodeGen;
2644  cl_mem mpClmem2;
2645 };
2646 
2647 }
2648 
2650  const std::string& ts, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
2651  int nResultSize )
2652 {
2653  return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
2654 }
2655 
2656 template<class Base>
2657 static std::shared_ptr<DynamicKernelArgument> VectorRefFactory( const ScCalcConfig& config, const std::string& s,
2658  const FormulaTreeNodeRef& ft,
2659  std::shared_ptr<SlidingFunctionBase>& pCodeGen,
2660  int index )
2661 {
2662  //Black lists ineligible classes here ..
2663  // SUMIFS does not perform parallel reduction at DoubleVectorRef level
2664  if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
2665  {
2666  // coverity[identical_branches] - only identical if Base happens to be VectorRef
2667  if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
2668  return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
2669  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2670  }
2671  // AVERAGE is not supported yet
2672  //Average has been supported by reduction kernel
2673  /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
2674  {
2675  return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
2676  }*/
2677  // MUL is not supported yet
2678  else if (dynamic_cast<OpMul*>(pCodeGen.get()))
2679  {
2680  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2681  }
2682  // Sub is not a reduction per se
2683  else if (dynamic_cast<OpSub*>(pCodeGen.get()))
2684  {
2685  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2686  }
2687  // Only child class of Reduction is supported
2688  else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
2689  {
2690  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2691  }
2692 
2693  const formula::DoubleVectorRefToken* pDVR =
2694  static_cast<const formula::DoubleVectorRefToken*>(
2695  ft->GetFormulaToken());
2696  // Window being too small to justify a parallel reduction
2697  if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
2698  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2699  if (pDVR->IsStartFixed() == pDVR->IsEndFixed())
2700  return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
2701  else // Other cases are not supported as well
2702  return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
2703 }
2704 
2705 DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
2706  const std::string& s, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize ) :
2707  DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(nullptr)
2708 {
2709  size_t nChildren = ft->Children.size();
2710 
2711  for (size_t i = 0; i < nChildren; i++)
2712  {
2713  FormulaTreeNodeRef rChild = ft->Children[i];
2714  if (!rChild)
2715  throw Unhandled(__FILE__, __LINE__);
2716  FormulaToken* pChild = rChild->GetFormulaToken();
2717  if (!pChild)
2718  throw Unhandled(__FILE__, __LINE__);
2719  OpCode opc = pChild->GetOpCode();
2720  std::stringstream tmpname;
2721  tmpname << s << "_" << i;
2722  std::string ts = tmpname.str();
2723  switch (opc)
2724  {
2725  case ocPush:
2726  if (pChild->GetType() == formula::svDoubleVectorRef)
2727  {
2728  const formula::DoubleVectorRefToken* pDVR =
2729  static_cast<const formula::DoubleVectorRefToken*>(pChild);
2730 
2731  // The code below will split one svDoubleVectorRef into one subargument
2732  // for each column of data, and then all these subarguments will be later
2733  // passed to the code generating the function. Most of the code then
2734  // simply treats each subargument as one argument to the function, and thus
2735  // could break in this case.
2736  // As a simple solution, simply prevent this case, unless the code in question
2737  // explicitly claims it will handle this situation properly.
2738  if( pDVR->GetArrays().size() > 1 )
2739  {
2740  if( pCodeGen->canHandleMultiVector())
2741  SAL_INFO("sc.opencl", "multi-column DoubleRef");
2742  else
2743  throw UnhandledToken(("Function '" + pCodeGen->BinFuncName()
2744  + "' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
2745  }
2746 
2747  // FIXME: The Right Thing to do would be to compare the accumulated kernel
2748  // parameter size against the CL_DEVICE_MAX_PARAMETER_SIZE of the device, but
2749  // let's just do this sanity check for now. The kernel compilation will
2750  // hopefully fail anyway if the size of parameters exceeds the limit and this
2751  // sanity check is just to make us bail out a bit earlier.
2752 
2753  // The number 50 comes from the fact that the minimum size of
2754  // CL_DEVICE_MAX_PARAMETER_SIZE is 256, which for 32-bit code probably means 64
2755  // of them. Round down a bit.
2756 
2757  if (pDVR->GetArrays().size() > 50)
2758  throw UnhandledToken(("Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->GetArrays().size()) + ")").c_str(), __FILE__, __LINE__);
2759 
2760  for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
2761  {
2762  SAL_INFO("sc.opencl", "i=" << i << " j=" << j <<
2763  " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
2764  " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
2765  " allStringsAreNull=" << (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())?"YES":"NO") <<
2766  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2767  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2768 
2769  if (pDVR->GetArrays()[j].mpNumericArray &&
2770  pCodeGen->takeNumeric() &&
2771  pDVR->GetArrays()[j].mpStringArray &&
2772  pCodeGen->takeString())
2773  {
2774  // Function takes numbers or strings, there are both
2775  SAL_INFO("sc.opencl", "Numbers and strings");
2776  mvSubArguments.push_back(
2777  std::make_shared<DynamicKernelMixedSlidingArgument>(mCalcConfig,
2778  ts, ft->Children[i], mpCodeGen, j));
2779  }
2780  else if (pDVR->GetArrays()[j].mpNumericArray &&
2781  pCodeGen->takeNumeric() &&
2782  (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) || mCalcConfig.meStringConversion == ScCalcConfig::StringConversion::ZERO))
2783  {
2784  // Function takes numbers, and either there
2785  // are no strings, or there are strings but
2786  // they are to be treated as zero
2787  SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2788  mvSubArguments.push_back(
2789  VectorRefFactory<VectorRef>(mCalcConfig,
2790  ts, ft->Children[i], mpCodeGen, j));
2791  }
2792  else if (pDVR->GetArrays()[j].mpNumericArray == nullptr &&
2793  pCodeGen->takeNumeric() &&
2794  pDVR->GetArrays()[j].mpStringArray &&
2796  {
2797  // Function takes numbers, and there are only
2798  // strings, but they are to be treated as zero
2799  SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2800  mvSubArguments.push_back(
2801  VectorRefFactory<VectorRef>(mCalcConfig,
2802  ts, ft->Children[i], mpCodeGen, j));
2803  }
2804  else if (pDVR->GetArrays()[j].mpStringArray &&
2805  pCodeGen->takeString())
2806  {
2807  // There are strings, and the function takes strings.
2808  SAL_INFO("sc.opencl", "Strings only");
2809  mvSubArguments.push_back(
2811  <DynamicKernelStringArgument>(mCalcConfig,
2812  ts, ft->Children[i], mpCodeGen, j));
2813  }
2814  else if (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) &&
2815  pDVR->GetArrays()[j].mpNumericArray == nullptr)
2816  {
2817  // There are only empty cells. Push as an
2818  // array of NANs
2819  SAL_INFO("sc.opencl", "Only empty cells");
2820  mvSubArguments.push_back(
2821  VectorRefFactory<VectorRef>(mCalcConfig,
2822  ts, ft->Children[i], mpCodeGen, j));
2823  }
2824  else
2825  {
2826  SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2827  throw UnhandledToken(("Unhandled numbers/strings combination for '"
2828  + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2829  }
2830  }
2831  }
2832  else if (pChild->GetType() == formula::svSingleVectorRef)
2833  {
2834  const formula::SingleVectorRefToken* pSVR =
2835  static_cast<const formula::SingleVectorRefToken*>(pChild);
2836 
2837  SAL_INFO("sc.opencl", "i=" << i <<
2838  " mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
2839  " mpStringArray=" << pSVR->GetArray().mpStringArray <<
2840  " allStringsAreNull=" << (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())?"YES":"NO") <<
2841  " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2842  " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2843 
2844  if (pSVR->GetArray().mpNumericArray &&
2845  pCodeGen->takeNumeric() &&
2846  pSVR->GetArray().mpStringArray &&
2847  pCodeGen->takeString())
2848  {
2849  // Function takes numbers or strings, there are both
2850  SAL_INFO("sc.opencl", "Numbers and strings");
2851  mvSubArguments.push_back(
2852  std::make_shared<DynamicKernelMixedArgument>(mCalcConfig,
2853  ts, ft->Children[i]));
2854  }
2855  else if (pSVR->GetArray().mpNumericArray &&
2856  pCodeGen->takeNumeric() &&
2858  {
2859  // Function takes numbers, and either there
2860  // are no strings, or there are strings but
2861  // they are to be treated as zero
2862  SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2863  mvSubArguments.push_back(
2864  std::make_shared<VectorRef>(mCalcConfig, ts,
2865  ft->Children[i]));
2866  }
2867  else if (pSVR->GetArray().mpNumericArray == nullptr &&
2868  pCodeGen->takeNumeric() &&
2869  pSVR->GetArray().mpStringArray &&
2871  {
2872  // Function takes numbers, and there are only
2873  // strings, but they are to be treated as zero
2874  SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2875  mvSubArguments.push_back(
2876  std::make_shared<VectorRef>(mCalcConfig, ts,
2877  ft->Children[i]));
2878  }
2879  else if (pSVR->GetArray().mpStringArray &&
2880  pCodeGen->takeString())
2881  {
2882  // There are strings, and the function takes strings.
2883  SAL_INFO("sc.opencl", "Strings only");
2884  mvSubArguments.push_back(
2885  std::make_shared<DynamicKernelStringArgument>(mCalcConfig,
2886  ts, ft->Children[i]));
2887  }
2888  else if (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) &&
2889  pSVR->GetArray().mpNumericArray == nullptr)
2890  {
2891  // There are only empty cells. Push as an
2892  // array of NANs
2893  SAL_INFO("sc.opencl", "Only empty cells");
2894  mvSubArguments.push_back(
2895  std::make_shared<VectorRef>(mCalcConfig, ts,
2896  ft->Children[i]));
2897  }
2898  else
2899  {
2900  SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2901  throw UnhandledToken(("Unhandled numbers/strings combination for '"
2902  + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2903  }
2904  }
2905  else if (pChild->GetType() == formula::svDouble)
2906  {
2907  SAL_INFO("sc.opencl", "Constant number case");
2908  mvSubArguments.push_back(
2909  std::make_shared<DynamicKernelConstantArgument>(mCalcConfig, ts,
2910  ft->Children[i]));
2911  }
2912  else if (pChild->GetType() == formula::svString
2913  && pCodeGen->takeString())
2914  {
2915  SAL_INFO("sc.opencl", "Constant string case");
2916  mvSubArguments.push_back(
2917  std::make_shared<ConstStringArgument>(mCalcConfig, ts,
2918  ft->Children[i]));
2919  }
2920  else
2921  {
2922  SAL_INFO("sc.opencl", "Unhandled operand, rejecting for OpenCL");
2923  throw UnhandledToken(("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str(), __FILE__, __LINE__);
2924  }
2925  break;
2926  case ocDiv:
2927  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDiv>(nResultSize), nResultSize));
2928  break;
2929  case ocMul:
2930  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMul>(nResultSize), nResultSize));
2931  break;
2932  case ocSub:
2933  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSub>(nResultSize), nResultSize));
2934  break;
2935  case ocAdd:
2936  case ocSum:
2937  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSum>(nResultSize), nResultSize));
2938  break;
2939  case ocAverage:
2940  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAverage>(nResultSize), nResultSize));
2941  break;
2942  case ocMin:
2943  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMin>(nResultSize), nResultSize));
2944  break;
2945  case ocMax:
2946  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMax>(nResultSize), nResultSize));
2947  break;
2948  case ocCount:
2949  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCount>(nResultSize), nResultSize));
2950  break;
2951  case ocSumProduct:
2952  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSumProduct>(), nResultSize));
2953  break;
2954  case ocIRR:
2955  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIRR>(), nResultSize));
2956  break;
2957  case ocMIRR:
2958  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpMIRR>(), nResultSize));
2959  break;
2960  case ocPMT:
2961  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPMT>(), nResultSize));
2962  break;
2963  case ocRate:
2964  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpIntrate>(), nResultSize));
2965  break;
2966  case ocRRI:
2967  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRRI>(), nResultSize));
2968  break;
2969  case ocPpmt:
2970  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPPMT>(), nResultSize));
2971  break;
2972  case ocFisher:
2973  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisher>(), nResultSize));
2974  break;
2975  case ocFisherInv:
2976  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFisherInv>(), nResultSize));
2977  break;
2978  case ocGamma:
2979  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGamma>(), nResultSize));
2980  break;
2981  case ocSLN:
2982  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSLN>(), nResultSize));
2983  break;
2984  case ocGammaLn:
2985  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGammaLn>(), nResultSize));
2986  break;
2987  case ocGauss:
2988  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGauss>(), nResultSize));
2989  break;
2990  /*case ocGeoMean:
2991  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGeoMean));
2992  break;*/
2993  case ocHarMean:
2994  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpHarMean>(), nResultSize));
2995  break;
2996  case ocLessEqual:
2997  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLessEqual>(), nResultSize));
2998  break;
2999  case ocLess:
3000  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpLess>(), nResultSize));
3001  break;
3002  case ocEqual:
3003  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEqual>(), nResultSize));
3004  break;
3005  case ocGreater:
3006  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpGreater>(), nResultSize));
3007  break;
3008  case ocSYD:
3009  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpSYD>(), nResultSize));
3010  break;
3011  case ocCorrel:
3012  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCorrel>(), nResultSize));
3013  break;
3014  case ocCos:
3015  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCos>(), nResultSize));
3016  break;
3017  case ocNegBinomVert :
3018  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNegbinomdist>(), nResultSize));
3019  break;
3020  case ocPearson:
3021  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPearson>(), nResultSize));
3022  break;
3023  case ocRSQ:
3024  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpRsq>(), nResultSize));
3025  break;
3026  case ocCosecant:
3027  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCsc>(), nResultSize));
3028  break;
3029  case ocISPMT:
3030  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpISPMT>(), nResultSize));
3031  break;
3032  case ocPDuration:
3033  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3034  ft->Children[i], std::make_shared<OpPDuration>(), nResultSize));
3035  break;
3036  case ocSinHyp:
3037  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3038  ft->Children[i], std::make_shared<OpSinh>(), nResultSize));
3039  break;
3040  case ocAbs:
3041  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3042  ft->Children[i], std::make_shared<OpAbs>(), nResultSize));
3043  break;
3044  case ocPV:
3045  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3046  ft->Children[i], std::make_shared<OpPV>(), nResultSize));
3047  break;
3048  case ocSin:
3049  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3050  ft->Children[i], std::make_shared<OpSin>(), nResultSize));
3051  break;
3052  case ocTan:
3053  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3054  ft->Children[i], std::make_shared<OpTan>(), nResultSize));
3055  break;
3056  case ocTanHyp:
3057  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3058  ft->Children[i], std::make_shared<OpTanH>(), nResultSize));
3059  break;
3060  case ocStandard:
3061  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3062  ft->Children[i], std::make_shared<OpStandard>(), nResultSize));
3063  break;
3064  case ocWeibull:
3065  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3066  ft->Children[i], std::make_shared<OpWeibull>(), nResultSize));
3067  break;
3068  /*case ocMedian:
3069  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3070  ft->Children[i],std::make_shared<OpMedian));
3071  break;*/
3072  case ocDDB:
3073  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3074  ft->Children[i], std::make_shared<OpDDB>(), nResultSize));
3075  break;
3076  case ocFV:
3077  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3078  ft->Children[i], std::make_shared<OpFV>(), nResultSize));
3079  break;
3080  case ocSumIfs:
3081  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3082  ft->Children[i], std::make_shared<OpSumIfs>(), nResultSize));
3083  break;
3084  /*case ocVBD:
3085  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3086  ft->Children[i],std::make_shared<OpVDB));
3087  break;*/
3088  case ocKurt:
3089  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3090  ft->Children[i], std::make_shared<OpKurt>(), nResultSize));
3091  break;
3092  /*case ocNper:
3093  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3094  ft->Children[i], std::make_shared<OpNper));
3095  break;*/
3096  case ocNormDist:
3097  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3098  ft->Children[i], std::make_shared<OpNormdist>(), nResultSize));
3099  break;
3100  case ocArcCos:
3101  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3102  ft->Children[i], std::make_shared<OpArcCos>(), nResultSize));
3103  break;
3104  case ocSqrt:
3105  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3106  ft->Children[i], std::make_shared<OpSqrt>(), nResultSize));
3107  break;
3108  case ocArcCosHyp:
3109  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3110  ft->Children[i], std::make_shared<OpArcCosHyp>(), nResultSize));
3111  break;
3112  case ocNPV:
3113  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3114  ft->Children[i], std::make_shared<OpNPV>(), nResultSize));
3115  break;
3116  case ocStdNormDist:
3117  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3118  ft->Children[i], std::make_shared<OpNormsdist>(), nResultSize));
3119  break;
3120  case ocNormInv:
3121  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3122  ft->Children[i], std::make_shared<OpNorminv>(), nResultSize));
3123  break;
3124  case ocSNormInv:
3125  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3126  ft->Children[i], std::make_shared<OpNormsinv>(), nResultSize));
3127  break;
3128  case ocPermut:
3129  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3130  ft->Children[i], std::make_shared<OpPermut>(), nResultSize));
3131  break;
3132  case ocPermutationA:
3133  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3134  ft->Children[i], std::make_shared<OpPermutationA>(), nResultSize));
3135  break;
3136  case ocPhi:
3137  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3138  ft->Children[i], std::make_shared<OpPhi>(), nResultSize));
3139  break;
3140  case ocIpmt:
3141  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3142  ft->Children[i], std::make_shared<OpIPMT>(), nResultSize));
3143  break;
3144  case ocConfidence:
3145  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3146  ft->Children[i], std::make_shared<OpConfidence>(), nResultSize));
3147  break;
3148  case ocIntercept:
3149  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3150  ft->Children[i], std::make_shared<OpIntercept>(), nResultSize));
3151  break;
3152  case ocDB:
3153  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3154  std::make_shared<OpDB>(), nResultSize));
3155  break;
3156  case ocLogInv:
3157  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3158  ft->Children[i], std::make_shared<OpLogInv>(), nResultSize));
3159  break;
3160  case ocArcCot:
3161  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3162  ft->Children[i], std::make_shared<OpArcCot>(), nResultSize));
3163  break;
3164  case ocCosHyp:
3165  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3166  ft->Children[i], std::make_shared<OpCosh>(), nResultSize));
3167  break;
3168  case ocCritBinom:
3169  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3170  ft->Children[i], std::make_shared<OpCritBinom>(), nResultSize));
3171  break;
3172  case ocArcCotHyp:
3173  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3174  ft->Children[i], std::make_shared<OpArcCotHyp>(), nResultSize));
3175  break;
3176  case ocArcSin:
3177  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3178  ft->Children[i], std::make_shared<OpArcSin>(), nResultSize));
3179  break;
3180  case ocArcSinHyp:
3181  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3182  ft->Children[i], std::make_shared<OpArcSinHyp>(), nResultSize));
3183  break;
3184  case ocArcTan:
3185  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3186  ft->Children[i], std::make_shared<OpArcTan>(), nResultSize));
3187  break;
3188  case ocArcTanHyp:
3189  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3190  ft->Children[i], std::make_shared<OpArcTanH>(), nResultSize));
3191  break;
3192  case ocBitAnd:
3193  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3194  ft->Children[i], std::make_shared<OpBitAnd>(), nResultSize));
3195  break;
3196  case ocForecast:
3197  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3198  ft->Children[i], std::make_shared<OpForecast>(), nResultSize));
3199  break;
3200  case ocLogNormDist:
3201  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3202  ft->Children[i], std::make_shared<OpLogNormDist>(), nResultSize));
3203  break;
3204  /*case ocGammaDist:
3205  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3206  ft->Children[i], std::make_shared<OpGammaDist));
3207  break;*/
3208  case ocLn:
3209  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3210  ft->Children[i], std::make_shared<OpLn>(), nResultSize));
3211  break;
3212  case ocRound:
3213  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3214  ft->Children[i], std::make_shared<OpRound>(), nResultSize));
3215  break;
3216  case ocCot:
3217  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3218  ft->Children[i], std::make_shared<OpCot>(), nResultSize));
3219  break;
3220  case ocCotHyp:
3221  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3222  ft->Children[i], std::make_shared<OpCoth>(), nResultSize));
3223  break;
3224  case ocFDist:
3225  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3226  ft->Children[i], std::make_shared<OpFdist>(), nResultSize));
3227  break;
3228  case ocVar:
3229  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3230  ft->Children[i], std::make_shared<OpVar>(), nResultSize));
3231  break;
3232  /*case ocChiDist:
3233  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3234  ft->Children[i],std::make_shared<OpChiDist));
3235  break;*/
3236  case ocPow:
3237  case ocPower:
3238  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3239  ft->Children[i], std::make_shared<OpPower>(), nResultSize));
3240  break;
3241  case ocOdd:
3242  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3243  ft->Children[i], std::make_shared<OpOdd>(), nResultSize));
3244  break;
3245  /*case ocChiSqDist:
3246  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3247  ft->Children[i],std::make_shared<OpChiSqDist));
3248  break;
3249  case ocChiSqInv:
3250  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3251  ft->Children[i],std::make_shared<OpChiSqInv));
3252  break;
3253  case ocGammaInv:
3254  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3255  ft->Children[i], std::make_shared<OpGammaInv));
3256  break;*/
3257  case ocFloor:
3258  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3259  ft->Children[i], std::make_shared<OpFloor>(), nResultSize));
3260  break;
3261  /*case ocFInv:
3262  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3263  ft->Children[i], std::make_shared<OpFInv));
3264  break;*/
3265  case ocFTest:
3266  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3267  ft->Children[i], std::make_shared<OpFTest>(), nResultSize));
3268  break;
3269  case ocB:
3270  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3271  ft->Children[i], std::make_shared<OpB>(), nResultSize));
3272  break;
3273  case ocBetaDist:
3274  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3275  ft->Children[i], std::make_shared<OpBetaDist>(), nResultSize));
3276  break;
3277  case ocCosecantHyp:
3278  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3279  ft->Children[i], std::make_shared<OpCscH>(), nResultSize));
3280  break;
3281  case ocExp:
3282  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3283  ft->Children[i], std::make_shared<OpExp>(), nResultSize));
3284  break;
3285  case ocLog10:
3286  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3287  ft->Children[i], std::make_shared<OpLog10>(), nResultSize));
3288  break;
3289  case ocExpDist:
3290  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3291  ft->Children[i], std::make_shared<OpExponDist>(), nResultSize));
3292  break;
3293  case ocAverageIfs:
3294  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3295  ft->Children[i], std::make_shared<OpAverageIfs>(), nResultSize));
3296  break;
3297  case ocCountIfs:
3298  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3299  ft->Children[i], std::make_shared<OpCountIfs>(), nResultSize));
3300  break;
3301  case ocCombinA:
3302  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3303  ft->Children[i], std::make_shared<OpCombinA>(), nResultSize));
3304  break;
3305  case ocEven:
3306  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3307  ft->Children[i], std::make_shared<OpEven>(), nResultSize));
3308  break;
3309  case ocLog:
3310  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3311  ft->Children[i], std::make_shared<OpLog>(), nResultSize));
3312  break;
3313  case ocMod:
3314  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3315  ft->Children[i], std::make_shared<OpMod>(), nResultSize));
3316  break;
3317  case ocTrunc:
3318  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3319  ft->Children[i], std::make_shared<OpTrunc>(), nResultSize));
3320  break;
3321  case ocSkew:
3322  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3323  ft->Children[i], std::make_shared<OpSkew>(), nResultSize));
3324  break;
3325  case ocArcTan2:
3326  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3327  ft->Children[i], std::make_shared<OpArcTan2>(), nResultSize));
3328  break;
3329  case ocBitOr:
3330  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3331  ft->Children[i], std::make_shared<OpBitOr>(), nResultSize));
3332  break;
3333  case ocBitLshift:
3334  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3335  ft->Children[i], std::make_shared<OpBitLshift>(), nResultSize));
3336  break;
3337  case ocBitRshift:
3338  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3339  ft->Children[i], std::make_shared<OpBitRshift>(), nResultSize));
3340  break;
3341  case ocBitXor:
3342  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3343  ft->Children[i], std::make_shared<OpBitXor>(), nResultSize));
3344  break;
3345  /*case ocChiInv:
3346  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3347  ft->Children[i],std::make_shared<OpChiInv));
3348  break;*/
3349  case ocPoissonDist:
3350  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3351  ft->Children[i], std::make_shared<OpPoisson>(), nResultSize));
3352  break;
3353  case ocSumSQ:
3354  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3355  ft->Children[i], std::make_shared<OpSumSQ>(), nResultSize));
3356  break;
3357  case ocSkewp:
3358  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3359  ft->Children[i], std::make_shared<OpSkewp>(), nResultSize));
3360  break;
3361  case ocBinomDist:
3362  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3363  ft->Children[i], std::make_shared<OpBinomdist>(), nResultSize));
3364  break;
3365  case ocVarP:
3366  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3367  ft->Children[i], std::make_shared<OpVarP>(), nResultSize));
3368  break;
3369  case ocCeil:
3370  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3371  ft->Children[i], std::make_shared<OpCeil>(), nResultSize));
3372  break;
3373  case ocCombin:
3374  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3375  ft->Children[i], std::make_shared<OpCombin>(), nResultSize));
3376  break;
3377  case ocDevSq:
3378  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3379  ft->Children[i], std::make_shared<OpDevSq>(), nResultSize));
3380  break;
3381  case ocStDev:
3382  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3383  ft->Children[i], std::make_shared<OpStDev>(), nResultSize));
3384  break;
3385  case ocSlope:
3386  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3387  ft->Children[i], std::make_shared<OpSlope>(), nResultSize));
3388  break;
3389  case ocSTEYX:
3390  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3391  ft->Children[i], std::make_shared<OpSTEYX>(), nResultSize));
3392  break;
3393  case ocZTest:
3394  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3395  ft->Children[i], std::make_shared<OpZTest>(), nResultSize));
3396  break;
3397  case ocPi:
3398  mvSubArguments.push_back(
3399  std::make_shared<DynamicKernelPiArgument>(mCalcConfig, ts,
3400  ft->Children[i]));
3401  break;
3402  case ocRandom:
3403  mvSubArguments.push_back(
3404  std::make_shared<DynamicKernelRandomArgument>(mCalcConfig, ts,
3405  ft->Children[i]));
3406  break;
3407  case ocProduct:
3408  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3409  ft->Children[i], std::make_shared<OpProduct>(), nResultSize));
3410  break;
3411  /*case ocHypGeomDist:
3412  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3413  ft->Children[i],std::make_shared<OpHypGeomDist));
3414  break;*/
3415  case ocSumX2MY2:
3416  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3417  ft->Children[i], std::make_shared<OpSumX2MY2>(), nResultSize));
3418  break;
3419  case ocSumX2DY2:
3420  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3421  ft->Children[i], std::make_shared<OpSumX2PY2>(), nResultSize));
3422  break;
3423  /*case ocBetaInv:
3424  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3425  ft->Children[i],std::make_shared<OpBetainv));
3426  break;*/
3427  case ocTTest:
3428  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3429  ft->Children[i], std::make_shared<OpTTest>(), nResultSize));
3430  break;
3431  case ocTDist:
3432  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3433  ft->Children[i], std::make_shared<OpTDist>(), nResultSize));
3434  break;
3435  /*case ocTInv:
3436  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3437  ft->Children[i], std::make_shared<OpTInv));
3438  break;*/
3439  case ocSumXMY2:
3440  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3441  ft->Children[i], std::make_shared<OpSumXMY2>(), nResultSize));
3442  break;
3443  case ocStDevP:
3444  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3445  ft->Children[i], std::make_shared<OpStDevP>(), nResultSize));
3446  break;
3447  case ocCovar:
3448  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3449  ft->Children[i], std::make_shared<OpCovar>(), nResultSize));
3450  break;
3451  case ocAnd:
3452  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3453  ft->Children[i], std::make_shared<OpAnd>(), nResultSize));
3454  break;
3455  case ocVLookup:
3456  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3457  ft->Children[i], std::make_shared<OpVLookup>(), nResultSize));
3458  break;
3459  case ocOr:
3460  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3461  ft->Children[i], std::make_shared<OpOr>(), nResultSize));
3462  break;
3463  case ocNot:
3464  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3465  ft->Children[i], std::make_shared<OpNot>(), nResultSize));
3466  break;
3467  case ocXor:
3468  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3469  ft->Children[i], std::make_shared<OpXor>(), nResultSize));
3470  break;
3471  case ocDBMax:
3472  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3473  ft->Children[i], std::make_shared<OpDmax>(), nResultSize));
3474  break;
3475  case ocDBMin:
3476  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3477  ft->Children[i], std::make_shared<OpDmin>(), nResultSize));
3478  break;
3479  case ocDBProduct:
3480  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3481  ft->Children[i], std::make_shared<OpDproduct>(), nResultSize));
3482  break;
3483  case ocDBAverage:
3484  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3485  ft->Children[i], std::make_shared<OpDaverage>(), nResultSize));
3486  break;
3487  case ocDBStdDev:
3488  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3489  ft->Children[i], std::make_shared<OpDstdev>(), nResultSize));
3490  break;
3491  case ocDBStdDevP:
3492  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3493  ft->Children[i], std::make_shared<OpDstdevp>(), nResultSize));
3494  break;
3495  case ocDBSum:
3496  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3497  ft->Children[i], std::make_shared<OpDsum>(), nResultSize));
3498  break;
3499  case ocDBVar:
3500  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3501  ft->Children[i], std::make_shared<OpDvar>(), nResultSize));
3502  break;
3503  case ocDBVarP:
3504  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3505  ft->Children[i], std::make_shared<OpDvarp>(), nResultSize));
3506  break;
3507  case ocAverageIf:
3508  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3509  ft->Children[i], std::make_shared<OpAverageIf>(), nResultSize));
3510  break;
3511  case ocDBCount:
3512  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3513  ft->Children[i], std::make_shared<OpDcount>(), nResultSize));
3514  break;
3515  case ocDBCount2:
3516  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3517  ft->Children[i], std::make_shared<OpDcount2>(), nResultSize));
3518  break;
3519  case ocDeg:
3520  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3521  ft->Children[i], std::make_shared<OpDeg>(), nResultSize));
3522  break;
3523  case ocRoundUp:
3524  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3525  ft->Children[i], std::make_shared<OpRoundUp>(), nResultSize));
3526  break;
3527  case ocRoundDown:
3528  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3529  ft->Children[i], std::make_shared<OpRoundDown>(), nResultSize));
3530  break;
3531  case ocInt:
3532  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3533  ft->Children[i], std::make_shared<OpInt>(), nResultSize));
3534  break;
3535  case ocRad:
3536  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3537  ft->Children[i], std::make_shared<OpRadians>(), nResultSize));
3538  break;
3539  case ocCountIf:
3540  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3541  ft->Children[i], std::make_shared<OpCountIf>(), nResultSize));
3542  break;
3543  case ocIsEven:
3544  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3545  ft->Children[i], std::make_shared<OpIsEven>(), nResultSize));
3546  break;
3547  case ocIsOdd:
3548  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3549  ft->Children[i], std::make_shared<OpIsOdd>(), nResultSize));
3550  break;
3551  case ocFact:
3552  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3553  ft->Children[i], std::make_shared<OpFact>(), nResultSize));
3554  break;
3555  case ocMinA:
3556  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3557  ft->Children[i], std::make_shared<OpMinA>(), nResultSize));
3558  break;
3559  case ocCount2:
3560  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3561  ft->Children[i], std::make_shared<OpCountA>(), nResultSize));
3562  break;
3563  case ocMaxA:
3564  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3565  ft->Children[i], std::make_shared<OpMaxA>(), nResultSize));
3566  break;
3567  case ocAverageA:
3568  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3569  ft->Children[i], std::make_shared<OpAverageA>(), nResultSize));
3570  break;
3571  case ocVarA:
3572  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3573  ft->Children[i], std::make_shared<OpVarA>(), nResultSize));
3574  break;
3575  case ocVarPA:
3576  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3577  ft->Children[i], std::make_shared<OpVarPA>(), nResultSize));
3578  break;
3579  case ocStDevA:
3580  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3581  ft->Children[i], std::make_shared<OpStDevA>(), nResultSize));
3582  break;
3583  case ocStDevPA:
3584  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3585  ft->Children[i], std::make_shared<OpStDevPA>(), nResultSize));
3586  break;
3587  case ocSecant:
3588  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3589  ft->Children[i], std::make_shared<OpSec>(), nResultSize));
3590  break;
3591  case ocSecantHyp:
3592  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3593  ft->Children[i], std::make_shared<OpSecH>(), nResultSize));
3594  break;
3595  case ocSumIf:
3596  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3597  ft->Children[i], std::make_shared<OpSumIf>(), nResultSize));
3598  break;
3599  case ocNegSub:
3600  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3601  ft->Children[i], std::make_shared<OpNegSub>(), nResultSize));
3602  break;
3603  case ocAveDev:
3604  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3605  ft->Children[i], std::make_shared<OpAveDev>(), nResultSize));
3606  break;
3607  case ocIf:
3608  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3609  ft->Children[i], std::make_shared<OpIf>(), nResultSize));
3610  break;
3611  case ocExternal:
3612  if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getEffect")
3613  {
3614  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpEffective>(), nResultSize));
3615  }
3616  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumipmt")
3617  {
3618  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumipmt>(), nResultSize));
3619  }
3620  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getNominal")
3621  {
3622  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpNominal>(), nResultSize));
3623  }
3624  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCumprinc")
3625  {
3626  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCumprinc>(), nResultSize));
3627  }
3628  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXnpv")
3629  {
3630  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpXNPV>(), nResultSize));
3631  }
3632  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricemat")
3633  {
3634  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpPriceMat>(), nResultSize));
3635  }
3636  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getReceived")
3637  {
3638  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpReceived>(), nResultSize));
3639  }
3640  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbilleq")
3641  {
3642  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbilleq>(), nResultSize));
3643  }
3644  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillprice")
3645  {
3646  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillprice>(), nResultSize));
3647  }
3648  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getTbillyield")
3649  {
3650  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpTbillyield>(), nResultSize));
3651  }
3652  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getFvschedule")
3653  {
3654  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpFvschedule>(), nResultSize));
3655  }
3656  /*else if ( pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYield")
3657  {
3658  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYield));
3659  }*/
3660  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYielddisc")
3661  {
3662  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYielddisc>(), nResultSize));
3663  }
3664  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getYieldmat")
3665  {
3666  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpYieldmat>(), nResultSize));
3667  }
3668  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrintm")
3669  {
3670  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpAccrintm>(), nResultSize));
3671  }
3672  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaybs")
3673  {
3674  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaybs>(), nResultSize));
3675  }
3676  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarde")
3677  {
3678  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarde>(), nResultSize));
3679  }
3680  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDollarfr")
3681  {
3682  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDollarfr>(), nResultSize));
3683  }
3684  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdays")
3685  {
3686  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdays>(), nResultSize));
3687  }
3688  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupdaysnc")
3689  {
3690  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpCoupdaysnc>(), nResultSize));
3691  }
3692  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDisc")
3693  {
3694  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDISC>(), nResultSize));
3695  }
3696  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIntrate")
3697  {
3698  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpINTRATE>(), nResultSize));
3699  }
3700  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPrice")
3701  {
3702  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3703  ft->Children[i], std::make_shared<OpPrice>(), nResultSize));
3704  }
3705  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupnum")
3706  {
3707  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3708  std::make_shared<OpCoupnum>(), nResultSize));
3709  }
3710  /*else if pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getDuration"))
3711  {
3712  mvSubArguments.push_back(
3713  SoPHelper(mCalcConfig, ts, ft->Children[i], std::make_shared<OpDuration_ADD));
3714  }*/
3715  /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmordegrc")
3716  {
3717  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3718  std::make_shared<OpAmordegrc, nResultSize));
3719  }*/
3720  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAmorlinc")
3721  {
3722  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3723  std::make_shared<OpAmorlinc>(), nResultSize));
3724  }
3725  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMduration")
3726  {
3727  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3728  std::make_shared<OpMDuration>(), nResultSize));
3729  }
3730  /*else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getXirr")
3731  {
3732  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3733  std::make_shared<OpXirr, nResultSize));
3734  }*/
3735  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlprice")
3736  {
3737  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3738  ft->Children[i], std::make_shared<OpOddlprice>(), nResultSize));
3739  }
3740  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getOddlyield")
3741  {
3742  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3743  std::make_shared<OpOddlyield>(), nResultSize));
3744  }
3745  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getPricedisc")
3746  {
3747  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts,
3748  ft->Children[i], std::make_shared<OpPriceDisc>(), nResultSize));
3749  }
3750  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCouppcd")
3751  {
3752  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3753  std::make_shared<OpCouppcd>(), nResultSize));
3754  }
3755  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getCoupncd")
3756  {
3757  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3758  std::make_shared<OpCoupncd>(), nResultSize));
3759  }
3760  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getAccrint")
3761  {
3762  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3763  std::make_shared<OpAccrint>(), nResultSize));
3764  }
3765  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSqrtpi")
3766  {
3767  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3768  std::make_shared<OpSqrtPi>(), nResultSize));
3769  }
3770  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getConvert")
3771  {
3772  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3773  std::make_shared<OpConvert>(), nResultSize));
3774  }
3775  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIseven")
3776  {
3777  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3778  std::make_shared<OpIsEven>(), nResultSize));
3779  }
3780  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getIsodd")
3781  {
3782  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3783  std::make_shared<OpIsOdd>(), nResultSize));
3784  }
3785  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getMround")
3786  {
3787  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3788  std::make_shared<OpMROUND>(), nResultSize));
3789  }
3790  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getQuotient")
3791  {
3792  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3793  std::make_shared<OpQuotient>(), nResultSize));
3794  }
3795  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getSeriessum")
3796  {
3797  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3798  std::make_shared<OpSeriesSum>(), nResultSize));
3799  }
3800  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getBesselj")
3801  {
3802  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3803  std::make_shared<OpBesselj>(), nResultSize));
3804  }
3805  else if (pChild->GetExternal() == "com.sun.star.sheet.addin.Analysis.getGestep")
3806  {
3807  mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i],
3808  std::make_shared<OpGestep>(), nResultSize));
3809  }
3810  else
3811  throw UnhandledToken(OUString("unhandled external " + pChild->GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
3812  break;
3813 
3814  default:
3815  throw UnhandledToken(OUString("unhandled opcode "
3816  + formula::FormulaCompiler().GetOpCodeMap(com::sun::star::sheet::FormulaLanguage::ENGLISH)->getSymbol(opc)
3817  + "(" + OUString::number(opc) + ")").toUtf8().getStr(), __FILE__, __LINE__);
3818  }
3819  }
3820 }
3821 
3822 namespace {
3823 
/// One OpenCL kernel generated for a formula tree: holds the generated program
/// source, the OpenCL program/kernel handles and the buffer that receives the results.
// NOTE(review): the member functions use mCalcConfig, mpRoot, mKernelSignature and
// mKernelHash, none of which is declared in the visible lines of this class - confirm
// against the complete source.
3824 class DynamicKernel : public CompiledFormula
3825 {
3826 public:
3827  DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize );
3828  virtual ~DynamicKernel() override;
3829 
/// Builds the formula tree from rCode, then runs CodeGen() and CreateKernel().
/// Returns nullptr when the tree cannot be built or either step throws.
3830  static std::shared_ptr<DynamicKernel> create( const ScCalcConfig& config, const ScTokenArray& rCode, int nResultSize );
3831 
/// Generates the OpenCL program text and stores it in mFullProgramSrc.
3833  void CodeGen();
3834 
/// Returns the hex MD5 digest of mFullProgramSrc; computed on first use and then reused.
3836  std::string const & GetMD5();
3837 
/// Obtains a built program (reusing a recently built one when the hash matches) and
/// creates the kernel object from it. Does nothing when the kernel already exists.
3841  void CreateKernel();
3842 
/// Allocates the result buffer for nr doubles, sets the kernel arguments and
/// enqueues the kernel with a global work size of nr.
3845  void Launch( size_t nr );
3846 
/// Buffer written by the kernel; null until Launch() has created it.
3847  cl_mem GetResultBuffer() const { return mpResClmem; }
3848 
3849 private:
// Arguments declared while generating code; also marshals them at launch time.
3852  SymbolTable mSyms;
// Complete OpenCL source produced by CodeGen().
3854  std::string mFullProgramSrc;
// Not released by the destructor: programs are cached inside CreateKernel().
3855  cl_program mpProgram;
3856  cl_kernel mpKernel;
3857  cl_mem mpResClmem; // Results
// Filled by DumpInlineFun() in CodeGen() and emitted ahead of the kernel itself.
3858  std::set<std::string> inlineDecl;
3859  std::set<std::string> inlineFun;
3860 
3861  int mnResultSize;
3862 };
3863 
3864 }
3865 
3866 DynamicKernel::DynamicKernel( const ScCalcConfig& config, const FormulaTreeNodeRef& r, int nResultSize ) :
3867  mCalcConfig(config),
3868  mpRoot(r),
3869  mpProgram(nullptr),
3870  mpKernel(nullptr),
3871  mpResClmem(nullptr),
3872  mnResultSize(nResultSize) {}
3873 
3874 DynamicKernel::~DynamicKernel()
3875 {
3876  cl_int err;
3877  if (mpResClmem)
3878  {
3879  err = clReleaseMemObject(mpResClmem);
3880  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
3881  }
3882  if (mpKernel)
3883  {
3884  SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
3885  err = clReleaseKernel(mpKernel);
3886  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
3887  }
3888  // mpProgram is not going to be released here -- it's cached.
3889 }
3890 
3891 void DynamicKernel::CodeGen()
3892 {
3893  // Traverse the tree of expression and declare symbols used
3894  const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, std::make_shared<OpNop>(mnResultSize), mnResultSize);
3895 
3896  std::stringstream decl;
3897  if (openclwrapper::gpuEnv.mnKhrFp64Flag)
3898  {
3899  decl << "#if __OPENCL_VERSION__ < 120\n";
3900  decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
3901  decl << "#endif\n";
3902  }
3903  else if (openclwrapper::gpuEnv.mnAmdFp64Flag)
3904  {
3905  decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
3906  }
3907  // preambles
3908  decl << publicFunc;
3909  DK->DumpInlineFun(inlineDecl, inlineFun);
3910  for (const auto& rItem : inlineDecl)
3911  {
3912  decl << rItem;
3913  }
3914 
3915  for (const auto& rItem : inlineFun)
3916  {
3917  decl << rItem;
3918  }
3919  mSyms.DumpSlidingWindowFunctions(decl);
3920  mKernelSignature = DK->DumpOpName();
3921  decl << "__kernel void DynamicKernel" << mKernelSignature;
3922  decl << "(__global double *result";
3923  if( !DK->IsEmpty())
3924  {
3925  decl << ", ";
3926  DK->GenSlidingWindowDecl(decl);
3927  }
3928  decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
3929  DK->GenSlidingWindowDeclRef() << ";\n}\n";
3930  mFullProgramSrc = decl.str();
3931  SAL_INFO(
3932  "sc.opencl.source",
3933  (mKernelSignature[0] == '_'
3934  ? mKernelSignature.substr(1, std::string::npos) : mKernelSignature)
3935  << " program to be compiled:\n" << linenumberify(mFullProgramSrc));
3936 }
3937 
3938 std::string const & DynamicKernel::GetMD5()
3939 {
3940  if (mKernelHash.empty())
3941  {
3942  std::stringstream md5s;
3943  // Compute MD5SUM of kernel body to obtain the name
3944  sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
3945  rtl_digest_MD5(
3946  mFullProgramSrc.c_str(),
3947  mFullProgramSrc.length(), result,
3948  RTL_DIGEST_LENGTH_MD5);
3949  for (sal_uInt8 i : result)
3950  {
3951  md5s << std::hex << static_cast<int>(i);
3952  }
3953  mKernelHash = md5s.str();
3954  }
3955  return mKernelHash;
3956 }
3957 
// Obtains a built OpenCL program for mFullProgramSrc and creates the kernel
// object "DynamicKernel" + mKernelSignature from it. Returns immediately when the
// kernel already exists. Throws OpenCLError when creating the program or the
// kernel fails; a failed build aborts under DBG_UTIL and throws otherwise.
3959 void DynamicKernel::CreateKernel()
3960 {
3961  if (mpKernel)
3962  // already created.
3963  return;
3964 
3965  cl_int err;
3966  std::string kname = "DynamicKernel" + mKernelSignature;
3967  // Compile kernel here!!!
3968 
3969  OpenCLZone zone;
// NOTE(review): kEnv is used further down but is not declared in the visible lines -
// confirm where it is set up.
3972  const char* src = mFullProgramSrc.c_str();
// Function-local statics: the two most recently used programs and their keys,
// shared by every DynamicKernel instance.
3973  static std::string lastOneKernelHash;
3974  static std::string lastSecondKernelHash;
3975  static cl_program lastOneProgram = nullptr;
3976  static cl_program lastSecondProgram = nullptr;
// Cache key: kernel signature followed by the MD5 of the program source.
3977  std::string KernelHash = mKernelSignature + GetMD5();
3978  if (lastOneKernelHash == KernelHash && lastOneProgram)
3979  {
3980  mpProgram = lastOneProgram;
3981  }
3982  else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
3983  {
3984  mpProgram = lastSecondProgram;
3985  }
3986  else
3987  { // doesn't match the last compiled formula.
3988 
// The older of the two remembered programs is dropped to make room for the new one.
3989  if (lastSecondProgram)
3990  {
3991  SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
3992  err = clReleaseProgram(lastSecondProgram);
3993  SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << openclwrapper::errorString(err));
3994  lastSecondProgram = nullptr;
3995  }
// NOTE(review): the beginning of this condition is not present in the visible lines.
// When it holds, the program is taken from openclwrapper::gpuEnv.mpArryPrograms[0].
3997  &openclwrapper::gpuEnv, KernelHash.c_str(), 0))
3998  {
3999  mpProgram = openclwrapper::gpuEnv.mpArryPrograms[0];
4001  }
4002  else
4003  {
// Otherwise build the program from the generated source.
4004  mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
4005  &src, nullptr, &err);
4006  if (err != CL_SUCCESS)
4007  throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
4008  SAL_INFO("sc.opencl", "Created program " << mpProgram);
4009 
4010  err = clBuildProgram(mpProgram, 1,
4011  &openclwrapper::gpuEnv.mpDevID, "", nullptr, nullptr);
4012  if (err != CL_SUCCESS)
4013  {
// With OSL_DEBUG_LEVEL > 0, fetch the build status and the build log and write
// them to the warning log.
4014 #if OSL_DEBUG_LEVEL > 0
4015  if (err == CL_BUILD_PROGRAM_FAILURE)
4016  {
4017  cl_build_status stat;
4018  cl_int e = clGetProgramBuildInfo(
4019  mpProgram, openclwrapper::gpuEnv.mpDevID,
4020  CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
4021  &stat, nullptr);
4022  SAL_WARN_IF(
4023  e != CL_SUCCESS, "sc.opencl",
4024  "after CL_BUILD_PROGRAM_FAILURE,"
4025  " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
4026  " fails with " << openclwrapper::errorString(e));
4027  if (e == CL_SUCCESS)
4028  {
// First query only the size of the log (n), then read the log into a buffer of that size.
4029  size_t n;
4030  e = clGetProgramBuildInfo(
4031  mpProgram, openclwrapper::gpuEnv.mpDevID,
4032  CL_PROGRAM_BUILD_LOG, 0, nullptr, &n);
4033  SAL_WARN_IF(
4034  e != CL_SUCCESS || n == 0, "sc.opencl",
4035  "after CL_BUILD_PROGRAM_FAILURE,"
4036  " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
4037  " fails with " << openclwrapper::errorString(e) << ", n=" << n);
4038  if (e == CL_SUCCESS && n != 0)
4039  {
4040  std::vector<char> log(n);
4041  e = clGetProgramBuildInfo(
4042  mpProgram, openclwrapper::gpuEnv.mpDevID,
4043  CL_PROGRAM_BUILD_LOG, n, log.data(), nullptr);
4044  SAL_WARN_IF(
4045  e != CL_SUCCESS || n == 0, "sc.opencl",
4046  "after CL_BUILD_PROGRAM_FAILURE,"
4047  " clGetProgramBuildInfo("
4048  "CL_PROGRAM_BUILD_LOG) fails with " << openclwrapper::errorString(e));
4049  if (e == CL_SUCCESS)
4050  SAL_WARN(
4051  "sc.opencl",
4052  "CL_BUILD_PROGRAM_FAILURE, status " << stat
4053  << ", log \"" << log.data() << "\"");
4054  }
4055  }
4056  }
4057 #endif
// DBG_UTIL builds abort on a build failure; other builds report it as OpenCLError.
4058 #ifdef DBG_UTIL
4059  SAL_WARN("sc.opencl", "Program failed to build, aborting.");
4060  abort(); // make sure errors such as typos don't accidentally go unnoticed
4061 #else
4062  throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
4063 #endif
4064  }
4065  SAL_INFO("sc.opencl", "Built program " << mpProgram);
4066 
4067  // Generate binary out of compiled kernel.
// NOTE(review): the beginning of the following call is not present in the visible lines.
4069  (mKernelSignature + GetMD5()).c_str());
4070  }
// Shift the cache: the previous newest entry becomes the second one and the
// program just obtained becomes the newest.
4071  lastSecondKernelHash = lastOneKernelHash;
4072  lastSecondProgram = lastOneProgram;
4073  lastOneKernelHash = KernelHash;
4074  lastOneProgram = mpProgram;
4075  }
4076  mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
4077  if (err != CL_SUCCESS)
4078  throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
4079  SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
4080 }
4081 
// Runs the kernel for nr work items: creates the result buffer (nr doubles), binds it
// as kernel argument 0, lets the symbol table bind the remaining arguments, then
// enqueues the kernel and flushes the command queue.
// Throws OpenCLError when clCreateBuffer, clSetKernelArg, clEnqueueNDRangeKernel or
// clFlush fails.
4082 void DynamicKernel::Launch( size_t nr )
4083 {
4084  OpenCLZone zone;
// NOTE(review): kEnv is used below but is not declared in the visible lines - confirm
// where it is set up.
4087  cl_int err;
4088  // The results
// NOTE(review): a buffer already held in mpResClmem is overwritten without being
// released - confirm that Launch() runs at most once per kernel object.
4089  mpResClmem = clCreateBuffer(kEnv.mpkContext,
4090  cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
4091  nr * sizeof(double), nullptr, &err);
4092  if (CL_SUCCESS != err)
4093  throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
4094  SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
4095 
4096  SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem << " (result)");
4097  err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
4098  if (CL_SUCCESS != err)
4099  throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
4100  // The rest of buffers
4101  mSyms.Marshal(mpKernel, nr, mpProgram);
// One-dimensional launch with one work item per result element.
4102  size_t global_work_size[] = { nr };
4103  SAL_INFO("sc.opencl", "Enqueuing kernel " << mpKernel);
4104  err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, nullptr,
4105  global_work_size, nullptr, 0, nullptr, nullptr);
4106  if (CL_SUCCESS != err)
4107  throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
// clFlush only submits the queued work; this function does not wait for it to finish.
4108  err = clFlush(kEnv.mpkCmdQueue);
4109  if (CL_SUCCESS != err)
4110  throw OpenCLError("clFlush", err, __FILE__, __LINE__);
4111 }
4112 
4113 // Symbol lookup. If there is no such symbol created, allocate one
4114 // kernel with argument with unique name and return so.
4115 // The template argument T must be a subclass of DynamicKernelArgument
4116 template <typename T>
4117 const DynamicKernelArgument* SymbolTable::DeclRefArg(const ScCalcConfig& config,
4118  const FormulaTreeNodeRef& t,
4119  std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize)
4120 {
4121  FormulaToken* ref = t->GetFormulaToken();
4122  ArgumentMap::iterator it = mSymbols.find(ref);
4123  if (it == mSymbols.end())
4124  {
4125  // Allocate new symbols
4126  std::stringstream ss;
4127  ss << "tmp" << mCurId++;
4128  DynamicKernelArgumentRef new_arg = std::make_shared<T>(config, ss.str(), t, std::move(pCodeGen), nResultSize);
4129  mSymbols[ref] = new_arg;
4130  mParams.push_back(new_arg);
4131  return new_arg.get();
4132  }
4133  else
4134  {
4135  return it->second.get();
4136  }
4137 }
4138 
// NOTE(review): only fragments are visible here - the constructor header without its
// initializer list, followed by the body of a function that returns nullptr whose
// signature is not shown. Confirm both definitions against the complete source.
4139 FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() :
4141 
4143 
4145 {
4146  return nullptr;
4147 }
4148 
// Builds a formula tree from the tokens of rCode, wraps it in a DynamicKernel and
// runs CodeGen() and CreateKernel() on it.
// Returns the kernel, or nullptr when the tree cannot be built or when code
// generation / kernel creation throws (each exception is logged).
4149 std::shared_ptr<DynamicKernel> DynamicKernel::create( const ScCalcConfig& rConfig, const ScTokenArray& rCode, int nResultSize )
4150 {
4151  // Constructing "AST"
4152  FormulaTokenIterator aCode(rCode);
// aTokenVector is used as a stack of tokens seen so far; aHashMap maps each
// non-push token to the tree node built for it.
4153  std::vector<FormulaToken*> aTokenVector;
4154  std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
4155  FormulaToken* pCur;
4156  while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != nullptr)
4157  {
4158  OpCode eOp = pCur->GetOpCode();
4159  if (eOp != ocPush)
4160  {
4161  FormulaTreeNodeRef pCurNode = std::make_shared<FormulaTreeNode>(pCur);
// NOTE(review): nParamCount is not declared in the visible lines - confirm its source.
// The loop pops that many entries off the stack and attaches them as children.
4163  for (sal_uInt8 i = 0; i < nParamCount; i++)
4164  {
// Fewer entries on the stack than parameters expected: give up.
4165  if( aTokenVector.empty())
4166  return nullptr;
4167  FormulaToken* pTempFormula = aTokenVector.back();
4168  aTokenVector.pop_back();
4169  if (pTempFormula->GetOpCode() != ocPush)
4170  {
// A non-push child must already have a node; reuse it.
4171  if (aHashMap.find(pTempFormula) == aHashMap.end())
4172  return nullptr;
4173  pCurNode->Children.push_back(aHashMap[pTempFormula]);
4174  }
4175  else
4176  {
// A push token becomes a new leaf node.
4177  FormulaTreeNodeRef pChildTreeNode =
4178  std::make_shared<FormulaTreeNode>(pTempFormula);
4179  pCurNode->Children.push_back(pChildTreeNode);
4180  }
4181  }
// Children were collected last-to-first by popping; restore their original order.
4182  std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
4183  aHashMap[pCur] = pCurNode;
4184  }
4185  aTokenVector.push_back(pCur);
4186  }
4187 
// The root node carries no token; its only child is the node of the token left on top
// of the stack.
// NOTE(review): back() requires at least one token to have been read, and a push token
// on top has no aHashMap entry (operator[] would insert a null node) - confirm callers
// rule both cases out.
4188  FormulaTreeNodeRef Root = std::make_shared<FormulaTreeNode>(nullptr);
4189  Root->Children.push_back(aHashMap[aTokenVector.back()]);
4190 
4191  auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
4192 
4193  // OpenCL source code generation and kernel compilation
4194  try
4195  {
4196  pDynamicKernel->CodeGen();
4197  pDynamicKernel->CreateKernel();
4198  }
4199  catch (const UnhandledToken& ut)
4200  {
4201  SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4202  return nullptr;
4203  }
4204  catch (const InvalidParameterCount& ipc)
4205  {
4206  SAL_INFO("sc.opencl", "Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
4207  << " at " << ipc.mFile << ":" << ipc.mLineNumber);
4208  return nullptr;
4209  }
4210  catch (const OpenCLError& oce)
4211  {
4212  // I think OpenCLError exceptions are actually exceptional (unexpected), so do use SAL_WARN
4213  // here.
4214  SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4215 
4216  // OpenCLError used to go to the catch-all below, and not delete pDynamicKernel. Was that
4217  // intentional, should we not do it here then either?
4219  return nullptr;
4220  }
4221  catch (const Unhandled& uh)
4222  {
4223  SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
4224 
4225  // Unhandled used to go to the catch-all below, and not delete pDynamicKernel. Was that
4226  // intentional, should we not do it here then either?
4228  return nullptr;
4229  }
4230  catch (...)
4231  {
4232  // FIXME: Do we really want to catch random exceptions here?
4233  SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
4235  return nullptr;
4236  }
4237  return pDynamicKernel;
4238 }
4239 
4240 namespace {
4241 
// Result of one kernel launch: maps the kernel's result buffer into host memory and
// copies the values into the document. A default-constructed object is invalid.
4242 class CLInterpreterResult
4243 {
// Plain pointer to the launched kernel; this class never deletes it.
4244  DynamicKernel* mpKernel;
4245 
// NOTE(review): mnGroupLength is initialized and used below but is not declared in the
// visible lines - confirm its declaration.
4247 
// OpenCL result buffer and, once mapped, the host pointer to its contents.
4248  cl_mem mpCLResBuf;
4249  double* mpResBuf;
4250 
4251 public:
4252  CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4253  CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
4254  mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
4255 
// True when this result refers to a kernel.
4256  bool isValid() const { return mpKernel != nullptr; }
4257 
// Maps the kernel's result buffer for reading (blocking map, mnGroupLength doubles).
// On failure the error is logged and mpResBuf stays null.
4258  void fetchResultFromKernel()
4259  {
4260  if (!isValid())
4261  return;
4262 
4263  OpenCLZone zone;
4264 
4265  // Map results back
4266  mpCLResBuf = mpKernel->GetResultBuffer();
4267 
// NOTE(review): kEnv is used below but is not declared in the visible lines.
4270 
4271  cl_int err;
4272  mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
4273  mpCLResBuf,
4274  CL_TRUE, CL_MAP_READ, 0,
4275  mnGroupLength * sizeof(double), 0, nullptr, nullptr,
4276  &err));
4277 
4278  if (err != CL_SUCCESS)
4279  {
4280  SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << openclwrapper::errorString(err));
4281  mpResBuf = nullptr;
4282  return;
4283  }
4284  SAL_INFO("sc.opencl", "Kernel results: cl_mem: " << mpResBuf << " (" << DebugPeekDoubles(mpResBuf, mnGroupLength) << ")");
4285  }
4286 
// Writes the mapped results into rDoc starting at rTopPos, then unmaps the buffer.
// Returns false when no results are mapped or when unmapping fails.
4287  bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
4288  {
4289  if (!mpResBuf)
4290  return false;
4291 
4292  OpenCLZone zone;
4293 
4294  rDoc.SetFormulaResults(rTopPos, mpResBuf, mnGroupLength);
4295 
// NOTE(review): kEnv is used below but is not declared in the visible lines.
4298 
4299  cl_int err;
// NOTE(review): mpResBuf is not reset after the unmap, so it keeps the stale address.
4300  err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, nullptr, nullptr);
4301 
4302  if (err != CL_SUCCESS)
4303  {
4304  SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
4305  return false;
4306  }
4307 
4308  return true;
4309  }
4310 };
4311 
// Holds the kernel compiled for one formula group and launches it.
4312 class CLInterpreterContext
4313 {
// mpKernelStore owns the kernel; mpKernel is the raw pointer taken from it in
// setManagedKernel().
4314  std::shared_ptr<DynamicKernel> mpKernelStore;
4315  DynamicKernel* mpKernel;
4316 
// NOTE(review): mnGroupLength is initialized and used below but is not declared in the
// visible lines - confirm its declaration.
4318 
4319 public:
4320  explicit CLInterpreterContext(SCROW nGroupLength)
4321  : mpKernel(nullptr)
4322  , mnGroupLength(nGroupLength) {}
4323 
// True once a non-null kernel has been set.
4324  bool isValid() const
4325  {
4326  return mpKernel != nullptr;
4327  }
4328 
// Takes ownership of pKernel; passing a null pointer leaves the context invalid.
4329  void setManagedKernel( std::shared_ptr<DynamicKernel> pKernel )
4330  {
4331  mpKernelStore = std::move(pKernel);
4332  mpKernel = mpKernelStore.get();
4333  }
4334 
// Launches the kernel for mnGroupLength work items. Returns an invalid
// (default-constructed) result when there is no kernel or when Launch() throws;
// every exception is logged and swallowed here.
4335  CLInterpreterResult launchKernel()
4336  {
4337  if (!isValid())
4338  return CLInterpreterResult();
4339 
4340  try
4341  {
4342  // Run the kernel.
4343  mpKernel->Launch(mnGroupLength);
4344  }
4345  catch (const UnhandledToken& ut)
4346  {
4347  SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
4349  return CLInterpreterResult();
4350  }
4351  catch (const OpenCLError& oce)
4352  {
4353  SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
4355  return CLInterpreterResult();
4356  }
4357  catch (const Unhandled& uh)
4358  {
4359  SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
4361  return CLInterpreterResult();
4362  }
4363  catch (...)
4364  {
4365  SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
4367  return CLInterpreterResult();
4368  }
4369 
4370  return CLInterpreterResult(mpKernel, mnGroupLength);
4371  }
4372 };
4373 
4374 
4375 CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
4376  const ScFormulaCellGroupRef& xGroup, const ScTokenArray& rCode )
4377 {
4378  CLInterpreterContext aCxt(xGroup->mnLength);
4379 
4380  aCxt.setManagedKernel(DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
4381 
4382  return aCxt;
4383 }
4384 
4385 void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
4386 {
4387  ScCompiler aComp(&rDoc, rTopPos, rCode, rDoc.GetGrammar());
4388  // Disable special ordering for jump commands for the OpenCL interpreter.
4389  aComp.EnableJumpCommandReorder(false);
4390  aComp.CompileTokenArray(); // Regenerate RPN tokens.
4391 }
4392 
// Blocks until the command queue has finished (clFinish).
// Returns true on success; a failure is logged and reported as false.
4393 bool waitForResults()
4394 {
4395  OpenCLZone zone;
// NOTE(review): kEnv is used below but is not declared in the visible lines - confirm
// where it is set up.
4398 
4399  cl_int err = clFinish(kEnv.mpkCmdQueue);
4400  if (err != CL_SUCCESS)
4401  SAL_WARN("sc.opencl", "clFinish failed: " << openclwrapper::errorString(err));
4402 
4403  return err == CL_SUCCESS;
4404 }
4405 
4406 }
4407 
// NOTE(review): the first line of this function's signature is not present in the
// visible lines; the parameter list continues here.
// Evaluates one formula group: regenerates the RPN tokens, compiles and launches a
// kernel, waits for it and writes the results into the document. Returns false as
// soon as any of these steps fails.
4409  const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
4410  ScTokenArray& rCode )
4411 {
4412  MergeCalcConfig(rDoc);
4413 
4414  genRPNTokens(rDoc, rTopPos, rCode);
4415 
// Nothing to compile.
4416  if( rCode.GetCodeLen() == 0 )
4417  return false;
4418 
// The context is invalid when no kernel could be created for rCode.
4419  CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
4420  if (!aCxt.isValid())
4421  return false;
4422 
4423  CLInterpreterResult aRes = aCxt.launchKernel();
4424  if (!aRes.isValid())
4425  return false;
4426 
// Wait for the queued kernel to complete before reading its results.
4427  if (!waitForResults())
4428  return false;
4429 
4430  aRes.fetchResultFromKernel();
4431 
// pushResultToDocument() returns false when fetching left no mapped results.
4432  return aRes.pushResultToDocument(rDoc, rTopPos);
4433 }
4434 
4435 } // namespace sc::opencl
4436 
4437 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
Matrix data type that can store values of mixed types.
Definition: scmatrix.hxx:113
ocDBCount
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
ocStDevP
ocVarPA
ocSqrt
ocBitAnd
ocPermutationA
ocBinomDist
const size_t count(pCandidateA->getBorderLines().size())
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
Definition: types.hxx:44
virtual svl::SharedString GetString() const
sal_uInt64 kernelFailures
ocEven
DynamicKernelSlidingArgument< VectorRef > mDoubleArgument
OUString getString() const
std::string mFullProgramSrc
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
Definition: opbase.hxx:152
std::string mKernelSignature
ocSumSQ
double mConst
ocArcCotHyp
ocCosecantHyp
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells. ...
Definition: documen8.cxx:402
ocVar
const VectorRefArray & GetArray() const
ocPi
ocDevSq
Failed in marshaling.
Definition: opbase.hxx:42
Inconsistent state.
Definition: opbase.hxx:54
ocCos
ocPDuration
cl_program mpArryPrograms[MAX_CLFILE_NUM]
std::set< std::string > inlineDecl
ocPpmt
ocDBMin
ocIntercept
SCROW mnGroupLength
ocDBAverage
ocB
unsigned int mCurId
Abstract base class for vectorised formula group interpreters, plus a global instance factory...
sal_Int64 n
cl_mem mpClmem2
ocCosecant
virtual bool interpret(ScDocument &rDoc, const ScAddress &rTopPos, ScFormulaCellGroupRef &xGroup, ScTokenArray &rCode) override
const std::vector< VectorRefArray > & GetArrays() const
ocDBSum
ocExp
ocSLN
void setKernelEnv(KernelEnv *envInfo)
ocLogNormDist
sal_uInt16 GetCodeLen() const
void MergeCalcConfig(const ScDocument &rDoc)
Merge global and document specific settings.
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
Definition: opbase.hxx:82
virtual double GetDouble() const
ocPV
ocProduct
OpCode GetOpCode() const
SymbolTable mSyms
ocPearson
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
Definition: document.hxx:985
(Partially) abstract base class for an operand
Definition: opbase.hxx:102
ocCount
ocRad
ocMin
ocGauss
ocSinHyp
ocCount2
cl_kernel mpKernel
for managed kernel instance.
ocSTEYX
ocIf
ocLogInv
ocSYD
ocFisherInv
tuple log
#define SAL_MAX_UINT32
virtual ScMatrixRef inverseMatrix(const ScMatrix &rMat) override
ocPhi
char sal_uInt16 & nParamCount
Definition: callform.cxx:54
ocDBProduct
ocNegSub
#define REDUCE_THRESHOLD
=1+"1" or =1+"x" give 1
ocRSQ
virtual const OUString & GetExternal() const
const BorderLinePrimitive2D *pCandidateB assert(pCandidateA)
ocBetaDist
ocRound
OpCode
ocSumProduct
ocAbs
ocStDevA
ocForecast
ocWeibull
ocGreater
ocSNormInv
ocIsOdd
ocEqual
::boost::intrusive_ptr< ScMatrix > ScMatrixRef
Definition: types.hxx:26
ocNot
StringConversion meStringConversion
Definition: calcconfig.hxx:54
Reference< deployment::XPackageRegistry > create(Reference< deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, Reference< XComponentContext > const &xComponentContext)
std::string StackVarEnumToString(StackVar const e)
ocCot
ocCorrel
ocBitLshift
ScCalcConfig mCalcConfig
ocDDB
ocVarP
std::shared_ptr< DynamicKernel > mpKernelStore
ocSumIfs
ocArcTanHyp
err
ocFisher
ocSumX2MY2
ocDBMax
ocAverageA
ocPower
ocNormInv
ocBitOr
const formula::DoubleVectorRefToken * mpDVR
Base
ocFTest
ocArcCos
ocLog
int mnResultSize
ocFV
int i
ocSkew
ocNPV
cl_mem mCLMem
ocMul
ocCosHyp
ocArcCosHyp
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
ocDBVar
ocPush
std::set< std::string > inlineFun
std::shared_ptr< SlidingFunctionBase > mpCodeGen
std::string mKernelHash
ocSub
ocTDist
#define VR
Definition: xlformula.cxx:62
void EnableJumpCommandReorder(bool bEnable)
ocSecantHyp
ocFloor
ocPoissonDist
ocMIRR
ocSlope
ocExternal
ocAverageIfs
ocMaxA
ocLog10
size
ocCeil
Arguments that are actually compile-time constant strings. Currently, only the hash is passed...
::boost::spirit::classic::rule< ScannerT > argument
ocMax
SubArgumentsType mvSubArguments
double * mpResBuf
ocInt
bool bIsEndFixed
ocDBStdDevP
ocSkewp
ocSin
tuple index
ArgumentMap mSymbols
ocVLookup
ocCritBinom
ocRate
ocLess
sal_uInt8 GetParamCount() const
exports com.sun.star.chart2. data
XPropertyListType t
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
ocNegBinomVert
const double * mpNumericArray
FormulaTreeNodeRef mpRoot
ocSumIf
Configuration options for formula interpreter.
Definition: calcconfig.hxx:43
ocVarA
ocISPMT
ocDBCount2
ocOdd
ocOr
ocBitRshift
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
DynamicKernelStringArgument mStringArgument
ocCotHyp
sal_Int32 SCROW
Definition: types.hxx:18
ocFact
ocCombin
ocAdd
ocRoundUp
ocStDevPA
ocSumX2DY2
ocArcTan
bool bIsStartFixed
ocDB
ocAverage
ocHarMean
#define UNROLLING_FACTOR
ocMod
#define SAL_WARN_IF(condition, area, stream)
rtl_uString ** mpStringArray
unsigned char sal_uInt8
ocRandom
ocDBVarP
ocArcSin
cl_mem mpResClmem
ocAnd
#define SAL_INFO(area, stream)
ocArcCot
ocLn
cl_command_queue mpkCmdQueue
ocStdNormDist
int uniform_int_distribution(int a, int b)
tuple config
ocCountIfs
cl_mem mpCLResBuf
const char * name
ocGammaLn
ocRoundDown
ocRRI
ocBitXor
ocSumXMY2
std::vector< DynamicKernelArgumentRef > mParams
ocStandard
const char *const publicFunc
ocDiv
ocAveDev
ocGamma
ocPow
ocTrunc
cl_program mpProgram
Any result
ocIRR
#define SAL_WARN(area, stream)
ocNormDist
ocSecant
const char * errorString(cl_int nError)
ocPMT
ocDeg
ocFDist
ocPermut
ocTanHyp
sal_Int32 nLength
ocLessEqual
ocConfidence
size_t GetArrayLength() const
size_t GetArrayLength() const
ocIpmt