LibreOffice Module sc (master) 1
formulagroupcl.cxx
Go to the documentation of this file.
1/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
2/*
3 * This file is part of the LibreOffice project.
4 *
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
9
10#include <formulagroup.hxx>
11#include <formulagroupcl.hxx>
12#include <document.hxx>
13#include <formulacell.hxx>
14#include <tokenarray.hxx>
15#include <compiler.hxx>
16#include <comphelper/random.hxx>
17#include <scmatrix.hxx>
18#include <sal/log.hxx>
19
21#include <opencl/OpenCLZone.hxx>
22
23#include "op_financial.hxx"
24#include "op_math.hxx"
25#include "op_logical.hxx"
26#include "op_statistical.hxx"
27#include "op_array.hxx"
28#include "op_spreadsheet.hxx"
29#include "op_addin.hxx"
30
31#include <limits>
32
33#include <com/sun/star/sheet/FormulaLanguage.hpp>
34
35const char* const publicFunc =
36 "\n"
37 "#define IllegalArgument 502\n"
38 "#define IllegalFPOperation 503 // #NUM!\n"
39 "#define NoValue 519 // #VALUE!\n"
40 "#define NoConvergence 523\n"
41 "#define DivisionByZero 532 // #DIV/0!\n"
42 "#define NOTAVAILABLE 0x7fff // #N/A\n"
43 "\n"
44 "double CreateDoubleError(ulong nErr)\n"
45 "{\n"
46 // At least nVidia on Linux and Intel on Windows seem to ignore the argument to nan(),
47 // so using that would not propagate the type of error, work that around
48 // by directly constructing the proper IEEE double NaN value
49 // TODO: maybe use a better way to detect such systems?
50 " return as_double(0x7FF8000000000000+nErr);\n"
51// " return nan(nErr);\n"
52 "}\n"
53 "\n"
54 "double fsum(double a, double b) { return isnan(a)?b:a+b; }\n"
55 "double legalize(double a, double b) { return isnan(a)?b:a;}\n"
56 ;
57
58#include <utility>
59#include <vector>
60#include <map>
61#include <iostream>
62#include <algorithm>
63
64#include <rtl/digest.h>
65
66#include <memory>
67
68using namespace formula;
69
70namespace sc::opencl {
71
72namespace {
73
74std::string linenumberify(const std::string& s)
75{
76 outputstream ss;
77 int linenumber = 1;
78 size_t start = 0;
79 size_t newline;
80 while ((newline = s.find('\n', start)) != std::string::npos)
81 {
82 ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, newline-start+1);
83 start = newline + 1;
84 }
85 if (start < s.size())
86 ss << "/*" << std::setw(4) << linenumber++ << "*/ " << s.substr(start, std::string::npos);
87 return ss.str();
88}
89
90bool AllStringsAreNull(const rtl_uString* const* pStringArray, size_t nLength)
91{
92 if (pStringArray == nullptr)
93 return true;
94
95 for (size_t i = 0; i < nLength; i++)
96 if (pStringArray[i] != nullptr)
97 return false;
98
99 return true;
100}
101
102OUString LimitedString( std::u16string_view str )
103{
104 if( str.size() < 20 )
105 return OUString::Concat("\"") + str + "\"";
106 else
107 return OUString::Concat("\"") + str.substr( 0, 20 ) + "\"...";
108}
109
110const int MAX_PEEK_ELEMENTS = 5;
111// Returns formatted contents of the data (possibly shortened), to be used in debug output.
112std::string DebugPeekData(const FormulaToken* ref, int doubleRefIndex = 0)
113{
115 {
117 static_cast<const formula::SingleVectorRefToken*>(ref);
118 outputstream buf;
119 buf << "SingleRef {";
120 for( size_t i = 0; i < std::min< size_t >( MAX_PEEK_ELEMENTS, pSVR->GetArrayLength()); ++i )
121 {
122 if( i != 0 )
123 buf << ",";
124 if( pSVR->GetArray().mpStringArray != nullptr
125 && pSVR->GetArray().mpStringArray[ i ] != nullptr )
126 {
127 buf << LimitedString( OUString( pSVR->GetArray().mpStringArray[ i ] ));
128 }
129 else if( pSVR->GetArray().mpNumericArray != nullptr )
130 buf << pSVR->GetArray().mpNumericArray[ i ];
131 }
132 if( pSVR->GetArrayLength() > MAX_PEEK_ELEMENTS )
133 buf << ",...";
134 buf << "}";
135 return buf.str();
136 }
137 else if (ref->GetType() == formula::svDoubleVectorRef)
138 {
140 static_cast<const formula::DoubleVectorRefToken*>(ref);
141 outputstream buf;
142 buf << "DoubleRef {";
143 for( size_t i = 0; i < std::min< size_t >( MAX_PEEK_ELEMENTS, pDVR->GetArrayLength()); ++i )
144 {
145 if( i != 0 )
146 buf << ",";
147 if( pDVR->GetArrays()[doubleRefIndex].mpStringArray != nullptr
148 && pDVR->GetArrays()[doubleRefIndex].mpStringArray[ i ] != nullptr )
149 {
150 buf << LimitedString( OUString( pDVR->GetArrays()[doubleRefIndex].mpStringArray[ i ] ));
151 }
152 else if( pDVR->GetArrays()[doubleRefIndex].mpNumericArray != nullptr )
153 buf << pDVR->GetArrays()[doubleRefIndex].mpNumericArray[ i ];
154 }
155 if( pDVR->GetArrayLength() > MAX_PEEK_ELEMENTS )
156 buf << ",...";
157 buf << "}";
158 return buf.str();
159 }
160 else if (ref->GetType() == formula::svString)
161 {
162 outputstream buf;
163 buf << "String " << LimitedString( ref->GetString().getString());
164 return buf.str();
165 }
166 else if (ref->GetType() == formula::svDouble)
167 {
168 return preciseFloat(ref->GetDouble());
169 }
170 else
171 {
172 return "?";
173 }
174}
175
176// Returns formatted contents of a doubles buffer, to be used in debug output.
177std::string DebugPeekDoubles(const double* data, int size)
178{
179 outputstream buf;
180 buf << "{";
181 for( int i = 0; i < std::min( MAX_PEEK_ELEMENTS, size ); ++i )
182 {
183 if( i != 0 )
184 buf << ",";
185 buf << data[ i ];
186 }
187 if( size > MAX_PEEK_ELEMENTS )
188 buf << ",...";
189 buf << "}";
190 return buf.str();
191}
192
193} // anonymous namespace
194
196size_t VectorRef::Marshal( cl_kernel k, int argno, int, cl_program )
197{
198 OpenCLZone zone;
199 FormulaToken* ref = mFormulaTree->GetFormulaToken();
200 double* pHostBuffer = nullptr;
201 size_t szHostBuffer = 0;
203 {
205 static_cast<const formula::SingleVectorRefToken*>(ref);
206
207 SAL_INFO("sc.opencl", "SingleVectorRef len=" << pSVR->GetArrayLength() << " mpNumericArray=" << pSVR->GetArray().mpNumericArray << " (mpStringArray=" << pSVR->GetArray().mpStringArray << ")");
208
209 if( forceStringsToZero && pSVR->GetArray().mpStringArray != nullptr )
210 {
211 dataBuffer.resize( pSVR->GetArrayLength());
212 for( size_t i = 0; i < pSVR->GetArrayLength(); ++i )
213 if( pSVR->GetArray().mpStringArray[ i ] != nullptr )
214 dataBuffer[ i ] = 0;
215 else
216 dataBuffer[ i ] = pSVR->GetArray().mpNumericArray[ i ];
217 pHostBuffer = dataBuffer.data();
218 SAL_INFO("sc.opencl", "Forced strings to zero : " << DebugPeekDoubles( pHostBuffer, pSVR->GetArrayLength()));
219 }
220 else
221 {
222 pHostBuffer = const_cast<double*>(pSVR->GetArray().mpNumericArray);
223 }
224 szHostBuffer = pSVR->GetArrayLength() * sizeof(double);
225 }
226 else if (ref->GetType() == formula::svDoubleVectorRef)
227 {
229 static_cast<const formula::DoubleVectorRefToken*>(ref);
230
231 SAL_INFO("sc.opencl", "DoubleVectorRef index=" << mnIndex << " len=" << pDVR->GetArrayLength() << " mpNumericArray=" << pDVR->GetArrays()[mnIndex].mpNumericArray << " (mpStringArray=" << pDVR->GetArrays()[mnIndex].mpStringArray << ")");
232
233 if( forceStringsToZero && pDVR->GetArrays()[mnIndex].mpStringArray != nullptr )
234 {
235 dataBuffer.resize( pDVR->GetArrayLength());
236 for( size_t i = 0; i < pDVR->GetArrayLength(); ++i )
237 if( pDVR->GetArrays()[mnIndex].mpStringArray[ i ] != nullptr )
238 dataBuffer[ i ] = 0;
239 else
240 dataBuffer[ i ] = pDVR->GetArrays()[mnIndex].mpNumericArray[ i ];
241 pHostBuffer = dataBuffer.data();
242 SAL_INFO("sc.opencl", "Forced strings to zero : " << DebugPeekDoubles( pHostBuffer, pDVR->GetArrayLength()));
243 }
244 else
245 {
246 pHostBuffer = const_cast<double*>(pDVR->GetArrays()[mnIndex].mpNumericArray);
247 }
248 szHostBuffer = pDVR->GetArrayLength() * sizeof(double);
249 }
250 else
251 {
252 throw Unhandled(__FILE__, __LINE__);
253 }
254
257 cl_int err;
258 if (pHostBuffer)
259 {
260 mpClmem = clCreateBuffer(kEnv.mpkContext,
261 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
262 szHostBuffer,
263 pHostBuffer, &err);
264 if (CL_SUCCESS != err)
265 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
266 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer << " using host buffer " << pHostBuffer);
267 }
268 else
269 {
270 if (szHostBuffer == 0)
271 szHostBuffer = sizeof(double); // a dummy small value
272 // Marshal as a buffer of NANs
273 mpClmem = clCreateBuffer(kEnv.mpkContext,
274 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
275 szHostBuffer, nullptr, &err);
276 if (CL_SUCCESS != err)
277 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
278 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
279
280 double* pNanBuffer = static_cast<double*>(clEnqueueMapBuffer(
281 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
282 szHostBuffer, 0, nullptr, nullptr, &err));
283 if (CL_SUCCESS != err)
284 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
285
286 for (size_t i = 0; i < szHostBuffer / sizeof(double); i++)
287 pNanBuffer[i] = std::numeric_limits<double>::quiet_NaN();
288 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
289 pNanBuffer, 0, nullptr, nullptr);
290 // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
291 if (CL_SUCCESS != err)
292 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
293 }
294
295 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem << " (" << DebugPeekData(ref, mnIndex) << ")");
296 err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
297 if (CL_SUCCESS != err)
298 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
299 return 1;
300}
301
302namespace {
303
304class DynamicKernelPiArgument : public DynamicKernelArgument
305{
306public:
307 DynamicKernelPiArgument( const ScCalcConfig& config, const std::string& s,
308 const FormulaTreeNodeRef& ft ) :
311 virtual void GenDecl( outputstream& ss ) const override
312 {
313 ss << "double " << mSymName;
314 }
315 virtual void GenDeclRef( outputstream& ss ) const override
316 {
317 ss << "M_PI";
318 }
319 virtual void GenSlidingWindowDecl( outputstream& ss ) const override
320 {
321 GenDecl(ss);
322 }
323 virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
324 {
325 return mSymName;
326 }
327 virtual size_t GetWindowSize() const override
328 {
329 return 1;
330 }
332 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
333 {
334 OpenCLZone zone;
335 double tmp = 0.0;
336 // Pass the scalar result back to the rest of the formula kernel
337 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": double: " << preciseFloat( tmp ) << " (PI)");
338 cl_int err = clSetKernelArg(k, argno, sizeof(double), static_cast<void*>(&tmp));
339 if (CL_SUCCESS != err)
340 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
341 return 1;
342 }
343};
344
345class DynamicKernelRandomArgument : public DynamicKernelArgument
346{
347public:
348 DynamicKernelRandomArgument( const ScCalcConfig& config, const std::string& s,
349 const FormulaTreeNodeRef& ft ) :
350 DynamicKernelArgument(config, s, ft) { }
352 virtual void GenDecl( outputstream& ss ) const override
353 {
354 ss << "double " << mSymName;
355 }
356 virtual void GenDeclRef( outputstream& ss ) const override
357 {
358 ss << mSymName;
359 }
360 virtual void GenSlidingWindowDecl( outputstream& ss ) const override
361 {
362 ss << "int " << mSymName;
363 }
364 virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
365 {
366 return mSymName + "_Random(" + mSymName + ")";
367 }
368 virtual void GenSlidingWindowFunction( outputstream& ss ) override
369 {
370 // This string is from the pi_opencl_kernel.i file as
371 // generated when building the Random123 examples. Unused
372 // stuff has been removed, and the actual kernel is not the
373 // same as in the totally different use case of that example,
374 // of course. Only the code that calculates the counter-based
375 // random number and what it needs is left.
376 ss << "\
377\n\
378#ifndef DEFINED_RANDOM123_STUFF\n\
379#define DEFINED_RANDOM123_STUFF\n\
380\n\
381/*\n\
382Copyright 2010-2011, D. E. Shaw Research.\n\
383All rights reserved.\n\
384\n\
385Redistribution and use in source and binary forms, with or without\n\
386modification, are permitted provided that the following conditions are\n\
387met:\n\
388\n\
389* Redistributions of source code must retain the above copyright\n\
390 notice, this list of conditions, and the following disclaimer.\n\
391\n\
392* Redistributions in binary form must reproduce the above copyright\n\
393 notice, this list of conditions, and the following disclaimer in the\n\
394 documentation and/or other materials provided with the distribution.\n\
395\n\
396* Neither the name of D. E. Shaw Research nor the names of its\n\
397 contributors may be used to endorse or promote products derived from\n\
398 this software without specific prior written permission.\n\
399\n\
400THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS\n\
401\"AS IS\" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT\n\
402LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR\n\
403A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT\n\
404OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,\n\
405SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT\n\
406LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,\n\
407DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY\n\
408THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT\n\
409(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\n\
410OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
411*/\n\
412\n\
413typedef uint uint32_t;\n\
414struct r123array2x32\n\
415{\n\
416 uint32_t v[2];\n\
417};\n\
418enum r123_enum_threefry32x2\n\
419{\n\
420 R_32x2_0_0 = 13,\n\
421 R_32x2_1_0 = 15,\n\
422 R_32x2_2_0 = 26,\n\
423 R_32x2_3_0 = 6,\n\
424 R_32x2_4_0 = 17,\n\
425 R_32x2_5_0 = 29,\n\
426 R_32x2_6_0 = 16,\n\
427 R_32x2_7_0 = 24\n\
428};\n\
429inline uint32_t RotL_32 (uint32_t x, unsigned int N)\n\
430 __attribute__ ((always_inline));\n\
431inline uint32_t\n\
432RotL_32 (uint32_t x, unsigned int N)\n\
433{\n\
434 return (x << (N & 31)) | (x >> ((32 - N) & 31));\n\
435}\n\
436\n\
437typedef struct r123array2x32 threefry2x32_ctr_t;\n\
438typedef struct r123array2x32 threefry2x32_key_t;\n\
439typedef struct r123array2x32 threefry2x32_ukey_t;\n\
440inline threefry2x32_key_t\n\
441threefry2x32keyinit (threefry2x32_ukey_t uk)\n\
442{\n\
443 return uk;\n\
444}\n\
445\n\
446inline threefry2x32_ctr_t threefry2x32_R (unsigned int Nrounds,\n\
447 threefry2x32_ctr_t in,\n\
448 threefry2x32_key_t k)\n\
449 __attribute__ ((always_inline));\n\
450inline threefry2x32_ctr_t\n\
451threefry2x32_R (unsigned int Nrounds, threefry2x32_ctr_t in,\n\
452 threefry2x32_key_t k)\n\
453{\n\
454 threefry2x32_ctr_t X;\n\
455 uint32_t ks[2 + 1];\n\
456 int i;\n\
457 ks[2] = 0x1BD11BDA;\n\
458 for (i = 0; i < 2; i++) {\n\
459 ks[i] = k.v[i];\n\
460 X.v[i] = in.v[i];\n\
461 ks[2] ^= k.v[i];\n\
462 }\n\
463 X.v[0] += ks[0];\n\
464 X.v[1] += ks[1];\n\
465 if (Nrounds > 0) {\n\
466 X.v[0] += X.v[1];\n\
467 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
468 X.v[1] ^= X.v[0];\n\
469 }\n\
470 if (Nrounds > 1) {\n\
471 X.v[0] += X.v[1];\n\
472 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
473 X.v[1] ^= X.v[0];\n\
474 }\n\
475 if (Nrounds > 2) {\n\
476 X.v[0] += X.v[1];\n\
477 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
478 X.v[1] ^= X.v[0];\n\
479 }\n\
480 if (Nrounds > 3) {\n\
481 X.v[0] += X.v[1];\n\
482 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
483 X.v[1] ^= X.v[0];\n\
484 }\n\
485 if (Nrounds > 3) {\n\
486 X.v[0] += ks[1];\n\
487 X.v[1] += ks[2];\n\
488 X.v[1] += 1;\n\
489 }\n\
490 if (Nrounds > 4) {\n\
491 X.v[0] += X.v[1];\n\
492 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
493 X.v[1] ^= X.v[0];\n\
494 }\n\
495 if (Nrounds > 5) {\n\
496 X.v[0] += X.v[1];\n\
497 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
498 X.v[1] ^= X.v[0];\n\
499 }\n\
500 if (Nrounds > 6) {\n\
501 X.v[0] += X.v[1];\n\
502 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
503 X.v[1] ^= X.v[0];\n\
504 }\n\
505 if (Nrounds > 7) {\n\
506 X.v[0] += X.v[1];\n\
507 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
508 X.v[1] ^= X.v[0];\n\
509 }\n\
510 if (Nrounds > 7) {\n\
511 X.v[0] += ks[2];\n\
512 X.v[1] += ks[0];\n\
513 X.v[1] += 2;\n\
514 }\n\
515 if (Nrounds > 8) {\n\
516 X.v[0] += X.v[1];\n\
517 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
518 X.v[1] ^= X.v[0];\n\
519 }\n\
520 if (Nrounds > 9) {\n\
521 X.v[0] += X.v[1];\n\
522 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
523 X.v[1] ^= X.v[0];\n\
524 }\n\
525 if (Nrounds > 10) {\n\
526 X.v[0] += X.v[1];\n\
527 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
528 X.v[1] ^= X.v[0];\n\
529 }\n\
530 if (Nrounds > 11) {\n\
531 X.v[0] += X.v[1];\n\
532 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
533 X.v[1] ^= X.v[0];\n\
534 }\n\
535 if (Nrounds > 11) {\n\
536 X.v[0] += ks[0];\n\
537 X.v[1] += ks[1];\n\
538 X.v[1] += 3;\n\
539 }\n\
540 if (Nrounds > 12) {\n\
541 X.v[0] += X.v[1];\n\
542 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
543 X.v[1] ^= X.v[0];\n\
544 }\n\
545 if (Nrounds > 13) {\n\
546 X.v[0] += X.v[1];\n\
547 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
548 X.v[1] ^= X.v[0];\n\
549 }\n\
550 if (Nrounds > 14) {\n\
551 X.v[0] += X.v[1];\n\
552 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
553 X.v[1] ^= X.v[0];\n\
554 }\n\
555 if (Nrounds > 15) {\n\
556 X.v[0] += X.v[1];\n\
557 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
558 X.v[1] ^= X.v[0];\n\
559 }\n\
560 if (Nrounds > 15) {\n\
561 X.v[0] += ks[1];\n\
562 X.v[1] += ks[2];\n\
563 X.v[1] += 4;\n\
564 }\n\
565 if (Nrounds > 16) {\n\
566 X.v[0] += X.v[1];\n\
567 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
568 X.v[1] ^= X.v[0];\n\
569 }\n\
570 if (Nrounds > 17) {\n\
571 X.v[0] += X.v[1];\n\
572 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
573 X.v[1] ^= X.v[0];\n\
574 }\n\
575 if (Nrounds > 18) {\n\
576 X.v[0] += X.v[1];\n\
577 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
578 X.v[1] ^= X.v[0];\n\
579 }\n\
580 if (Nrounds > 19) {\n\
581 X.v[0] += X.v[1];\n\
582 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
583 X.v[1] ^= X.v[0];\n\
584 }\n\
585 if (Nrounds > 19) {\n\
586 X.v[0] += ks[2];\n\
587 X.v[1] += ks[0];\n\
588 X.v[1] += 5;\n\
589 }\n\
590 if (Nrounds > 20) {\n\
591 X.v[0] += X.v[1];\n\
592 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
593 X.v[1] ^= X.v[0];\n\
594 }\n\
595 if (Nrounds > 21) {\n\
596 X.v[0] += X.v[1];\n\
597 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
598 X.v[1] ^= X.v[0];\n\
599 }\n\
600 if (Nrounds > 22) {\n\
601 X.v[0] += X.v[1];\n\
602 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
603 X.v[1] ^= X.v[0];\n\
604 }\n\
605 if (Nrounds > 23) {\n\
606 X.v[0] += X.v[1];\n\
607 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
608 X.v[1] ^= X.v[0];\n\
609 }\n\
610 if (Nrounds > 23) {\n\
611 X.v[0] += ks[0];\n\
612 X.v[1] += ks[1];\n\
613 X.v[1] += 6;\n\
614 }\n\
615 if (Nrounds > 24) {\n\
616 X.v[0] += X.v[1];\n\
617 X.v[1] = RotL_32 (X.v[1], R_32x2_0_0);\n\
618 X.v[1] ^= X.v[0];\n\
619 }\n\
620 if (Nrounds > 25) {\n\
621 X.v[0] += X.v[1];\n\
622 X.v[1] = RotL_32 (X.v[1], R_32x2_1_0);\n\
623 X.v[1] ^= X.v[0];\n\
624 }\n\
625 if (Nrounds > 26) {\n\
626 X.v[0] += X.v[1];\n\
627 X.v[1] = RotL_32 (X.v[1], R_32x2_2_0);\n\
628 X.v[1] ^= X.v[0];\n\
629 }\n\
630 if (Nrounds > 27) {\n\
631 X.v[0] += X.v[1];\n\
632 X.v[1] = RotL_32 (X.v[1], R_32x2_3_0);\n\
633 X.v[1] ^= X.v[0];\n\
634 }\n\
635 if (Nrounds > 27) {\n\
636 X.v[0] += ks[1];\n\
637 X.v[1] += ks[2];\n\
638 X.v[1] += 7;\n\
639 }\n\
640 if (Nrounds > 28) {\n\
641 X.v[0] += X.v[1];\n\
642 X.v[1] = RotL_32 (X.v[1], R_32x2_4_0);\n\
643 X.v[1] ^= X.v[0];\n\
644 }\n\
645 if (Nrounds > 29) {\n\
646 X.v[0] += X.v[1];\n\
647 X.v[1] = RotL_32 (X.v[1], R_32x2_5_0);\n\
648 X.v[1] ^= X.v[0];\n\
649 }\n\
650 if (Nrounds > 30) {\n\
651 X.v[0] += X.v[1];\n\
652 X.v[1] = RotL_32 (X.v[1], R_32x2_6_0);\n\
653 X.v[1] ^= X.v[0];\n\
654 }\n\
655 if (Nrounds > 31) {\n\
656 X.v[0] += X.v[1];\n\
657 X.v[1] = RotL_32 (X.v[1], R_32x2_7_0);\n\
658 X.v[1] ^= X.v[0];\n\
659 }\n\
660 if (Nrounds > 31) {\n\
661 X.v[0] += ks[2];\n\
662 X.v[1] += ks[0];\n\
663 X.v[1] += 8;\n\
664 }\n\
665 return X;\n\
666}\n\
667\n\
668enum r123_enum_threefry2x32\n\
669{ threefry2x32_rounds = 20 };\n\
670inline threefry2x32_ctr_t threefry2x32 (threefry2x32_ctr_t in,\n\
671 threefry2x32_key_t k)\n\
672 __attribute__ ((always_inline));\n\
673inline threefry2x32_ctr_t\n\
674threefry2x32 (threefry2x32_ctr_t in, threefry2x32_key_t k)\n\
675{\n\
676 return threefry2x32_R (threefry2x32_rounds, in, k);\n\
677}\n\
678#endif\n\
679\n\
680";
681 ss << "double " << mSymName << "_Random (int seed)\n\
682{\n\
683 unsigned tid = get_global_id(0);\n\
684 threefry2x32_key_t k = { {tid, 0xdecafbad} };\n\
685 threefry2x32_ctr_t c = { {seed, 0xf00dcafe} };\n\
686 c = threefry2x32_R(threefry2x32_rounds, c, k);\n\
687 const double factor = 1./(" << SAL_MAX_UINT32 << ".0 + 1.0);\n\
688 const double halffactor = 0.5*factor;\n\
689 return c.v[0] * factor + halffactor;\n\
690}\n\
691";
692 }
693 virtual size_t GetWindowSize() const override
694 {
695 return 1;
696 }
698 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
699 {
700 OpenCLZone zone;
701 cl_int seed = comphelper::rng::uniform_int_distribution(0, SAL_MAX_INT32);
702 // Pass the scalar result back to the rest of the formula kernel
703 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_int: " << seed << "(RANDOM)");
704 cl_int err = clSetKernelArg(k, argno, sizeof(cl_int), static_cast<void*>(&seed));
705 if (CL_SUCCESS != err)
706 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
707 return 1;
708 }
709};
710
711// Arguments that are actually compile-time constant string
712class ConstStringArgument : public DynamicKernelArgument
713{
714public:
715 ConstStringArgument( const ScCalcConfig& config, const std::string& s,
716 const FormulaTreeNodeRef& ft ) :
717 DynamicKernelArgument(config, s, ft) { }
719 virtual void GenDecl( outputstream& ss ) const override
720 {
721 ss << "double " << mSymName;
722 }
723 virtual void GenDeclRef( outputstream& ss ) const override
724 {
725 ss << GenSlidingWindowDeclRef();
726 }
727 virtual void GenSlidingWindowDecl( outputstream& ss ) const override
728 {
729 GenDecl(ss);
730 }
731 virtual std::string GenSlidingWindowDeclRef( bool = false ) const override
732 {
733 outputstream ss;
734 if (GetFormulaToken()->GetType() != formula::svString)
735 throw Unhandled(__FILE__, __LINE__);
736 FormulaToken* Tok = GetFormulaToken();
737 ss << GetStringId(Tok->GetString().getData());
738 return ss.str();
739 }
740 virtual std::string GenIsString( bool = false ) const override
741 {
742 return "true";
743 }
744 virtual size_t GetWindowSize() const override
745 {
746 return 1;
747 }
748 virtual size_t Marshal( cl_kernel k, int argno, int, cl_program ) override
749 {
750 FormulaToken* ref = mFormulaTree->GetFormulaToken();
751 if (ref->GetType() != formula::svString)
752 {
753 throw Unhandled(__FILE__, __LINE__);
754 }
755 cl_double stringId = GetStringId(ref->GetString().getData());
756
757 // Pass the scalar result back to the rest of the formula kernel
758 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno
759 << ": stringId: " << stringId << " (" << DebugPeekData(ref) << ")" );
760 cl_int err = clSetKernelArg(k, argno, sizeof(cl_double), static_cast<void*>(&stringId));
761 if (CL_SUCCESS != err)
762 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
763 return 1;
764 }
765};
766
767} // namespace
768
769// Marshal a string vector reference
770size_t DynamicKernelStringArgument::Marshal( cl_kernel k, int argno, int, cl_program )
771{
772 OpenCLZone zone;
773 FormulaToken* ref = mFormulaTree->GetFormulaToken();
774
777 cl_int err;
779 size_t nStrings = 0;
781 {
783 static_cast<const formula::SingleVectorRefToken*>(ref);
784 nStrings = pSVR->GetArrayLength();
785 vRef = pSVR->GetArray();
786 }
787 else if (ref->GetType() == formula::svDoubleVectorRef)
788 {
790 static_cast<const formula::DoubleVectorRefToken*>(ref);
791 nStrings = pDVR->GetArrayLength();
792 vRef = pDVR->GetArrays()[mnIndex];
793 }
794 size_t szHostBuffer = nStrings * sizeof(cl_double);
795 cl_double* pStringIdsBuffer = nullptr;
796
797 if (vRef.mpStringArray != nullptr)
798 {
799 // Marshal strings. See GetStringId().
800 mpClmem = clCreateBuffer(kEnv.mpkContext,
801 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
802 szHostBuffer, nullptr, &err);
803 if (CL_SUCCESS != err)
804 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
805 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
806
807 pStringIdsBuffer = static_cast<cl_double*>(clEnqueueMapBuffer(
808 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
809 szHostBuffer, 0, nullptr, nullptr, &err));
810 if (CL_SUCCESS != err)
811 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
812
813 for (size_t i = 0; i < nStrings; i++)
814 {
815 if (vRef.mpStringArray[i])
816 pStringIdsBuffer[i] = GetStringId(vRef.mpStringArray[i]);
817 else
818 rtl::math::setNan(&pStringIdsBuffer[i]);
819 }
820 }
821 else
822 {
823 if (nStrings == 0)
824 szHostBuffer = sizeof(cl_double); // a dummy small value
825 // Marshal as a buffer of NANs
826 mpClmem = clCreateBuffer(kEnv.mpkContext,
827 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_ALLOC_HOST_PTR,
828 szHostBuffer, nullptr, &err);
829 if (CL_SUCCESS != err)
830 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
831 SAL_INFO("sc.opencl", "Created buffer " << mpClmem << " size " << szHostBuffer);
832
833 pStringIdsBuffer = static_cast<cl_double*>(clEnqueueMapBuffer(
834 kEnv.mpkCmdQueue, mpClmem, CL_TRUE, CL_MAP_WRITE, 0,
835 szHostBuffer, 0, nullptr, nullptr, &err));
836 if (CL_SUCCESS != err)
837 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
838
839 for (size_t i = 0; i < szHostBuffer / sizeof(cl_double); i++)
840 rtl::math::setNan(&pStringIdsBuffer[i]);
841 }
842 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem,
843 pStringIdsBuffer, 0, nullptr, nullptr);
844 if (CL_SUCCESS != err)
845 throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
846
847 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem
848 << " (stringIds: " << DebugPeekDoubles(pStringIdsBuffer, nStrings) << " "
849 << DebugPeekData(ref,mnIndex) << ")");
850 err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem));
851 if (CL_SUCCESS != err)
852 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
853 return 1;
854}
855
856std::string DynamicKernelStringArgument::GenIsString( bool nested ) const
857{
858 if( nested )
859 return "!isnan(" + mSymName + "[gid0])";
860 FormulaToken* ref = mFormulaTree->GetFormulaToken();
861 size_t nStrings = 0;
863 {
865 static_cast<const formula::SingleVectorRefToken*>(ref);
866 nStrings = pSVR->GetArrayLength();
867 }
868 else if (ref->GetType() == formula::svDoubleVectorRef)
869 {
871 static_cast<const formula::DoubleVectorRefToken*>(ref);
872 nStrings = pDVR->GetArrayLength();
873 }
874 else
875 return "!isnan(" + mSymName + "[gid0])";
876 outputstream ss;
877 ss << "(gid0 < " << nStrings << "? !isnan(" << mSymName << "[gid0]):NAN)";
878 return ss.str();
879}
880
881namespace {
882
884class DynamicKernelMixedArgument : public VectorRef
885{
886public:
887 DynamicKernelMixedArgument( const ScCalcConfig& config, const std::string& s,
888 const FormulaTreeNodeRef& ft ) :
889 VectorRef(config, s, ft), mStringArgument(config, s + "s", ft) { }
890 virtual void GenSlidingWindowDecl( outputstream& ss ) const override
891 {
893 ss << ", ";
894 mStringArgument.GenSlidingWindowDecl(ss);
895 }
896 virtual void GenSlidingWindowFunction( outputstream& ) override { }
898 virtual void GenDecl( outputstream& ss ) const override
899 {
901 ss << ", ";
902 mStringArgument.GenDecl(ss);
903 }
904 virtual void GenDeclRef( outputstream& ss ) const override
905 {
907 ss << ",";
908 mStringArgument.GenDeclRef(ss);
909 }
910 virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
911 {
912 outputstream ss;
913 ss << "(!isnan(" << VectorRef::GenSlidingWindowDeclRef(nested);
914 ss << ")?" << VectorRef::GenSlidingWindowDeclRef(nested);
915 ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
916 ss << ")";
917 return ss.str();
918 }
919 virtual std::string GenDoubleSlidingWindowDeclRef( bool nested = false ) const override
920 {
921 outputstream ss;
923 return ss.str();
924 }
925 virtual std::string GenStringSlidingWindowDeclRef( bool nested = false ) const override
926 {
927 outputstream ss;
928 ss << mStringArgument.GenSlidingWindowDeclRef( nested );
929 return ss.str();
930 }
931 virtual std::string GenIsString( bool nested = false ) const override
932 {
933 return mStringArgument.GenIsString( nested );
934 }
935 virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
936 {
937 int i = VectorRef::Marshal(k, argno, vw, p);
938 i += mStringArgument.Marshal(k, argno + i, vw, p);
939 return i;
940 }
941
942protected:
943 DynamicKernelStringArgument mStringArgument;
944};
945
946}
947
948template<class Base>
950 const ScCalcConfig& config, const std::string& s, const FormulaTreeNodeRef& ft,
951 std::shared_ptr<SlidingFunctionBase> CodeGen, int index)
952 : Base(config, s, ft, index)
953 , mpCodeGen(std::move(CodeGen))
954{
955 FormulaToken* t = ft->GetFormulaToken();
956 if (t->GetType() != formula::svDoubleVectorRef)
957 throw Unhandled(__FILE__, __LINE__);
958 mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
961}
962
963template<class Base>
965{
966 assert(dynamic_cast<OpSumIfs*>(mpCodeGen.get()));
967 return GetWindowSize() > 100 &&
968 ((GetStartFixed() && GetEndFixed()) ||
969 (!GetStartFixed() && !GetEndFixed()));
970}
971
972template<class Base>
974{
975 size_t nArrayLength = mpDVR->GetArrayLength();
976 outputstream ss;
977 if (!bIsStartFixed && !bIsEndFixed)
978 {
979 if (!nested)
980 ss << "((i+gid0) <" << nArrayLength << "?";
981 ss << Base::GetName() << "[i + gid0]";
982 if (!nested)
983 ss << ":NAN)";
984 }
985 else
986 {
987 if (!nested)
988 ss << "(i <" << nArrayLength << "?";
989 ss << Base::GetName() << "[i]";
990 if (!nested)
991 ss << ":NAN)";
992 }
993 return ss.str();
994}
995
996template<class Base>
998{
999 assert(mpDVR);
1000 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1001
1002 if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1003 {
1004 ss << "for (int i = ";
1005 ss << "gid0; i < " << mpDVR->GetArrayLength();
1006 ss << " && i < " << nCurWindowSize << "; i++){\n\t\t";
1007 needBody = true;
1008 return nCurWindowSize;
1009 }
1010 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1011 {
1012 ss << "for (int i = ";
1013 ss << "0; i < " << mpDVR->GetArrayLength();
1014 ss << " && i < gid0+" << nCurWindowSize << "; i++){\n\t\t";
1015 needBody = true;
1016 return nCurWindowSize;
1017 }
1018 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1019 {
1020 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1021 ss << "{int i;\n\t";
1022 outputstream temp1, temp2;
1023 int outLoopSize = UNROLLING_FACTOR;
1024 if (nCurWindowSize / outLoopSize != 0)
1025 {
1026 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1027 for (int count = 0; count < outLoopSize; count++)
1028 {
1029 ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1030 if (count == 0)
1031 {
1032 temp1 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1033 temp1 << "){\n\t\t";
1034 temp1 << "tmp = legalize(";
1035 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1036 temp1 << ", tmp);\n\t\t\t";
1037 temp1 << "}\n\t";
1038 }
1039 ss << temp1.str();
1040 }
1041 ss << "}\n\t";
1042 }
1043 // The residual of mod outLoopSize
1044 for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1045 {
1046 ss << "i = " << count << ";\n\t";
1047 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1048 {
1049 temp2 << "if(i + gid0 < " << mpDVR->GetArrayLength();
1050 temp2 << "){\n\t\t";
1051 temp2 << "tmp = legalize(";
1052 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1053 temp2 << ", tmp);\n\t\t\t";
1054 temp2 << "}\n\t";
1055 }
1056 ss << temp2.str();
1057 }
1058 ss << "}\n";
1059 needBody = false;
1060 return nCurWindowSize;
1061 }
1062 // (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1063 else
1064 {
1065 ss << "\n\t";
1066 ss << "tmpBottom = " << mpCodeGen->GetBottom() << ";\n\t";
1067 ss << "{int i;\n\t";
1068 outputstream temp1, temp2;
1069 int outLoopSize = UNROLLING_FACTOR;
1070 if (nCurWindowSize / outLoopSize != 0)
1071 {
1072 ss << "for(int outLoop=0; outLoop<" << nCurWindowSize / outLoopSize << "; outLoop++){\n\t";
1073 for (int count = 0; count < outLoopSize; count++)
1074 {
1075 ss << "i = outLoop*" << outLoopSize << "+" << count << ";\n\t";
1076 if (count == 0)
1077 {
1078 temp1 << "if(i < " << mpDVR->GetArrayLength();
1079 temp1 << "){\n\t\t";
1080 temp1 << "tmp = legalize(";
1081 temp1 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1082 temp1 << ", tmp);\n\t\t\t";
1083 temp1 << "}\n\t";
1084 }
1085 ss << temp1.str();
1086 }
1087 ss << "}\n\t";
1088 }
1089 // The residual of mod outLoopSize
1090 for (size_t count = nCurWindowSize / outLoopSize * outLoopSize; count < nCurWindowSize; count++)
1091 {
1092 ss << "i = " << count << ";\n\t";
1093 if (count == nCurWindowSize / outLoopSize * outLoopSize)
1094 {
1095 temp2 << "if(i < " << mpDVR->GetArrayLength();
1096 temp2 << "){\n\t\t";
1097 temp2 << "tmp = legalize(";
1098 temp2 << mpCodeGen->Gen2(GenSlidingWindowDeclRef(), "tmp");
1099 temp2 << ", tmp);\n\t\t\t";
1100 temp2 << "}\n\t";
1101 }
1102 ss << temp2.str();
1103 }
1104 ss << "}\n";
1105 needBody = false;
1106 return nCurWindowSize;
1107 }
1108}
1109
1113
1114namespace {
1115
1117class DynamicKernelMixedSlidingArgument : public VectorRef
1118{
1119public:
1120 DynamicKernelMixedSlidingArgument( const ScCalcConfig& config, const std::string& s,
1121 const FormulaTreeNodeRef& ft, const std::shared_ptr<SlidingFunctionBase>& CodeGen,
1122 int index ) :
1123 VectorRef(config, s, ft),
1124 mDoubleArgument(mCalcConfig, s, ft, CodeGen, index),
1125 mStringArgument(mCalcConfig, s + "s", ft, CodeGen, index) { }
1126 virtual void GenSlidingWindowDecl( outputstream& ss ) const override
1127 {
1128 mDoubleArgument.GenSlidingWindowDecl(ss);
1129 ss << ", ";
1131 }
1132 virtual void GenSlidingWindowFunction( outputstream& ) override { }
1134 virtual void GenDecl( outputstream& ss ) const override
1135 {
1136 mDoubleArgument.GenDecl(ss);
1137 ss << ", ";
1139 }
1140 virtual void GenDeclRef( outputstream& ss ) const override
1141 {
1142 mDoubleArgument.GenDeclRef(ss);
1143 ss << ",";
1145 }
1146 virtual std::string GenSlidingWindowDeclRef( bool nested ) const override
1147 {
1148 outputstream ss;
1149 ss << "(!isnan(" << mDoubleArgument.GenSlidingWindowDeclRef(nested);
1150 ss << ")?" << mDoubleArgument.GenSlidingWindowDeclRef(nested);
1151 ss << ":" << mStringArgument.GenSlidingWindowDeclRef(nested);
1152 ss << ")";
1153 return ss.str();
1154 }
1155 virtual std::string GenDoubleSlidingWindowDeclRef( bool = false ) const override
1156 {
1157 outputstream ss;
1158 ss << mDoubleArgument.GenSlidingWindowDeclRef();
1159 return ss.str();
1160 }
1161 virtual std::string GenStringSlidingWindowDeclRef( bool = false ) const override
1162 {
1163 outputstream ss;
1165 return ss.str();
1166 }
1167 virtual size_t Marshal( cl_kernel k, int argno, int vw, cl_program p ) override
1168 {
1169 int i = mDoubleArgument.Marshal(k, argno, vw, p);
1170 i += mStringArgument.Marshal(k, argno + i, vw, p);
1171 return i;
1172 }
1173
1174protected:
1175 DynamicKernelSlidingArgument<VectorRef> mDoubleArgument;
1176 DynamicKernelSlidingArgument<DynamicKernelStringArgument> mStringArgument;
1177};
1178
1180class SymbolTable
1181{
1182public:
1183 typedef std::map<const formula::FormulaToken*, DynamicKernelArgumentRef> ArgumentMap;
1184 // This avoids instability caused by using pointer as the key type
1185 SymbolTable() : mCurId(0) { }
1186 template <class T>
1187 const DynamicKernelArgument* DeclRefArg(const ScCalcConfig& config, const FormulaTreeNodeRef&,
1188 std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize);
1190 void DumpSlidingWindowFunctions( outputstream& ss )
1191 {
1192 for (auto const& argument : mParams)
1193 {
1194 argument->GenSlidingWindowFunction(ss);
1195 ss << "\n";
1196 }
1197 }
1200 void Marshal( cl_kernel, int, cl_program );
1201
1202private:
1203 unsigned int mCurId;
1204 ArgumentMap mSymbols;
1205 std::vector<DynamicKernelArgumentRef> mParams;
1206};
1207
1208void SymbolTable::Marshal( cl_kernel k, int nVectorWidth, cl_program pProgram )
1209{
1210 int i = 1; //The first argument is reserved for results
1211 for (auto const& argument : mParams)
1212 {
1213 i += argument->Marshal(k, i, nVectorWidth, pProgram);
1214 }
1215}
1216
1217}
1218
1219template<class Base>
1221 const ScCalcConfig& config, const std::string& s, const FormulaTreeNodeRef& ft,
1222 std::shared_ptr<SlidingFunctionBase> CodeGen, int index)
1223 : Base(config, s, ft, index)
1224 , mpCodeGen(std::move(CodeGen))
1225 , mpClmem2(nullptr)
1226{
1227 FormulaToken* t = ft->GetFormulaToken();
1228 if (t->GetType() != formula::svDoubleVectorRef)
1229 throw Unhandled(__FILE__, __LINE__);
1230 mpDVR = static_cast<const formula::DoubleVectorRefToken*>(t);
1233}
1234
1235template<class Base>
1237{
1238 if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1239 {
1240 std::string name = Base::GetName();
1241 ss << "__kernel void " << name;
1242 ss << "_reduction(__global double* A, "
1243 "__global double *result,int arrayLength,int windowSize){\n";
1244 ss << " double tmp, current_result =" <<
1245 mpCodeGen->GetBottom();
1246 ss << ";\n";
1247 ss << " int writePos = get_group_id(1);\n";
1248 ss << " int lidx = get_local_id(0);\n";
1249 ss << " __local double shm_buf[256];\n";
1250 if (mpDVR->IsStartFixed())
1251 ss << " int offset = 0;\n";
1252 else // if (!mpDVR->IsStartFixed())
1253 ss << " int offset = get_group_id(1);\n";
1254 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1255 ss << " int end = windowSize;\n";
1256 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1257 ss << " int end = offset + windowSize;\n";
1258 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1259 ss << " int end = windowSize + get_group_id(1);\n";
1260 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1261 ss << " int end = windowSize;\n";
1262 ss << " end = min(end, arrayLength);\n";
1263
1264 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1265 ss << " int loop = arrayLength/512 + 1;\n";
1266 ss << " for (int l=0; l<loop; l++){\n";
1267 ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1268 ss << " int loopOffset = l*512;\n";
1269 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1270 ss << " tmp = legalize(" << mpCodeGen->Gen2(
1271 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1272 ss << " tmp = legalize(" << mpCodeGen->Gen2(
1273 "A[loopOffset + lidx + offset + 256]", "tmp") << ", tmp);\n";
1274 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1275 ss << " tmp = legalize(" << mpCodeGen->Gen2(
1276 "A[loopOffset + lidx + offset]", "tmp") << ", tmp);\n";
1277 ss << " shm_buf[lidx] = tmp;\n";
1278 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1279 ss << " for (int i = 128; i >0; i/=2) {\n";
1280 ss << " if (lidx < i)\n";
1281 ss << " shm_buf[lidx] = ";
1282 // Special case count
1283 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1284 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1285 else
1286 ss << mpCodeGen->Gen2("shm_buf[lidx]", "shm_buf[lidx + i]") << ";\n";
1287 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1288 ss << " }\n";
1289 ss << " if (lidx == 0)\n";
1290 ss << " current_result =";
1291 if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1292 ss << "current_result + shm_buf[0]";
1293 else
1294 ss << mpCodeGen->Gen2("current_result", "shm_buf[0]");
1295 ss << ";\n";
1296 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1297 ss << " }\n";
1298 ss << " if (lidx == 0)\n";
1299 ss << " result[writePos] = current_result;\n";
1300 ss << "}\n";
1301 }
1302 else
1303 {
1304 std::string name = Base::GetName();
1305 /*sum reduction*/
1306 ss << "__kernel void " << name << "_sum";
1307 ss << "_reduction(__global double* A, "
1308 "__global double *result,int arrayLength,int windowSize){\n";
1309 ss << " double tmp, current_result =" <<
1310 mpCodeGen->GetBottom();
1311 ss << ";\n";
1312 ss << " int writePos = get_group_id(1);\n";
1313 ss << " int lidx = get_local_id(0);\n";
1314 ss << " __local double shm_buf[256];\n";
1315 if (mpDVR->IsStartFixed())
1316 ss << " int offset = 0;\n";
1317 else // if (!mpDVR->IsStartFixed())
1318 ss << " int offset = get_group_id(1);\n";
1319 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1320 ss << " int end = windowSize;\n";
1321 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1322 ss << " int end = offset + windowSize;\n";
1323 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1324 ss << " int end = windowSize + get_group_id(1);\n";
1325 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1326 ss << " int end = windowSize;\n";
1327 ss << " end = min(end, arrayLength);\n";
1328 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1329 ss << " int loop = arrayLength/512 + 1;\n";
1330 ss << " for (int l=0; l<loop; l++){\n";
1331 ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1332 ss << " int loopOffset = l*512;\n";
1333 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1334 ss << " tmp = legalize(";
1335 ss << "(A[loopOffset + lidx + offset]+ tmp)";
1336 ss << ", tmp);\n";
1337 ss << " tmp = legalize((A[loopOffset + lidx + offset + 256]+ tmp)";
1338 ss << ", tmp);\n";
1339 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1340 ss << " tmp = legalize((A[loopOffset + lidx + offset] + tmp)";
1341 ss << ", tmp);\n";
1342 ss << " shm_buf[lidx] = tmp;\n";
1343 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1344 ss << " for (int i = 128; i >0; i/=2) {\n";
1345 ss << " if (lidx < i)\n";
1346 ss << " shm_buf[lidx] = ";
1347 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1348 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1349 ss << " }\n";
1350 ss << " if (lidx == 0)\n";
1351 ss << " current_result =";
1352 ss << "current_result + shm_buf[0]";
1353 ss << ";\n";
1354 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1355 ss << " }\n";
1356 ss << " if (lidx == 0)\n";
1357 ss << " result[writePos] = current_result;\n";
1358 ss << "}\n";
1359 /*count reduction*/
1360 ss << "__kernel void " << name << "_count";
1361 ss << "_reduction(__global double* A, "
1362 "__global double *result,int arrayLength,int windowSize){\n";
1363 ss << " double tmp, current_result =" <<
1364 mpCodeGen->GetBottom();
1365 ss << ";\n";
1366 ss << " int writePos = get_group_id(1);\n";
1367 ss << " int lidx = get_local_id(0);\n";
1368 ss << " __local double shm_buf[256];\n";
1369 if (mpDVR->IsStartFixed())
1370 ss << " int offset = 0;\n";
1371 else // if (!mpDVR->IsStartFixed())
1372 ss << " int offset = get_group_id(1);\n";
1373 if (mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1374 ss << " int end = windowSize;\n";
1375 else if (!mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1376 ss << " int end = offset + windowSize;\n";
1377 else if (mpDVR->IsStartFixed() && !mpDVR->IsEndFixed())
1378 ss << " int end = windowSize + get_group_id(1);\n";
1379 else if (!mpDVR->IsStartFixed() && mpDVR->IsEndFixed())
1380 ss << " int end = windowSize;\n";
1381 ss << " end = min(end, arrayLength);\n";
1382 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1383 ss << " int loop = arrayLength/512 + 1;\n";
1384 ss << " for (int l=0; l<loop; l++){\n";
1385 ss << " tmp = " << mpCodeGen->GetBottom() << ";\n";
1386 ss << " int loopOffset = l*512;\n";
1387 ss << " if((loopOffset + lidx + offset + 256) < end) {\n";
1388 ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1389 ss << ", tmp);\n";
1390 ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset+256])?tmp:tmp+1.0)";
1391 ss << ", tmp);\n";
1392 ss << " } else if ((loopOffset + lidx + offset) < end)\n";
1393 ss << " tmp = legalize((isnan(A[loopOffset + lidx + offset])?tmp:tmp+1.0)";
1394 ss << ", tmp);\n";
1395 ss << " shm_buf[lidx] = tmp;\n";
1396 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1397 ss << " for (int i = 128; i >0; i/=2) {\n";
1398 ss << " if (lidx < i)\n";
1399 ss << " shm_buf[lidx] = ";
1400 ss << "shm_buf[lidx] + shm_buf[lidx + i];\n";
1401 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1402 ss << " }\n";
1403 ss << " if (lidx == 0)\n";
1404 ss << " current_result =";
1405 ss << "current_result + shm_buf[0];";
1406 ss << ";\n";
1407 ss << " barrier(CLK_LOCAL_MEM_FENCE);\n";
1408 ss << " }\n";
1409 ss << " if (lidx == 0)\n";
1410 ss << " result[writePos] = current_result;\n";
1411 ss << "}\n";
1412 }
1413}
1414
1415template<class Base>
1417{
1418 outputstream ss;
1419 if (!bIsStartFixed && !bIsEndFixed)
1420 ss << Base::GetName() << "[i + gid0]";
1421 else
1422 ss << Base::GetName() << "[i]";
1423 return ss.str();
1424}
1425
1426template<class Base>
1428 outputstream& ss, int nResultSize, bool& needBody )
1429{
1430 assert(mpDVR);
1431 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1432 std::string temp = Base::GetName() + "[gid0]";
1433 ss << "tmp = ";
1434 // Special case count
1435 if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1436 {
1437 ss << mpCodeGen->Gen2(temp, "tmp") << ";\n";
1438 ss << "nCount = nCount-1;\n";
1439 ss << "nCount = nCount +"; /*re-assign nCount from count reduction*/
1440 ss << Base::GetName() << "[gid0+" << nResultSize << "]" << ";\n";
1441 }
1442 else if (dynamic_cast<OpCount*>(mpCodeGen.get()))
1443 ss << temp << "+ tmp";
1444 else
1445 ss << mpCodeGen->Gen2(temp, "tmp");
1446 ss << ";\n\t";
1447 needBody = false;
1448 return nCurWindowSize;
1449}
1450
1451template<class Base>
1452size_t ParallelReductionVectorRef<Base>::Marshal( cl_kernel k, int argno, int w, cl_program mpProgram )
1453{
1454 assert(Base::mpClmem == nullptr);
1455
1456 OpenCLZone zone;
1459 cl_int err;
1460 size_t nInput = mpDVR->GetArrayLength();
1461 size_t nCurWindowSize = mpDVR->GetRefRowSize();
1462 // create clmem buffer
1463 if (mpDVR->GetArrays()[Base::mnIndex].mpNumericArray == nullptr)
1464 throw Unhandled(__FILE__, __LINE__);
1465 double* pHostBuffer = const_cast<double*>(
1466 mpDVR->GetArrays()[Base::mnIndex].mpNumericArray);
1467 size_t szHostBuffer = nInput * sizeof(double);
1468 Base::mpClmem = clCreateBuffer(kEnv.mpkContext,
1469 cl_mem_flags(CL_MEM_READ_ONLY) | CL_MEM_USE_HOST_PTR,
1470 szHostBuffer,
1471 pHostBuffer, &err);
1472 SAL_INFO("sc.opencl", "Created buffer " << Base::mpClmem << " size " << nInput << "*" << sizeof(double) << "=" << szHostBuffer << " using host buffer " << pHostBuffer);
1473
1474 mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1475 CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
1476 sizeof(double) * w, nullptr, nullptr);
1477 if (CL_SUCCESS != err)
1478 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1479 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << w << "=" << (sizeof(double)*w));
1480
1481 // reproduce the reduction function name
1482 std::string kernelName;
1483 if (!dynamic_cast<OpAverage*>(mpCodeGen.get()))
1484 kernelName = Base::GetName() + "_reduction";
1485 else
1486 kernelName = Base::GetName() + "_sum_reduction";
1487 cl_kernel redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1488 if (err != CL_SUCCESS)
1489 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1490 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
1491
1492 // set kernel arg of reduction kernel
1493 // TODO(Wei Wei): use unique name for kernel
1494 cl_mem buf = Base::GetCLBuffer();
1495 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
1496 err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1497 static_cast<void*>(&buf));
1498 if (CL_SUCCESS != err)
1499 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1500
1501 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
1502 err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
1503 if (CL_SUCCESS != err)
1504 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1505
1506 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
1507 err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
1508 if (CL_SUCCESS != err)
1509 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1510
1511 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
1512 err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
1513 if (CL_SUCCESS != err)
1514 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1515
1516 // set work group size and execute
1517 size_t global_work_size[] = { 256, static_cast<size_t>(w) };
1518 size_t const local_work_size[] = { 256, 1 };
1519 SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
1520 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
1521 global_work_size, local_work_size, 0, nullptr, nullptr);
1522 if (CL_SUCCESS != err)
1523 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1524 err = clFinish(kEnv.mpkCmdQueue);
1525 if (CL_SUCCESS != err)
1526 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1527 if (dynamic_cast<OpAverage*>(mpCodeGen.get()))
1528 {
1529 /*average need more reduction kernel for count computing*/
1530 std::unique_ptr<double[]> pAllBuffer(new double[2 * w]);
1531 double* resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1532 mpClmem2,
1533 CL_TRUE, CL_MAP_READ, 0,
1534 sizeof(double) * w, 0, nullptr, nullptr,
1535 &err));
1536 if (err != CL_SUCCESS)
1537 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
1538
1539 for (int i = 0; i < w; i++)
1540 pAllBuffer[i] = resbuf[i];
1541 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
1542 if (err != CL_SUCCESS)
1543 throw OpenCLError("clEnqueueUnmapMemObject", err, __FILE__, __LINE__);
1544
1545 kernelName = Base::GetName() + "_count_reduction";
1546 redKernel = clCreateKernel(mpProgram, kernelName.c_str(), &err);
1547 if (err != CL_SUCCESS)
1548 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1549 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << mpProgram);
1550
1551 // set kernel arg of reduction kernel
1552 buf = Base::GetCLBuffer();
1553 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 0 << ": cl_mem: " << buf);
1554 err = clSetKernelArg(redKernel, 0, sizeof(cl_mem),
1555 static_cast<void*>(&buf));
1556 if (CL_SUCCESS != err)
1557 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1558
1559 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 1 << ": cl_mem: " << mpClmem2);
1560 err = clSetKernelArg(redKernel, 1, sizeof(cl_mem), &mpClmem2);
1561 if (CL_SUCCESS != err)
1562 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1563
1564 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 2 << ": cl_int: " << nInput);
1565 err = clSetKernelArg(redKernel, 2, sizeof(cl_int), static_cast<void*>(&nInput));
1566 if (CL_SUCCESS != err)
1567 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1568
1569 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << 3 << ": cl_int: " << nCurWindowSize);
1570 err = clSetKernelArg(redKernel, 3, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
1571 if (CL_SUCCESS != err)
1572 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1573
1574 // set work group size and execute
1575 size_t global_work_size1[] = { 256, static_cast<size_t>(w) };
1576 size_t const local_work_size1[] = { 256, 1 };
1577 SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
1578 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
1579 global_work_size1, local_work_size1, 0, nullptr, nullptr);
1580 if (CL_SUCCESS != err)
1581 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1582 err = clFinish(kEnv.mpkCmdQueue);
1583 if (CL_SUCCESS != err)
1584 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1585 resbuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
1586 mpClmem2,
1587 CL_TRUE, CL_MAP_READ, 0,
1588 sizeof(double) * w, 0, nullptr, nullptr,
1589 &err));
1590 if (err != CL_SUCCESS)
1591 throw OpenCLError("clEnqueueMapBuffer", err, __FILE__, __LINE__);
1592 for (int i = 0; i < w; i++)
1593 pAllBuffer[i + w] = resbuf[i];
1594 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpClmem2, resbuf, 0, nullptr, nullptr);
1595 // FIXME: Is it intentional to not throw an OpenCLError even if the clEnqueueUnmapMemObject() fails?
1596 if (CL_SUCCESS != err)
1597 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
1598 if (mpClmem2)
1599 {
1600 err = clReleaseMemObject(mpClmem2);
1601 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
1602 mpClmem2 = nullptr;
1603 }
1604 mpClmem2 = clCreateBuffer(kEnv.mpkContext,
1605 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_COPY_HOST_PTR,
1606 w * sizeof(double) * 2, pAllBuffer.get(), &err);
1607 if (CL_SUCCESS != err)
1608 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1609 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << w << "*" << sizeof(double) << "=" << (w*sizeof(double)) << " copying host buffer " << pAllBuffer.get());
1610 }
1611 // set kernel arg
1612 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
1613 err = clSetKernelArg(k, argno, sizeof(cl_mem), &mpClmem2);
1614 if (CL_SUCCESS != err)
1615 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1616 return 1;
1617}
1618
1619template<class Base>
1621{
1622 if (mpClmem2)
1623 {
1624 cl_int err;
1625 err = clReleaseMemObject(mpClmem2);
1626 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
1627 mpClmem2 = nullptr;
1628 }
1629}
1630
1632
1633namespace {
1634
1635struct SumIfsArgs
1636{
1637 explicit SumIfsArgs(cl_mem x) : mCLMem(x), mConst(0.0) { }
1638 explicit SumIfsArgs(double x) : mCLMem(nullptr), mConst(x) { }
1639 cl_mem mCLMem;
1640 double mConst;
1641};
1642
1644class DynamicKernelSoPArguments : public DynamicKernelArgument
1645{
1646public:
1647 typedef std::vector<DynamicKernelArgumentRef> SubArgumentsType;
1648
1649 DynamicKernelSoPArguments( const ScCalcConfig& config,
1650 const std::string& s, const FormulaTreeNodeRef& ft,
1651 std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize );
1652
1654 virtual size_t Marshal( cl_kernel k, int argno, int nVectorWidth, cl_program pProgram ) override
1655 {
1656 OpenCLZone zone;
1657 unsigned i = 0;
1658 for (const auto& rxSubArgument : mvSubArguments)
1659 {
1660 i += rxSubArgument->Marshal(k, argno + i, nVectorWidth, pProgram);
1661 }
1662 if (OpSumIfs* OpSumCodeGen = dynamic_cast<OpSumIfs*>(mpCodeGen.get()))
1663 {
1666 cl_int err;
1667 DynamicKernelArgument* Arg = mvSubArguments[0].get();
1668 DynamicKernelSlidingArgument<VectorRef>* slidingArgPtr =
1669 static_cast<DynamicKernelSlidingArgument<VectorRef>*>(Arg);
1670 mpClmem2 = nullptr;
1671
1672 if (OpSumCodeGen->NeedReductionKernel())
1673 {
1674 size_t nInput = slidingArgPtr->GetArrayLength();
1675 size_t nCurWindowSize = slidingArgPtr->GetWindowSize();
1676 std::vector<SumIfsArgs> vclmem;
1677
1678 for (const auto& rxSubArgument : mvSubArguments)
1679 {
1680 if (VectorRef* VR = dynamic_cast<VectorRef*>(rxSubArgument.get()))
1681 vclmem.emplace_back(VR->GetCLBuffer());
1682 else if (DynamicKernelConstantArgument* CA = dynamic_cast<DynamicKernelConstantArgument*>(rxSubArgument.get()))
1683 vclmem.emplace_back(CA->GetDouble());
1684 else
1685 vclmem.emplace_back(nullptr);
1686 }
1687 mpClmem2 = clCreateBuffer(kEnv.mpkContext, CL_MEM_READ_WRITE,
1688 sizeof(double) * nVectorWidth, nullptr, &err);
1689 if (CL_SUCCESS != err)
1690 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
1691 SAL_INFO("sc.opencl", "Created buffer " << mpClmem2 << " size " << sizeof(double) << "*" << nVectorWidth << "=" << (sizeof(double)*nVectorWidth));
1692
1693 std::string kernelName = mvSubArguments[0]->GetName() + "_SumIfs_reduction";
1694 cl_kernel redKernel = clCreateKernel(pProgram, kernelName.c_str(), &err);
1695 if (err != CL_SUCCESS)
1696 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
1697 SAL_INFO("sc.opencl", "Created kernel " << redKernel << " with name " << kernelName << " in program " << pProgram);
1698
1699 // set kernel arg of reduction kernel
1700 for (size_t j = 0; j < vclmem.size(); j++)
1701 {
1702 if (vclmem[j].mCLMem)
1703 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": cl_mem: " << vclmem[j].mCLMem);
1704 else
1705 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << j << ": double: " << preciseFloat( vclmem[j].mConst ));
1706 err = clSetKernelArg(redKernel, j,
1707 vclmem[j].mCLMem ? sizeof(cl_mem) : sizeof(double),
1708 vclmem[j].mCLMem ? static_cast<void*>(&vclmem[j].mCLMem) :
1709 static_cast<void*>(&vclmem[j].mConst));
1710 if (CL_SUCCESS != err)
1711 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1712 }
1713 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << vclmem.size() << ": cl_mem: " << mpClmem2);
1714 err = clSetKernelArg(redKernel, vclmem.size(), sizeof(cl_mem), static_cast<void*>(&mpClmem2));
1715 if (CL_SUCCESS != err)
1716 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1717
1718 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 1) << ": cl_int: " << nInput);
1719 err = clSetKernelArg(redKernel, vclmem.size() + 1, sizeof(cl_int), static_cast<void*>(&nInput));
1720 if (CL_SUCCESS != err)
1721 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1722
1723 SAL_INFO("sc.opencl", "Kernel " << redKernel << " arg " << (vclmem.size() + 2) << ": cl_int: " << nCurWindowSize);
1724 err = clSetKernelArg(redKernel, vclmem.size() + 2, sizeof(cl_int), static_cast<void*>(&nCurWindowSize));
1725 if (CL_SUCCESS != err)
1726 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1727 // set work group size and execute
1728 size_t global_work_size[] = { 256, static_cast<size_t>(nVectorWidth) };
1729 size_t const local_work_size[] = { 256, 1 };
1730 SAL_INFO("sc.opencl", "Enqueuing kernel " << redKernel);
1731 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, redKernel, 2, nullptr,
1732 global_work_size, local_work_size, 0, nullptr, nullptr);
1733 if (CL_SUCCESS != err)
1734 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
1735
1736 err = clFinish(kEnv.mpkCmdQueue);
1737 if (CL_SUCCESS != err)
1738 throw OpenCLError("clFinish", err, __FILE__, __LINE__);
1739
1740 SAL_INFO("sc.opencl", "Releasing kernel " << redKernel);
1741 err = clReleaseKernel(redKernel);
1742 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
1743
1744 // Pass mpClmem2 to the "real" kernel
1745 SAL_INFO("sc.opencl", "Kernel " << k << " arg " << argno << ": cl_mem: " << mpClmem2);
1746 err = clSetKernelArg(k, argno, sizeof(cl_mem), static_cast<void*>(&mpClmem2));
1747 if (CL_SUCCESS != err)
1748 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
1749 }
1750 }
1751 return i;
1752 }
1753
1754 virtual void GenSlidingWindowFunction( outputstream& ss ) override
1755 {
1757 rArg->GenSlidingWindowFunction(ss);
1758 mpCodeGen->GenSlidingWindowFunction(ss, mSymName, mvSubArguments);
1759 }
1760 virtual void GenDeclRef( outputstream& ss ) const override
1761 {
1762 for (size_t i = 0; i < mvSubArguments.size(); i++)
1763 {
1764 if (i)
1765 ss << ",";
1766 mvSubArguments[i]->GenDeclRef(ss);
1767 }
1768 }
1769 virtual void GenDecl( outputstream& ss ) const override
1770 {
1771 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
1772 ++it)
1773 {
1774 if (it != mvSubArguments.begin())
1775 ss << ", ";
1776 (*it)->GenDecl(ss);
1777 }
1778 }
1779
1780 virtual size_t GetWindowSize() const override
1781 {
1782 size_t nCurWindowSize = 0;
1783 for (const auto & rSubArgument : mvSubArguments)
1784 {
1785 size_t nCurChildWindowSize = rSubArgument->GetWindowSize();
1786 nCurWindowSize = (nCurWindowSize < nCurChildWindowSize) ?
1787 nCurChildWindowSize : nCurWindowSize;
1788 }
1789 return nCurWindowSize;
1790 }
1791
1793 virtual void GenSlidingWindowDecl( outputstream& ss ) const override
1794 {
1795 for (SubArgumentsType::const_iterator it = mvSubArguments.begin(), e = mvSubArguments.end(); it != e;
1796 ++it)
1797 {
1798 if (it != mvSubArguments.begin())
1799 ss << ", ";
1800 (*it)->GenSlidingWindowDecl(ss);
1801 }
1802 }
1805 virtual std::string GenSlidingWindowDeclRef( bool nested = false ) const override
1806 {
1807 outputstream ss;
1808 if (!nested)
1809 {
1810 ss << mSymName << "_" << mpCodeGen->BinFuncName() << "(";
1811 for (size_t i = 0; i < mvSubArguments.size(); i++)
1812 {
1813 if (i)
1814 ss << ", ";
1815 mvSubArguments[i]->GenDeclRef(ss);
1816 }
1817 ss << ")";
1818 }
1819 else
1820 {
1821 if (mvSubArguments.size() != 2)
1822 throw Unhandled(__FILE__, __LINE__);
1823 bool bArgument1_NeedNested =
1824 mvSubArguments[0]->GetFormulaToken()->GetType()
1826 bool bArgument2_NeedNested =
1827 mvSubArguments[1]->GetFormulaToken()->GetType()
1829 ss << "(";
1830 ss << mpCodeGen->
1831 Gen2(mvSubArguments[0]
1832 ->GenSlidingWindowDeclRef(bArgument1_NeedNested),
1834 ->GenSlidingWindowDeclRef(bArgument2_NeedNested));
1835 ss << ")";
1836 }
1837 return ss.str();
1838 }
1839 virtual std::string DumpOpName() const override
1840 {
1841 std::string t = "_" + mpCodeGen->BinFuncName();
1842 for (const auto & rSubArgument : mvSubArguments)
1843 t += rSubArgument->DumpOpName();
1844 return t;
1845 }
1846 virtual void DumpInlineFun( std::set<std::string>& decls,
1847 std::set<std::string>& funs ) const override
1848 {
1849 mpCodeGen->BinInlineFun(decls, funs);
1850 for (const auto & rSubArgument : mvSubArguments)
1851 rSubArgument->DumpInlineFun(decls, funs);
1852 }
1853 virtual bool IsEmpty() const override
1854 {
1855 for (const auto & rSubArgument : mvSubArguments)
1856 if( !rSubArgument->IsEmpty())
1857 return false;
1858 return true;
1859 }
1860 virtual ~DynamicKernelSoPArguments() override
1861 {
1862 if (mpClmem2)
1863 {
1864 cl_int err;
1865 err = clReleaseMemObject(mpClmem2);
1866 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
1867 mpClmem2 = nullptr;
1868 }
1869 }
1870
1871private:
1872 SubArgumentsType mvSubArguments;
1873 std::shared_ptr<SlidingFunctionBase> mpCodeGen;
1874 cl_mem mpClmem2;
1875};
1876
1877}
1878
1880 const std::string& ts, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen,
1881 int nResultSize )
1882{
1883 return std::make_shared<DynamicKernelSoPArguments>(config, ts, ft, std::move(pCodeGen), nResultSize);
1884}
1885
1886template<class Base>
1887static std::shared_ptr<DynamicKernelArgument> VectorRefFactory( const ScCalcConfig& config, const std::string& s,
1888 const FormulaTreeNodeRef& ft,
1889 std::shared_ptr<SlidingFunctionBase>& pCodeGen,
1890 int index )
1891{
1892 //Black lists ineligible classes here ..
1893 // SUMIFS does not perform parallel reduction at DoubleVectorRef level
1894 if (dynamic_cast<OpSumIfs*>(pCodeGen.get()))
1895 {
1896 // coverity[identical_branches] - only identical if Base happens to be VectorRef
1897 if (index == 0) // the first argument of OpSumIfs cannot be strings anyway
1898 return std::make_shared<DynamicKernelSlidingArgument<VectorRef>>(config, s, ft, pCodeGen, index);
1899 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
1900 }
1901 // AVERAGE is not supported yet
1902 //Average has been supported by reduction kernel
1903 /*else if (dynamic_cast<OpAverage*>(pCodeGen.get()))
1904 {
1905 return new DynamicKernelSlidingArgument<Base>(config, s, ft, pCodeGen, index);
1906 }*/
1907 // MUL is not supported yet
1908 else if (dynamic_cast<OpMul*>(pCodeGen.get()))
1909 {
1910 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
1911 }
1912 // Sub is not a reduction per se
1913 else if (dynamic_cast<OpSub*>(pCodeGen.get()))
1914 {
1915 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
1916 }
1917 // Only child class of Reduction is supported
1918 else if (!dynamic_cast<Reduction*>(pCodeGen.get()))
1919 {
1920 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
1921 }
1922
1923 const formula::DoubleVectorRefToken* pDVR =
1924 static_cast<const formula::DoubleVectorRefToken*>(
1925 ft->GetFormulaToken());
1926 // Window being too small to justify a parallel reduction
1927 if (pDVR->GetRefRowSize() < REDUCE_THRESHOLD)
1928 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
1929 if (pDVR->IsStartFixed() == pDVR->IsEndFixed())
1930 return std::make_shared<ParallelReductionVectorRef<Base>>(config, s, ft, pCodeGen, index);
1931 else // Other cases are not supported as well
1932 return std::make_shared<DynamicKernelSlidingArgument<Base>>(config, s, ft, pCodeGen, index);
1933}
1934
1935DynamicKernelSoPArguments::DynamicKernelSoPArguments(const ScCalcConfig& config,
1936 const std::string& s, const FormulaTreeNodeRef& ft, std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize ) :
1937 DynamicKernelArgument(config, s, ft), mpCodeGen(pCodeGen), mpClmem2(nullptr)
1938{
1939 size_t nChildren = ft->Children.size();
1940
1941 for (size_t i = 0; i < nChildren; i++)
1942 {
1943 FormulaTreeNodeRef rChild = ft->Children[i];
1944 if (!rChild)
1945 throw Unhandled(__FILE__, __LINE__);
1946 FormulaToken* pChild = rChild->GetFormulaToken();
1947 if (!pChild)
1948 throw Unhandled(__FILE__, __LINE__);
1949 OpCode opc = pChild->GetOpCode();
1950 outputstream tmpname;
1951 tmpname << s << "_" << i;
1952 std::string ts = tmpname.str();
1953 switch (opc)
1954 {
1955 case ocPush:
1956 if (pChild->GetType() == formula::svDoubleVectorRef)
1957 {
1958 const formula::DoubleVectorRefToken* pDVR =
1959 static_cast<const formula::DoubleVectorRefToken*>(pChild);
1960
1961 // The code below will split one svDoubleVectorRef into one subargument
1962 // for each column of data, and then all these subarguments will be later
1963 // passed to the code generating the function. Most of the code then
1964 // simply treats each subargument as one argument to the function, and thus
1965 // could break in this case.
1966 // As a simple solution, simply prevent this case, unless the code in question
1967 // explicitly claims it will handle this situation properly.
1968 if( pDVR->GetArrays().size() > 1 )
1969 {
1970 if( !pCodeGen->canHandleMultiVector())
1971 throw UnhandledToken(("Function '" + pCodeGen->BinFuncName()
1972 + "' cannot handle multi-column DoubleRef").c_str(), __FILE__, __LINE__);
1973
1974 SAL_INFO("sc.opencl", "multi-column DoubleRef");
1975
1976 }
1977
1978 // FIXME: The Right Thing to do would be to compare the accumulated kernel
1979 // parameter size against the CL_DEVICE_MAX_PARAMETER_SIZE of the device, but
1980 // let's just do this sanity check for now. The kernel compilation will
1981 // hopefully fail anyway if the size of parameters exceeds the limit and this
1982 // sanity check is just to make us bail out a bit earlier.
1983
1984 // The number 50 comes from the fact that the minimum size of
1985 // CL_DEVICE_MAX_PARAMETER_SIZE is 256, which for 32-bit code probably means 64
1986 // of them. Round down a bit.
1987
1988 if (pDVR->GetArrays().size() > 50)
1989 throw UnhandledToken(("Kernel would have ridiculously many parameters (" + std::to_string(2 + pDVR->GetArrays().size()) + ")").c_str(), __FILE__, __LINE__);
1990
1991 for (size_t j = 0; j < pDVR->GetArrays().size(); ++j)
1992 {
1993 SAL_INFO("sc.opencl", "i=" << i << " j=" << j <<
1994 " mpNumericArray=" << pDVR->GetArrays()[j].mpNumericArray <<
1995 " mpStringArray=" << pDVR->GetArrays()[j].mpStringArray <<
1996 " allStringsAreNull=" << (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())?"YES":"NO") <<
1997 " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
1998 " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
1999
2000 if (pDVR->GetArrays()[j].mpNumericArray &&
2001 pCodeGen->takeNumeric() &&
2002 pDVR->GetArrays()[j].mpStringArray &&
2003 pCodeGen->takeString())
2004 {
2005 // Function takes numbers or strings, there are both
2006 SAL_INFO("sc.opencl", "Numbers and strings");
2007 mvSubArguments.push_back(
2008 std::make_shared<DynamicKernelMixedSlidingArgument>(mCalcConfig,
2009 ts, ft->Children[i], mpCodeGen, j));
2010 }
2011 else if (pDVR->GetArrays()[j].mpNumericArray &&
2012 pCodeGen->takeNumeric() &&
2013 (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength())
2015 || pCodeGen->forceStringsToZero()))
2016 {
2017 // Function takes numbers, and either there
2018 // are no strings, or there are strings but
2019 // they are to be treated as zero
2020 SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2021 if(!AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()))
2022 {
2023 mvSubArguments.push_back(
2024 VectorRefFactory<VectorRefStringsToZero>(mCalcConfig,
2025 ts, ft->Children[i], mpCodeGen, j));
2026 }
2027 else
2028 {
2029 mvSubArguments.push_back(
2030 VectorRefFactory<VectorRef>(mCalcConfig,
2031 ts, ft->Children[i], mpCodeGen, j));
2032 }
2033 }
2034 else if (pDVR->GetArrays()[j].mpNumericArray == nullptr &&
2035 pCodeGen->takeNumeric() &&
2036 pDVR->GetArrays()[j].mpStringArray &&
2038 || pCodeGen->forceStringsToZero()))
2039 {
2040 // Function takes numbers, and there are only
2041 // strings, but they are to be treated as zero
2042 SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2043 mvSubArguments.push_back(
2044 VectorRefFactory<VectorRefStringsToZero>(mCalcConfig,
2045 ts, ft->Children[i], mpCodeGen, j));
2046 }
2047 else if (pDVR->GetArrays()[j].mpStringArray &&
2048 pCodeGen->takeString())
2049 {
2050 // There are strings, and the function takes strings.
2051 SAL_INFO("sc.opencl", "Strings only");
2052 mvSubArguments.push_back(
2054 <DynamicKernelStringArgument>(mCalcConfig,
2055 ts, ft->Children[i], mpCodeGen, j));
2056 }
2057 else if (AllStringsAreNull(pDVR->GetArrays()[j].mpStringArray, pDVR->GetArrayLength()) &&
2058 pDVR->GetArrays()[j].mpNumericArray == nullptr)
2059 {
2060 // There are only empty cells. Push as an
2061 // array of NANs
2062 SAL_INFO("sc.opencl", "Only empty cells");
2063 mvSubArguments.push_back(
2064 VectorRefFactory<VectorRef>(mCalcConfig,
2065 ts, ft->Children[i], mpCodeGen, j));
2066 }
2067 else
2068 {
2069 SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2070 throw UnhandledToken(("Unhandled numbers/strings combination for '"
2071 + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2072 }
2073 }
2074 }
2075 else if (pChild->GetType() == formula::svSingleVectorRef)
2076 {
2077 const formula::SingleVectorRefToken* pSVR =
2078 static_cast<const formula::SingleVectorRefToken*>(pChild);
2079
2080 SAL_INFO("sc.opencl", "i=" << i <<
2081 " mpNumericArray=" << pSVR->GetArray().mpNumericArray <<
2082 " mpStringArray=" << pSVR->GetArray().mpStringArray <<
2083 " allStringsAreNull=" << (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())?"YES":"NO") <<
2084 " takeNumeric=" << (pCodeGen->takeNumeric()?"YES":"NO") <<
2085 " takeString=" << (pCodeGen->takeString()?"YES":"NO"));
2086
2087 if (pSVR->GetArray().mpNumericArray &&
2088 pCodeGen->takeNumeric() &&
2089 pSVR->GetArray().mpStringArray &&
2090 pCodeGen->takeString())
2091 {
2092 // Function takes numbers or strings, there are both
2093 SAL_INFO("sc.opencl", "Numbers and strings");
2094 mvSubArguments.push_back(
2095 std::make_shared<DynamicKernelMixedArgument>(mCalcConfig,
2096 ts, ft->Children[i]));
2097 }
2098 else if (pSVR->GetArray().mpNumericArray &&
2099 pCodeGen->takeNumeric() &&
2100 (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength())
2102 || pCodeGen->forceStringsToZero()))
2103 {
2104 // Function takes numbers, and either there
2105 // are no strings, or there are strings but
2106 // they are to be treated as zero
2107 SAL_INFO("sc.opencl", "Numbers (no strings or strings treated as zero)");
2108 if( !AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()))
2109 mvSubArguments.push_back(
2110 std::make_shared<VectorRefStringsToZero>(mCalcConfig, ts,
2111 ft->Children[i]));
2112 else
2113 mvSubArguments.push_back(
2114 std::make_shared<VectorRef>(mCalcConfig, ts,
2115 ft->Children[i]));
2116 }
2117 else if (pSVR->GetArray().mpNumericArray == nullptr &&
2118 pCodeGen->takeNumeric() &&
2119 pSVR->GetArray().mpStringArray &&
2121 || pCodeGen->forceStringsToZero()))
2122 {
2123 // Function takes numbers, and there are only
2124 // strings, but they are to be treated as zero
2125 SAL_INFO("sc.opencl", "Only strings even if want numbers but should be treated as zero");
2126 mvSubArguments.push_back(
2127 std::make_shared<VectorRefStringsToZero>(mCalcConfig, ts,
2128 ft->Children[i]));
2129 }
2130 else if (pSVR->GetArray().mpStringArray &&
2131 pCodeGen->takeString())
2132 {
2133 // There are strings, and the function takes strings.
2134 SAL_INFO("sc.opencl", "Strings only");
2135 mvSubArguments.push_back(
2136 std::make_shared<DynamicKernelStringArgument>(mCalcConfig,
2137 ts, ft->Children[i]));
2138 }
2139 else if (AllStringsAreNull(pSVR->GetArray().mpStringArray, pSVR->GetArrayLength()) &&
2140 pSVR->GetArray().mpNumericArray == nullptr)
2141 {
2142 // There are only empty cells. Push as an
2143 // array of NANs
2144 SAL_INFO("sc.opencl", "Only empty cells");
2145 mvSubArguments.push_back(
2146 std::make_shared<VectorRef>(mCalcConfig, ts,
2147 ft->Children[i]));
2148 }
2149 else
2150 {
2151 SAL_INFO("sc.opencl", "Unhandled case, rejecting for OpenCL");
2152 throw UnhandledToken(("Unhandled numbers/strings combination for '"
2153 + pCodeGen->BinFuncName() + "'").c_str(), __FILE__, __LINE__);
2154 }
2155 }
2156 else if (pChild->GetType() == formula::svDouble)
2157 {
2158 SAL_INFO("sc.opencl", "Constant number case");
2159 mvSubArguments.push_back(
2160 std::make_shared<DynamicKernelConstantArgument>(mCalcConfig, ts,
2161 ft->Children[i]));
2162 }
2163 else if (pChild->GetType() == formula::svString
2164 && pCodeGen->takeString())
2165 {
2166 SAL_INFO("sc.opencl", "Constant string case");
2167 mvSubArguments.push_back(
2168 std::make_shared<ConstStringArgument>(mCalcConfig, ts,
2169 ft->Children[i]));
2170 }
2171 else if (pChild->GetType() == formula::svString
2172 && !pCodeGen->takeString()
2173 && pCodeGen->takeNumeric()
2174 && pCodeGen->forceStringsToZero())
2175 {
2176 SAL_INFO("sc.opencl", "Constant string case, treated as zero");
2177 mvSubArguments.push_back(
2178 DynamicKernelArgumentRef(new DynamicKernelStringToZeroArgument(mCalcConfig, ts,
2179 ft->Children[i])));
2180 }
2181 else
2182 {
2183 SAL_INFO("sc.opencl", "Unhandled operand, rejecting for OpenCL");
2184 throw UnhandledToken(("unhandled operand " + StackVarEnumToString(pChild->GetType()) + " for ocPush").c_str(), __FILE__, __LINE__);
2185 }
2186 break;
2187 case ocPi:
2188 mvSubArguments.push_back(
2189 std::make_shared<DynamicKernelPiArgument>(mCalcConfig, ts,
2190 ft->Children[i]));
2191 break;
2192 case ocRandom:
2193 mvSubArguments.push_back(
2194 std::make_shared<DynamicKernelRandomArgument>(mCalcConfig, ts,
2195 ft->Children[i]));
2196 break;
2197#define CASE(opcode, createCode) \
2198 case opcode: \
2199 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], createCode, nResultSize)); \
2200 break;
2201 CASE(ocAbs, std::make_shared<OpAbs>())
2202 CASE(ocAdd, std::make_shared<OpSum>(nResultSize))
2203 CASE(ocAnd, std::make_shared<OpAnd>())
2204 CASE(ocArcCos, std::make_shared<OpArcCos>())
2205 CASE(ocArcCosHyp, std::make_shared<OpArcCosHyp>())
2206 CASE(ocArcCot, std::make_shared<OpArcCot>())
2207 CASE(ocArcCotHyp, std::make_shared<OpArcCotHyp>())
2208 CASE(ocArcSin, std::make_shared<OpArcSin>())
2209 CASE(ocArcSinHyp, std::make_shared<OpArcSinHyp>())
2210 CASE(ocArcTan, std::make_shared<OpArcTan>())
2211 CASE(ocArcTan2, std::make_shared<OpArcTan2>())
2212 CASE(ocArcTanHyp, std::make_shared<OpArcTanH>())
2213 CASE(ocAveDev, std::make_shared<OpAveDev>())
2214 CASE(ocAverage, std::make_shared<OpAverage>(nResultSize))
2215 CASE(ocAverageA, std::make_shared<OpAverageA>(nResultSize))
2216 CASE(ocAverageIf, std::make_shared<OpAverageIf>())
2217 CASE(ocAverageIfs, std::make_shared<OpAverageIfs>())
2218 CASE(ocB, std::make_shared<OpB>())
2219 CASE(ocBetaDist, std::make_shared<OpBetaDist>())
2220 CASE(ocBetaInv, std::make_shared<OpBetainv>())
2221 CASE(ocBinomDist, std::make_shared<OpBinomdist>())
2222 CASE(ocBitAnd, std::make_shared<OpBitAnd>())
2223 CASE(ocBitLshift, std::make_shared<OpBitLshift>())
2224 CASE(ocBitOr, std::make_shared<OpBitOr>())
2225 CASE(ocBitRshift, std::make_shared<OpBitRshift>())
2226 CASE(ocBitXor, std::make_shared<OpBitXor>())
2227 CASE(ocCeil, std::make_shared<OpCeil>())
2228 CASE(ocChiDist, std::make_shared<OpChiDist>())
2229 CASE(ocChiInv, std::make_shared<OpChiInv>())
2230 CASE(ocChiSqDist, std::make_shared<OpChiSqDist>())
2231 CASE(ocChiSqInv, std::make_shared<OpChiSqInv>())
2232 CASE(ocCombin, std::make_shared<OpCombin>())
2233 CASE(ocCombinA, std::make_shared<OpCombinA>())
2234 CASE(ocConfidence, std::make_shared<OpConfidence>())
2235 CASE(ocCorrel, std::make_shared<OpCorrel>())
2236 CASE(ocCos, std::make_shared<OpCos>())
2237 CASE(ocCosHyp, std::make_shared<OpCosh>())
2238 CASE(ocCosecant, std::make_shared<OpCsc>())
2239 CASE(ocCosecantHyp, std::make_shared<OpCscH>())
2240 CASE(ocCot, std::make_shared<OpCot>())
2241 CASE(ocCotHyp, std::make_shared<OpCoth>())
2242 CASE(ocCount, std::make_shared<OpCount>(nResultSize))
2243 CASE(ocCount2, std::make_shared<OpCountA>(nResultSize))
2244 CASE(ocCountIf, std::make_shared<OpCountIf>())
2245 CASE(ocCountIfs, std::make_shared<OpCountIfs>())
2246 CASE(ocCovar, std::make_shared<OpCovar>())
2247 CASE(ocCritBinom, std::make_shared<OpCritBinom>())
2248 CASE(ocDB, std::make_shared<OpDB>())
2249 CASE(ocDDB, std::make_shared<OpDDB>())
2250 CASE(ocDeg, std::make_shared<OpDeg>())
2251 CASE(ocDevSq, std::make_shared<OpDevSq>())
2252 CASE(ocDiv, std::make_shared<OpDiv>(nResultSize))
2253 CASE(ocEqual, std::make_shared<OpEqual>())
2254 CASE(ocEven, std::make_shared<OpEven>())
2255 CASE(ocExp, std::make_shared<OpExp>())
2256 CASE(ocExpDist, std::make_shared<OpExponDist>())
2257 CASE(ocFDist, std::make_shared<OpFdist>())
2258 CASE(ocFInv, std::make_shared<OpFInv>())
2259 CASE(ocFTest, std::make_shared<OpFTest>())
2260 CASE(ocFV, std::make_shared<OpFV>())
2261 CASE(ocFact, std::make_shared<OpFact>())
2262 CASE(ocFisher, std::make_shared<OpFisher>())
2263 CASE(ocFisherInv, std::make_shared<OpFisherInv>())
2264 CASE(ocFloor, std::make_shared<OpFloor>())
2265 CASE(ocForecast, std::make_shared<OpForecast>())
2266 CASE(ocGamma, std::make_shared<OpGamma>())
2267 CASE(ocGammaDist, std::make_shared<OpGammaDist>())
2268 CASE(ocGammaInv, std::make_shared<OpGammaInv>())
2269 CASE(ocGammaLn, std::make_shared<OpGammaLn>())
2270 CASE(ocGauss, std::make_shared<OpGauss>())
2271 CASE(ocGeoMean, std::make_shared<OpGeoMean>())
2272 CASE(ocGreater, std::make_shared<OpGreater>())
2273 CASE(ocGreaterEqual, std::make_shared<OpGreaterEqual>())
2274 CASE(ocHarMean, std::make_shared<OpHarMean>())
2275 CASE(ocHypGeomDist, std::make_shared<OpHypGeomDist>())
2276 CASE(ocIRR, std::make_shared<OpIRR>())
2277 CASE(ocISPMT, std::make_shared<OpISPMT>())
2278 CASE(ocIf, std::make_shared<OpIf>())
2279 CASE(ocInt, std::make_shared<OpInt>())
2280 CASE(ocIntercept, std::make_shared<OpIntercept>())
2281 CASE(ocIpmt, std::make_shared<OpIPMT>())
2282 CASE(ocIsEven, std::make_shared<OpIsEven>())
2283 CASE(ocIsOdd, std::make_shared<OpIsOdd>())
2284 CASE(ocKurt, std::make_shared<OpKurt>())
2285 CASE(ocLess, std::make_shared<OpLess>())
2286 CASE(ocLessEqual, std::make_shared<OpLessEqual>())
2287 CASE(ocLn, std::make_shared<OpLn>())
2288 CASE(ocLog, std::make_shared<OpLog>())
2289 CASE(ocLog10, std::make_shared<OpLog10>())
2290 CASE(ocLogInv, std::make_shared<OpLogInv>())
2291 CASE(ocLogNormDist, std::make_shared<OpLogNormDist>())
2292 CASE(ocMIRR, std::make_shared<OpMIRR>())
2293 CASE(ocMax, std::make_shared<OpMax>(nResultSize))
2294 CASE(ocMaxA, std::make_shared<OpMaxA>(nResultSize))
2295 CASE(ocMin, std::make_shared<OpMin>(nResultSize))
2296 CASE(ocMinA, std::make_shared<OpMinA>(nResultSize))
2297 CASE(ocMod, std::make_shared<OpMod>())
2298 CASE(ocMul, std::make_shared<OpMul>(nResultSize))
2299 CASE(ocNPV, std::make_shared<OpNPV>())
2300 CASE(ocNegBinomVert , std::make_shared<OpNegbinomdist>())
2301 CASE(ocNegSub, std::make_shared<OpNegSub>())
2302 CASE(ocNormDist, std::make_shared<OpNormdist>())
2303 CASE(ocNormInv, std::make_shared<OpNorminv>())
2304 CASE(ocNot, std::make_shared<OpNot>())
2305 CASE(ocNotEqual, std::make_shared<OpNotEqual>())
2306 CASE(ocNper, std::make_shared<OpNper>())
2307 CASE(ocOdd, std::make_shared<OpOdd>())
2308 CASE(ocOr, std::make_shared<OpOr>())
2309 CASE(ocPDuration, std::make_shared<OpPDuration>())
2310 CASE(ocPMT, std::make_shared<OpPMT>())
2311 CASE(ocPV, std::make_shared<OpPV>())
2312 CASE(ocPearson, std::make_shared<OpPearson>())
2313 CASE(ocPermut, std::make_shared<OpPermut>())
2314 CASE(ocPermutationA, std::make_shared<OpPermutationA>())
2315 CASE(ocPhi, std::make_shared<OpPhi>())
2316 CASE(ocPoissonDist, std::make_shared<OpPoisson>())
2317 CASE(ocPow, std::make_shared<OpPower>())
2318 CASE(ocPower, std::make_shared<OpPower>())
2319 CASE(ocPpmt, std::make_shared<OpPPMT>())
2320 CASE(ocProduct, std::make_shared<OpProduct>())
2321 CASE(ocRRI, std::make_shared<OpRRI>())
2322 CASE(ocRSQ, std::make_shared<OpRsq>())
2323 CASE(ocRad, std::make_shared<OpRadians>())
2324 CASE(ocRate, std::make_shared<OpRate>())
2325 CASE(ocRound, std::make_shared<OpRound>())
2326 CASE(ocRoundDown, std::make_shared<OpRoundDown>())
2327 CASE(ocRoundUp, std::make_shared<OpRoundUp>())
2328 CASE(ocSLN, std::make_shared<OpSLN>())
2329 CASE(ocSNormInv, std::make_shared<OpNormsinv>())
2330 CASE(ocSTEYX, std::make_shared<OpSTEYX>())
2331 CASE(ocSYD, std::make_shared<OpSYD>())
2332 CASE(ocSecant, std::make_shared<OpSec>())
2333 CASE(ocSecantHyp, std::make_shared<OpSecH>())
2334 CASE(ocSin, std::make_shared<OpSin>())
2335 CASE(ocSinHyp, std::make_shared<OpSinh>())
2336 CASE(ocSkew, std::make_shared<OpSkew>())
2337 CASE(ocSkewp, std::make_shared<OpSkewp>())
2338 CASE(ocSlope, std::make_shared<OpSlope>())
2339 CASE(ocSqrt, std::make_shared<OpSqrt>())
2340 CASE(ocStDev, std::make_shared<OpStDev>())
2341 CASE(ocStDevA, std::make_shared<OpStDevA>())
2342 CASE(ocStDevP, std::make_shared<OpStDevP>())
2343 CASE(ocStDevPA, std::make_shared<OpStDevPA>())
2344 CASE(ocStandard, std::make_shared<OpStandard>())
2345 CASE(ocStdNormDist, std::make_shared<OpNormsdist>())
2346 CASE(ocSub, std::make_shared<OpSub>(nResultSize))
2347 CASE(ocSum, std::make_shared<OpSum>(nResultSize))
2348 CASE(ocSumIf, std::make_shared<OpSumIf>())
2349 CASE(ocSumIfs, std::make_shared<OpSumIfs>())
2350 CASE(ocSumProduct, std::make_shared<OpSumProduct>())
2351 CASE(ocSumSQ, std::make_shared<OpSumSQ>())
2352 CASE(ocSumX2DY2, std::make_shared<OpSumX2PY2>())
2353 CASE(ocSumX2MY2, std::make_shared<OpSumX2MY2>())
2354 CASE(ocSumXMY2, std::make_shared<OpSumXMY2>())
2355 CASE(ocTDist, std::make_shared<OpTDist>())
2356 CASE(ocTInv, std::make_shared<OpTInv>())
2357 CASE(ocTTest, std::make_shared<OpTTest>())
2358 CASE(ocTan, std::make_shared<OpTan>())
2359 CASE(ocTanHyp, std::make_shared<OpTanH>())
2360 CASE(ocTrunc, std::make_shared<OpTrunc>())
2361 CASE(ocVBD, std::make_shared<OpVDB>())
2362 CASE(ocVLookup, std::make_shared<OpVLookup>())
2363 CASE(ocVar, std::make_shared<OpVar>())
2364 CASE(ocVarA, std::make_shared<OpVarA>())
2365 CASE(ocVarP, std::make_shared<OpVarP>())
2366 CASE(ocVarPA, std::make_shared<OpVarPA>())
2367 CASE(ocWeibull, std::make_shared<OpWeibull>())
2368 CASE(ocXor, std::make_shared<OpXor>())
2369 CASE(ocZTest, std::make_shared<OpZTest>())
2370#undef CASE
2371 case ocExternal:
2372#define EXTCASE( name, createCode ) \
2373 else if (pChild->GetExternal() == name) \
2374 { \
2375 mvSubArguments.push_back(SoPHelper(mCalcConfig, ts, ft->Children[i], createCode, nResultSize)); \
2376 }
2377
2378 if(false) // start else-if chain
2379 ;
2380 EXTCASE("com.sun.star.sheet.addin.Analysis.getAccrint", std::make_shared<OpAccrint>())
2381 EXTCASE("com.sun.star.sheet.addin.Analysis.getAccrintm", std::make_shared<OpAccrintm>())
2382 EXTCASE("com.sun.star.sheet.addin.Analysis.getAmordegrc", std::make_shared<OpAmordegrc>())
2383 EXTCASE("com.sun.star.sheet.addin.Analysis.getAmorlinc", std::make_shared<OpAmorlinc>())
2384 EXTCASE("com.sun.star.sheet.addin.Analysis.getBesselj", std::make_shared<OpBesselj>())
2385 EXTCASE("com.sun.star.sheet.addin.Analysis.getCoupdaybs", std::make_shared<OpCoupdaybs>())
2386 EXTCASE("com.sun.star.sheet.addin.Analysis.getCoupdays", std::make_shared<OpCoupdays>())
2387 EXTCASE("com.sun.star.sheet.addin.Analysis.getCoupdaysnc", std::make_shared<OpCoupdaysnc>())
2388 EXTCASE("com.sun.star.sheet.addin.Analysis.getCoupncd", std::make_shared<OpCoupncd>())
2389 EXTCASE("com.sun.star.sheet.addin.Analysis.getCoupnum", std::make_shared<OpCoupnum>())
2390 EXTCASE("com.sun.star.sheet.addin.Analysis.getCouppcd", std::make_shared<OpCouppcd>())
2391 EXTCASE("com.sun.star.sheet.addin.Analysis.getCumipmt", std::make_shared<OpCumipmt>())
2392 EXTCASE("com.sun.star.sheet.addin.Analysis.getCumprinc", std::make_shared<OpCumprinc>())
2393 EXTCASE("com.sun.star.sheet.addin.Analysis.getDisc", std::make_shared<OpDISC>())
2394 EXTCASE("com.sun.star.sheet.addin.Analysis.getDollarde", std::make_shared<OpDollarde>())
2395 EXTCASE("com.sun.star.sheet.addin.Analysis.getDollarfr", std::make_shared<OpDollarfr>())
2396 EXTCASE("com.sun.star.sheet.addin.Analysis.getDuration", std::make_shared<OpDuration_ADD>())
2397 EXTCASE("com.sun.star.sheet.addin.Analysis.getEffect", std::make_shared<OpEffective>())
2398 EXTCASE("com.sun.star.sheet.addin.Analysis.getFvschedule", std::make_shared<OpFvschedule>())
2399 EXTCASE("com.sun.star.sheet.addin.Analysis.getGestep", std::make_shared<OpGestep>())
2400 EXTCASE("com.sun.star.sheet.addin.Analysis.getIntrate", std::make_shared<OpINTRATE>())
2401 EXTCASE("com.sun.star.sheet.addin.Analysis.getIseven", std::make_shared<OpIsEven>())
2402 EXTCASE("com.sun.star.sheet.addin.Analysis.getIsodd", std::make_shared<OpIsOdd>())
2403 EXTCASE("com.sun.star.sheet.addin.Analysis.getMduration", std::make_shared<OpMDuration>())
2404 EXTCASE("com.sun.star.sheet.addin.Analysis.getMround", std::make_shared<OpMROUND>())
2405 EXTCASE("com.sun.star.sheet.addin.Analysis.getNominal", std::make_shared<OpNominal>())
2406 EXTCASE("com.sun.star.sheet.addin.Analysis.getOddlprice", std::make_shared<OpOddlprice>())
2407 EXTCASE("com.sun.star.sheet.addin.Analysis.getOddlyield", std::make_shared<OpOddlyield>())
2408 EXTCASE("com.sun.star.sheet.addin.Analysis.getPrice", std::make_shared<OpPrice>())
2409 EXTCASE("com.sun.star.sheet.addin.Analysis.getPricedisc", std::make_shared<OpPriceDisc>())
2410 EXTCASE("com.sun.star.sheet.addin.Analysis.getPricemat", std::make_shared<OpPriceMat>())
2411 EXTCASE("com.sun.star.sheet.addin.Analysis.getQuotient", std::make_shared<OpQuotient>())
2412 EXTCASE("com.sun.star.sheet.addin.Analysis.getReceived", std::make_shared<OpReceived>())
2413 EXTCASE("com.sun.star.sheet.addin.Analysis.getSeriessum", std::make_shared<OpSeriesSum>())
2414 EXTCASE("com.sun.star.sheet.addin.Analysis.getSqrtpi", std::make_shared<OpSqrtPi>())
2415 EXTCASE("com.sun.star.sheet.addin.Analysis.getTbilleq", std::make_shared<OpTbilleq>())
2416 EXTCASE("com.sun.star.sheet.addin.Analysis.getTbillprice", std::make_shared<OpTbillprice>())
2417 EXTCASE("com.sun.star.sheet.addin.Analysis.getTbillyield", std::make_shared<OpTbillyield>())
2418 EXTCASE("com.sun.star.sheet.addin.Analysis.getXirr", std::make_shared<OpXirr>())
2419 EXTCASE("com.sun.star.sheet.addin.Analysis.getXnpv", std::make_shared<OpXNPV>())
2420 EXTCASE("com.sun.star.sheet.addin.Analysis.getYield", std::make_shared<OpYield>())
2421 EXTCASE("com.sun.star.sheet.addin.Analysis.getYielddisc", std::make_shared<OpYielddisc>())
2422 EXTCASE("com.sun.star.sheet.addin.Analysis.getYieldmat", std::make_shared<OpYieldmat>())
2423 else
2424 throw UnhandledToken(OUString("unhandled external " + pChild->GetExternal()).toUtf8().getStr(), __FILE__, __LINE__);
2425 break;
2426#undef EXTCASE
2427
2428 default:
2429 throw UnhandledToken(OUString("unhandled opcode "
2430 + formula::FormulaCompiler().GetOpCodeMap(com::sun::star::sheet::FormulaLanguage::ENGLISH)->getSymbol(opc)
2431 + "(" + OUString::number(opc) + ")").toUtf8().getStr(), __FILE__, __LINE__);
2432 }
2433 }
2434}
2435
2436namespace {
2437
2438class DynamicKernel : public CompiledFormula
2439{
2440public:
2441 DynamicKernel( ScCalcConfig config, FormulaTreeNodeRef r, int nResultSize );
2442 virtual ~DynamicKernel() override;
2443
2444 static std::shared_ptr<DynamicKernel> create( const ScCalcConfig& config, const ScTokenArray& rCode, int nResultSize );
2445
2447 void CodeGen();
2448
2450 std::string const & GetMD5();
2451
2455 void CreateKernel();
2456
2459 void Launch( size_t nr );
2460
2461 cl_mem GetResultBuffer() const { return mpResClmem; }
2462
2463private:
2466 SymbolTable mSyms;
2468 std::string mFullProgramSrc;
2469 cl_program mpProgram;
2470 cl_kernel mpKernel;
2471 cl_mem mpResClmem; // Results
2472 std::set<std::string> inlineDecl;
2473 std::set<std::string> inlineFun;
2474
2476};
2477
2478}
2479
2480DynamicKernel::DynamicKernel( ScCalcConfig config, FormulaTreeNodeRef x, int nResultSize ) :
2481 mCalcConfig(std::move(config)),
2482 mpRoot(std::move(x)),
2483 mpProgram(nullptr),
2484 mpKernel(nullptr),
2485 mpResClmem(nullptr),
2486 mnResultSize(nResultSize) {}
2487
2488DynamicKernel::~DynamicKernel()
2489{
2490 cl_int err;
2491 if (mpResClmem)
2492 {
2493 err = clReleaseMemObject(mpResClmem);
2494 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseMemObject failed: " << openclwrapper::errorString(err));
2495 }
2496 if (mpKernel)
2497 {
2498 SAL_INFO("sc.opencl", "Releasing kernel " << mpKernel);
2499 err = clReleaseKernel(mpKernel);
2500 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseKernel failed: " << openclwrapper::errorString(err));
2501 }
2502 // mpProgram is not going to be released here -- it's cached.
2503}
2504
2505void DynamicKernel::CodeGen()
2506{
2507 // Traverse the tree of expression and declare symbols used
2508 const DynamicKernelArgument* DK = mSyms.DeclRefArg<DynamicKernelSoPArguments>(mCalcConfig, mpRoot, std::make_shared<OpNop>(mnResultSize), mnResultSize);
2509
2510 outputstream decl;
2511 if (openclwrapper::gpuEnv.mnKhrFp64Flag)
2512 {
2513 decl << "#if __OPENCL_VERSION__ < 120\n";
2514 decl << "#pragma OPENCL EXTENSION cl_khr_fp64: enable\n";
2515 decl << "#endif\n";
2516 }
2517 else if (openclwrapper::gpuEnv.mnAmdFp64Flag)
2518 {
2519 decl << "#pragma OPENCL EXTENSION cl_amd_fp64: enable\n";
2520 }
2521 // preambles
2522 decl << publicFunc;
2523 DK->DumpInlineFun(inlineDecl, inlineFun);
2524 for (const auto& rItem : inlineDecl)
2525 {
2526 decl << rItem;
2527 }
2528
2529 for (const auto& rItem : inlineFun)
2530 {
2531 decl << rItem;
2532 }
2533 mSyms.DumpSlidingWindowFunctions(decl);
2534 mKernelSignature = DK->DumpOpName();
2535 decl << "__kernel void DynamicKernel" << mKernelSignature;
2536 decl << "(__global double *result";
2537 if( !DK->IsEmpty())
2538 {
2539 decl << ", ";
2540 DK->GenSlidingWindowDecl(decl);
2541 }
2542 decl << ") {\n\tint gid0 = get_global_id(0);\n\tresult[gid0] = " <<
2543 DK->GenSlidingWindowDeclRef() << ";\n}\n";
2544 mFullProgramSrc = decl.str();
2545 SAL_INFO(
2546 "sc.opencl.source",
2547 (mKernelSignature[0] == '_'
2548 ? mKernelSignature.substr(1, std::string::npos) : mKernelSignature)
2549 << " program to be compiled:\n" << linenumberify(mFullProgramSrc));
2550}
2551
2552std::string const & DynamicKernel::GetMD5()
2553{
2554 if (mKernelHash.empty())
2555 {
2556 outputstream md5s;
2557 // Compute MD5SUM of kernel body to obtain the name
2558 sal_uInt8 result[RTL_DIGEST_LENGTH_MD5];
2559 rtl_digest_MD5(
2560 mFullProgramSrc.c_str(),
2561 mFullProgramSrc.length(), result,
2562 RTL_DIGEST_LENGTH_MD5);
2563 for (sal_uInt8 i : result)
2564 {
2565 md5s << std::hex << static_cast<int>(i);
2566 }
2567 mKernelHash = md5s.str();
2568 }
2569 return mKernelHash;
2570}
2571
2573void DynamicKernel::CreateKernel()
2574{
2575 if (mpKernel)
2576 // already created.
2577 return;
2578
2579 cl_int err;
2580 std::string kname = "DynamicKernel" + mKernelSignature;
2581 // Compile kernel here!!!
2582
2583 OpenCLZone zone;
2586 const char* src = mFullProgramSrc.c_str();
2587 static std::string lastOneKernelHash;
2588 static std::string lastSecondKernelHash;
2589 static cl_program lastOneProgram = nullptr;
2590 static cl_program lastSecondProgram = nullptr;
2591 std::string KernelHash = mKernelSignature + GetMD5();
2592 if (lastOneKernelHash == KernelHash && lastOneProgram)
2593 {
2594 mpProgram = lastOneProgram;
2595 }
2596 else if (lastSecondKernelHash == KernelHash && lastSecondProgram)
2597 {
2598 mpProgram = lastSecondProgram;
2599 }
2600 else
2601 { // doesn't match the last compiled formula.
2602
2603 if (lastSecondProgram)
2604 {
2605 SAL_INFO("sc.opencl", "Releasing program " << lastSecondProgram);
2606 err = clReleaseProgram(lastSecondProgram);
2607 SAL_WARN_IF(err != CL_SUCCESS, "sc.opencl", "clReleaseProgram failed: " << openclwrapper::errorString(err));
2608 lastSecondProgram = nullptr;
2609 }
2611 &openclwrapper::gpuEnv, KernelHash.c_str(), 0))
2612 {
2615 }
2616 else
2617 {
2618 mpProgram = clCreateProgramWithSource(kEnv.mpkContext, 1,
2619 &src, nullptr, &err);
2620 if (err != CL_SUCCESS)
2621 throw OpenCLError("clCreateProgramWithSource", err, __FILE__, __LINE__);
2622 SAL_INFO("sc.opencl", "Created program " << mpProgram);
2623
2624 err = clBuildProgram(mpProgram, 1,
2625 &openclwrapper::gpuEnv.mpDevID, "", nullptr, nullptr);
2626 if (err != CL_SUCCESS)
2627 {
2628#if OSL_DEBUG_LEVEL > 0
2629 if (err == CL_BUILD_PROGRAM_FAILURE)
2630 {
2631 cl_build_status stat;
2632 cl_int e = clGetProgramBuildInfo(
2634 CL_PROGRAM_BUILD_STATUS, sizeof(cl_build_status),
2635 &stat, nullptr);
2637 e != CL_SUCCESS, "sc.opencl",
2638 "after CL_BUILD_PROGRAM_FAILURE,"
2639 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_STATUS)"
2640 " fails with " << openclwrapper::errorString(e));
2641 if (e == CL_SUCCESS)
2642 {
2643 size_t n;
2644 e = clGetProgramBuildInfo(
2646 CL_PROGRAM_BUILD_LOG, 0, nullptr, &n);
2648 e != CL_SUCCESS || n == 0, "sc.opencl",
2649 "after CL_BUILD_PROGRAM_FAILURE,"
2650 " clGetProgramBuildInfo(CL_PROGRAM_BUILD_LOG)"
2651 " fails with " << openclwrapper::errorString(e) << ", n=" << n);
2652 if (e == CL_SUCCESS && n != 0)
2653 {
2654 std::vector<char> log(n);
2655 e = clGetProgramBuildInfo(
2657 CL_PROGRAM_BUILD_LOG, n, log.data(), nullptr);
2659 e != CL_SUCCESS || n == 0, "sc.opencl",
2660 "after CL_BUILD_PROGRAM_FAILURE,"
2661 " clGetProgramBuildInfo("
2662 "CL_PROGRAM_BUILD_LOG) fails with " << openclwrapper::errorString(e));
2663 if (e == CL_SUCCESS)
2664 SAL_WARN(
2665 "sc.opencl",
2666 "CL_BUILD_PROGRAM_FAILURE, status " << stat
2667 << ", log \"" << log.data() << "\"");
2668 }
2669 }
2670 }
2671#endif
2672#ifdef DBG_UTIL
2673 SAL_WARN("sc.opencl", "Program failed to build, aborting.");
2674 abort(); // make sure errors such as typos don't accidentally go unnoticed
2675#else
2676 throw OpenCLError("clBuildProgram", err, __FILE__, __LINE__);
2677#endif
2678 }
2679 SAL_INFO("sc.opencl", "Built program " << mpProgram);
2680
2681 // Generate binary out of compiled kernel.
2683 (mKernelSignature + GetMD5()).c_str());
2684 }
2685 lastSecondKernelHash = lastOneKernelHash;
2686 lastSecondProgram = lastOneProgram;
2687 lastOneKernelHash = KernelHash;
2688 lastOneProgram = mpProgram;
2689 }
2690 mpKernel = clCreateKernel(mpProgram, kname.c_str(), &err);
2691 if (err != CL_SUCCESS)
2692 throw OpenCLError("clCreateKernel", err, __FILE__, __LINE__);
2693 SAL_INFO("sc.opencl", "Created kernel " << mpKernel << " with name " << kname << " in program " << mpProgram);
2694}
2695
2696void DynamicKernel::Launch( size_t nr )
2697{
2698 OpenCLZone zone;
2701 cl_int err;
2702 // The results
2703 mpResClmem = clCreateBuffer(kEnv.mpkContext,
2704 cl_mem_flags(CL_MEM_READ_WRITE) | CL_MEM_ALLOC_HOST_PTR,
2705 nr * sizeof(double), nullptr, &err);
2706 if (CL_SUCCESS != err)
2707 throw OpenCLError("clCreateBuffer", err, __FILE__, __LINE__);
2708 SAL_INFO("sc.opencl", "Created buffer " << mpResClmem << " size " << nr << "*" << sizeof(double) << "=" << (nr*sizeof(double)));
2709
2710 SAL_INFO("sc.opencl", "Kernel " << mpKernel << " arg " << 0 << ": cl_mem: " << mpResClmem << " (result)");
2711 err = clSetKernelArg(mpKernel, 0, sizeof(cl_mem), static_cast<void*>(&mpResClmem));
2712 if (CL_SUCCESS != err)
2713 throw OpenCLError("clSetKernelArg", err, __FILE__, __LINE__);
2714 // The rest of buffers
2715 mSyms.Marshal(mpKernel, nr, mpProgram);
2716 size_t global_work_size[] = { nr };
2717 SAL_INFO("sc.opencl", "Enqueuing kernel " << mpKernel);
2718 err = clEnqueueNDRangeKernel(kEnv.mpkCmdQueue, mpKernel, 1, nullptr,
2719 global_work_size, nullptr, 0, nullptr, nullptr);
2720 if (CL_SUCCESS != err)
2721 throw OpenCLError("clEnqueueNDRangeKernel", err, __FILE__, __LINE__);
2722 err = clFlush(kEnv.mpkCmdQueue);
2723 if (CL_SUCCESS != err)
2724 throw OpenCLError("clFlush", err, __FILE__, __LINE__);
2725}
2726
2727// Symbol lookup. If there is no such symbol created, allocate one
2728// kernel with argument with unique name and return so.
2729// The template argument T must be a subclass of DynamicKernelArgument
2730template <typename T>
2731const DynamicKernelArgument* SymbolTable::DeclRefArg(const ScCalcConfig& config,
2732 const FormulaTreeNodeRef& t,
2733 std::shared_ptr<SlidingFunctionBase> pCodeGen, int nResultSize)
2734{
2735 FormulaToken* ref = t->GetFormulaToken();
2736 ArgumentMap::iterator it = mSymbols.find(ref);
2737 if (it == mSymbols.end())
2738 {
2739 // Allocate new symbols
2740 outputstream ss;
2741 ss << "tmp" << mCurId++;
2742 DynamicKernelArgumentRef new_arg = std::make_shared<T>(config, ss.str(), t, std::move(pCodeGen), nResultSize);
2743 mSymbols[ref] = new_arg;
2744 mParams.push_back(new_arg);
2745 return new_arg.get();
2746 }
2747 else
2748 {
2749 return it->second.get();
2750 }
2751}
2752
2753FormulaGroupInterpreterOpenCL::FormulaGroupInterpreterOpenCL() {}
2754
2755FormulaGroupInterpreterOpenCL::~FormulaGroupInterpreterOpenCL() {}
2756
2757ScMatrixRef FormulaGroupInterpreterOpenCL::inverseMatrix( const ScMatrix& )
2758{
2759 return nullptr;
2760}
2761
2762std::shared_ptr<DynamicKernel> DynamicKernel::create( const ScCalcConfig& rConfig, const ScTokenArray& rCode, int nResultSize )
2763{
2764 // Constructing "AST"
2765 FormulaTokenIterator aCode(rCode);
2766 std::vector<FormulaToken*> aTokenVector;
2767 std::map<FormulaToken*, FormulaTreeNodeRef> aHashMap;
2768 FormulaToken* pCur;
2769 while ((pCur = const_cast<FormulaToken*>(aCode.Next())) != nullptr)
2770 {
2771 OpCode eOp = pCur->GetOpCode();
2772 if (eOp != ocPush)
2773 {
2774 FormulaTreeNodeRef pCurNode = std::make_shared<FormulaTreeNode>(pCur);
2775 sal_uInt8 nParamCount = pCur->GetParamCount();
2776 for (sal_uInt8 i = 0; i < nParamCount; i++)
2777 {
2778 if( aTokenVector.empty())
2779 return nullptr;
2780 FormulaToken* pTempFormula = aTokenVector.back();
2781 aTokenVector.pop_back();
2782 if (pTempFormula->GetOpCode() != ocPush)
2783 {
2784 if (aHashMap.find(pTempFormula) == aHashMap.end())
2785 return nullptr;
2786 pCurNode->Children.push_back(aHashMap[pTempFormula]);
2787 }
2788 else
2789 {
2790 FormulaTreeNodeRef pChildTreeNode =
2791 std::make_shared<FormulaTreeNode>(pTempFormula);
2792 pCurNode->Children.push_back(pChildTreeNode);
2793 }
2794 }
2795 std::reverse(pCurNode->Children.begin(), pCurNode->Children.end());
2796 aHashMap[pCur] = pCurNode;
2797 }
2798 aTokenVector.push_back(pCur);
2799 }
2800
2801 FormulaTreeNodeRef Root = std::make_shared<FormulaTreeNode>(nullptr);
2802 Root->Children.push_back(aHashMap[aTokenVector.back()]);
2803
2804 auto pDynamicKernel = std::make_shared<DynamicKernel>(rConfig, Root, nResultSize);
2805
2806 // OpenCL source code generation and kernel compilation
2807 try
2808 {
2809 pDynamicKernel->CodeGen();
2810 pDynamicKernel->CreateKernel();
2811 }
2812 catch (const UnhandledToken& ut)
2813 {
2814 SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
2815 return nullptr;
2816 }
2817 catch (const InvalidParameterCount& ipc)
2818 {
2819 SAL_INFO("sc.opencl", "Dynamic formula compiler: InvalidParameterCount " << ipc.mParameterCount
2820 << " at " << ipc.mFile << ":" << ipc.mLineNumber);
2821 return nullptr;
2822 }
2823 catch (const OpenCLError& oce)
2824 {
2825 // I think OpenCLError exceptions are actually exceptional (unexpected), so do use SAL_WARN
2826 // here.
2827 SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
2828
2829 // OpenCLError used to go to the catch-all below, and not delete pDynamicKernel. Was that
2830 // intentional, should we not do it here then either?
2832 return nullptr;
2833 }
2834 catch (const Unhandled& uh)
2835 {
2836 SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
2837
2838 // Unhandled used to go to the catch-all below, and not delete pDynamicKernel. Was that
2839 // intentional, should we not do it here then either?
2841 return nullptr;
2842 }
2843 catch (...)
2844 {
2845 // FIXME: Do we really want to catch random exceptions here?
2846 SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
2848 return nullptr;
2849 }
2850 return pDynamicKernel;
2851}
2852
2853namespace {
2854
2855class CLInterpreterResult
2856{
2857 DynamicKernel* mpKernel;
2858
2860
2862 double* mpResBuf;
2863
2864public:
2865 CLInterpreterResult() : mpKernel(nullptr), mnGroupLength(0), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
2866 CLInterpreterResult( DynamicKernel* pKernel, SCROW nGroupLength ) :
2867 mpKernel(pKernel), mnGroupLength(nGroupLength), mpCLResBuf(nullptr), mpResBuf(nullptr) {}
2868
2869 bool isValid() const { return mpKernel != nullptr; }
2870
2871 void fetchResultFromKernel()
2872 {
2873 if (!isValid())
2874 return;
2875
2876 OpenCLZone zone;
2877
2878 // Map results back
2879 mpCLResBuf = mpKernel->GetResultBuffer();
2880
2883
2884 cl_int err;
2885 mpResBuf = static_cast<double*>(clEnqueueMapBuffer(kEnv.mpkCmdQueue,
2886 mpCLResBuf,
2887 CL_TRUE, CL_MAP_READ, 0,
2888 mnGroupLength * sizeof(double), 0, nullptr, nullptr,
2889 &err));
2890
2891 if (err != CL_SUCCESS)
2892 {
2893 SAL_WARN("sc.opencl", "clEnqueueMapBuffer failed:: " << openclwrapper::errorString(err));
2894 mpResBuf = nullptr;
2895 return;
2896 }
2897 SAL_INFO("sc.opencl", "Kernel results: cl_mem: " << mpResBuf << " (" << DebugPeekDoubles(mpResBuf, mnGroupLength) << ")");
2898 }
2899
2900 bool pushResultToDocument( ScDocument& rDoc, const ScAddress& rTopPos )
2901 {
2902 if (!mpResBuf)
2903 return false;
2904
2905 OpenCLZone zone;
2906
2908
2911
2912 cl_int err;
2913 err = clEnqueueUnmapMemObject(kEnv.mpkCmdQueue, mpCLResBuf, mpResBuf, 0, nullptr, nullptr);
2914
2915 if (err != CL_SUCCESS)
2916 {
2917 SAL_WARN("sc.opencl", "clEnqueueUnmapMemObject failed: " << openclwrapper::errorString(err));
2918 return false;
2919 }
2920
2921 return true;
2922 }
2923};
2924
2925class CLInterpreterContext
2926{
2927 std::shared_ptr<DynamicKernel> mpKernelStore;
2928 DynamicKernel* mpKernel;
2929
2931
2932public:
2933 explicit CLInterpreterContext(SCROW nGroupLength, std::shared_ptr<DynamicKernel> pKernel )
2934 : mpKernelStore(std::move(pKernel))
2936 , mnGroupLength(nGroupLength) {}
2937
2938 ~CLInterpreterContext()
2939 {
2940 DynamicKernelArgument::ClearStringIds();
2941 }
2942
2943 bool isValid() const
2944 {
2945 return mpKernel != nullptr;
2946 }
2947
2948 CLInterpreterResult launchKernel()
2949 {
2950 if (!isValid())
2951 return CLInterpreterResult();
2952
2953 try
2954 {
2955 // Run the kernel.
2956 mpKernel->Launch(mnGroupLength);
2957 }
2958 catch (const UnhandledToken& ut)
2959 {
2960 SAL_INFO("sc.opencl", "Dynamic formula compiler: UnhandledToken: " << ut.mMessage << " at " << ut.mFile << ":" << ut.mLineNumber);
2962 return CLInterpreterResult();
2963 }
2964 catch (const OpenCLError& oce)
2965 {
2966 SAL_WARN("sc.opencl", "Dynamic formula compiler: OpenCLError from " << oce.mFunction << ": " << openclwrapper::errorString(oce.mError) << " at " << oce.mFile << ":" << oce.mLineNumber);
2968 return CLInterpreterResult();
2969 }
2970 catch (const Unhandled& uh)
2971 {
2972 SAL_INFO("sc.opencl", "Dynamic formula compiler: Unhandled at " << uh.mFile << ":" << uh.mLineNumber);
2974 return CLInterpreterResult();
2975 }
2976 catch (...)
2977 {
2978 SAL_WARN("sc.opencl", "Dynamic formula compiler: unexpected exception");
2980 return CLInterpreterResult();
2981 }
2982
2983 return CLInterpreterResult(mpKernel, mnGroupLength);
2984 }
2985};
2986
2987
2988CLInterpreterContext createCLInterpreterContext( const ScCalcConfig& rConfig,
2989 const ScFormulaCellGroupRef& xGroup, const ScTokenArray& rCode )
2990{
2991 return CLInterpreterContext(xGroup->mnLength, DynamicKernel::create(rConfig, rCode, xGroup->mnLength));
2992}
2993
2994void genRPNTokens( ScDocument& rDoc, const ScAddress& rTopPos, ScTokenArray& rCode )
2995{
2996 ScCompiler aComp(rDoc, rTopPos, rCode, rDoc.GetGrammar());
2997 // Disable special ordering for jump commands for the OpenCL interpreter.
2998 aComp.EnableJumpCommandReorder(false);
2999 aComp.CompileTokenArray(); // Regenerate RPN tokens.
3000}
3001
3002bool waitForResults()
3003{
3004 OpenCLZone zone;
3007
3008 cl_int err = clFinish(kEnv.mpkCmdQueue);
3009 if (err != CL_SUCCESS)
3010 SAL_WARN("sc.opencl", "clFinish failed: " << openclwrapper::errorString(err));
3011
3012 return err == CL_SUCCESS;
3013}
3014
3015}
3016
3017bool FormulaGroupInterpreterOpenCL::interpret( ScDocument& rDoc,
3018 const ScAddress& rTopPos, ScFormulaCellGroupRef& xGroup,
3019 ScTokenArray& rCode )
3020{
3021 SAL_INFO("sc.opencl", "Interpret cell group " << rTopPos);
3022 MergeCalcConfig(rDoc);
3023
3024 genRPNTokens(rDoc, rTopPos, rCode);
3025
3026 if( rCode.GetCodeLen() == 0 )
3027 return false;
3028
3029 CLInterpreterContext aCxt = createCLInterpreterContext(maCalcConfig, xGroup, rCode);
3030 if (!aCxt.isValid())
3031 return false;
3032
3033 CLInterpreterResult aRes = aCxt.launchKernel();
3034 if (!aRes.isValid())
3035 return false;
3036
3037 if (!waitForResults())
3038 return false;
3039
3040 aRes.fetchResultFromKernel();
3041
3042 return aRes.pushResultToDocument(rDoc, rTopPos);
3043}
3044
3045} // namespace sc::opencl
3046
3047/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
::boost::spirit::classic::rule< ScannerT > argument
XPropertyListType t
void SC_DLLPUBLIC SetFormulaResults(const ScAddress &rTopPos, const double *pResults, size_t nLen)
Set an array of numerical formula results to a group of contiguous formula cells.
Definition: documen8.cxx:402
SC_DLLPUBLIC formula::FormulaGrammar::Grammar GetGrammar() const
Definition: document.hxx:1008
Matrix data type that can store values of mixed types.
Definition: scmatrix.hxx:101
const std::vector< VectorRefArray > & GetArrays() const
sal_uInt16 GetCodeLen() const
OpCode GetOpCode() const
StackVar GetType() const
virtual const svl::SharedString & GetString() const
virtual double GetDouble() const
sal_uInt8 GetParamCount() const
const VectorRefArray & GetArray() const
(Partially) abstract base class for an operand
Definition: opbase.hxx:125
static int GetStringId(const rtl_uString *string)
Definition: opbase.cxx:110
virtual void GenDeclRef(outputstream &ss) const
Generate use/references to the argument.
Definition: opbase.cxx:59
FormulaTreeNodeRef mFormulaTree
Definition: opbase.hxx:176
Handling a Double Vector that is used as a sliding window input to either a sliding window average or...
Definition: opbase.hxx:440
size_t GenReductionLoopHeader(outputstream &ss, bool &needBody)
Controls how the elements in the DoubleVectorRef are traversed.
const formula::DoubleVectorRefToken * mpDVR
Definition: opbase.hxx:463
std::string GenSlidingWindowDeclRef(bool nested=false) const
DynamicKernelSlidingArgument(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > CodeGen, int index)
virtual size_t Marshal(cl_kernel, int, int, cl_program) override
Create buffer and pass the buffer to a given kernel.
virtual std::string GenIsString(bool=false) const override
Will generate value saying whether the value is a string.
virtual void GenSlidingWindowDecl(outputstream &ss) const override
When declared as input to a sliding window function.
Definition: opbase.hxx:244
virtual void GenDecl(outputstream &ss) const override
Generate declaration.
Definition: opbase.hxx:240
Failed in marshaling.
Definition: opbase.hxx:53
Handling a Double Vector that is used as a sliding window input Performs parallel reduction based on ...
Definition: opbase.hxx:472
virtual size_t Marshal(cl_kernel k, int argno, int w, cl_program mpProgram)
size_t GenReductionLoopHeader(outputstream &ss, int nResultSize, bool &needBody)
Controls how the elements in the DoubleVectorRef are traversed.
ParallelReductionVectorRef(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > CodeGen, int index)
virtual void GenSlidingWindowFunction(outputstream &ss)
Emit the definition for the auxiliary reduction kernel.
const formula::DoubleVectorRefToken * mpDVR
Definition: opbase.hxx:492
virtual std::string GenSlidingWindowDeclRef(bool) const
Inconsistent state.
Definition: opbase.hxx:65
Holds an input (read-only) argument reference to a SingleVectorRef.
Definition: opbase.hxx:188
virtual void GenSlidingWindowDecl(outputstream &ss) const override
When declared as input to a sliding window function.
Definition: opbase.cxx:159
virtual std::string GenSlidingWindowDeclRef(bool=false) const override
When referenced in a sliding window function.
Definition: opbase.cxx:165
virtual void GenDecl(outputstream &ss) const override
Generate declaration.
Definition: opbase.cxx:153
std::vector< double > dataBuffer
Definition: opbase.hxx:220
virtual size_t Marshal(cl_kernel, int, int, cl_program) override
Create buffer and pass the buffer to a given kernel.
const OUString & getString() const
rtl_uString * getData()
float x
cl_kernel mpKernel
for managed kernel instance.
DynamicKernelStringArgument mStringArgument
cl_program mpProgram
unsigned int mCurId
#define EXTCASE(name, createCode)
std::string mKernelHash
FormulaTreeNodeRef mpRoot
std::set< std::string > inlineFun
double mConst
std::string mKernelSignature
std::shared_ptr< DynamicKernel > mpKernelStore
DynamicKernelSlidingArgument< VectorRef > mDoubleArgument
const char *const publicFunc
std::vector< DynamicKernelArgumentRef > mParams
std::shared_ptr< SlidingFunctionBase > mpCodeGen
int mnResultSize
std::string mFullProgramSrc
#define CASE(opcode, createCode)
SubArgumentsType mvSubArguments
cl_mem mpCLResBuf
double * mpResBuf
std::set< std::string > inlineDecl
cl_mem mpResClmem
SymbolTable mSyms
cl_mem mCLMem
SCROW mnGroupLength
cl_mem mpClmem2
ArgumentMap mSymbols
ScCalcConfig mCalcConfig
const char * name
sal_Int64 n
#define SAL_WARN_IF(condition, area, stream)
#define SAL_WARN(area, stream)
#define SAL_INFO(area, stream)
err
size
int uniform_int_distribution(int a, int b)
css::uno::Reference< css::deployment::XPackageRegistry > create(css::uno::Reference< css::deployment::XPackageRegistry > const &xRootRegistry, OUString const &context, OUString const &cachePath, css::uno::Reference< css::uno::XComponentContext > const &xComponentContext)
std::string StackVarEnumToString(StackVar const e)
config
int i
index
const char * errorString(cl_int nError)
bool buildProgramFromBinary(const char *buildOption, GPUEnv *gpuInfo, const char *filename, int idx)
void setKernelEnv(KernelEnv *envInfo)
sal_uInt64 kernelFailures
bool generatBinFromKernelSource(cl_program program, const char *clFileName)
log
std::string preciseFloat(double f)
Definition: utils.cxx:55
static std::shared_ptr< DynamicKernelArgument > VectorRefFactory(const ScCalcConfig &config, const std::string &s, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > &pCodeGen, int index)
static DynamicKernelArgumentRef SoPHelper(const ScCalcConfig &config, const std::string &ts, const FormulaTreeNodeRef &ft, std::shared_ptr< SlidingFunctionBase > pCodeGen, int nResultSize)
std::shared_ptr< FormulaTreeNode > FormulaTreeNodeRef
Definition: opbase.hxx:104
std::shared_ptr< DynamicKernelArgument > DynamicKernelArgumentRef
Definition: opbase.hxx:180
css::uno::Reference< css::linguistic2::XProofreadingIterator > get(css::uno::Reference< css::uno::XComponentContext > const &context)
sal_Int32 w
const char getPrice[]
#define UNROLLING_FACTOR
Definition: opbase.hxx:33
#define REDUCE_THRESHOLD
Definition: opbase.hxx:32
OpCode
ocRandom
ocExternal
ocPush
ocPi
Configuration options for formula interpreter.
Definition: calcconfig.hxx:44
StringConversion meStringConversion
Definition: calcconfig.hxx:54
@ ZERO
=1+"1" or =1+"x" give 1
const double * mpNumericArray
rtl_uString ** mpStringArray
cl_program mpArryPrograms[MAX_CLFILE_NUM]
cl_command_queue mpkCmdQueue
unsigned char sal_uInt8
#define SAL_MAX_UINT32
Base
::boost::intrusive_ptr< ScFormulaCellGroup > ScFormulaCellGroupRef
Definition: types.hxx:43
::boost::intrusive_ptr< ScMatrix > ScMatrixRef
Definition: types.hxx:25
sal_Int32 SCROW
Definition: types.hxx:17
Any result
#define VR
Definition: xlformula.cxx:62
sal_Int32 nLength