22#include <rtl/math.hxx>
30#define INPUTSIZE 15360
31#define OUTPUTSIZE 15360
35void DS_CHECK_STATUS(cl_int status,
char const *
name) {
36 if (CL_SUCCESS != status)
38 SAL_INFO(
"opencl.device",
"Error code is " << status <<
" at " <<
name);
42bool bIsDeviceSelected =
false;
45struct LibreOfficeDeviceEvaluationIO
47 std::vector<double> input0;
48 std::vector<double> input1;
49 std::vector<double> input2;
50 std::vector<double> input3;
51 std::vector<double> output;
56const char* source = R
"delimit(
57#if defined(KHR_DP_EXTENSION)
58#pragma OPENCL EXTENSION cl_khr_fp64 : enable
59#elif defined(AMD_DP_EXTENSION)
60#pragma OPENCL EXTENSION cl_amd_fp64 : enable
63 int isNan(fp_t a) { return a != a; }
64 fp_t fsum(fp_t a, fp_t b) { return a + b; }
66 fp_t fAverage(__global fp_t* input)
70 for (int i = 0; i < INPUTSIZE; i++)
74 sum = fsum(input[i], sum);
78 return sum / (fp_t)count;
80 fp_t fMin(__global fp_t* input)
83 for (int i = 0; i < INPUTSIZE; i++)
87 min = fmin(input[i], min);
92 fp_t fSoP(__global fp_t* input0, __global fp_t* input1)
95 for (int i = 0; i < INPUTSIZE; i++)
97 sop += (isNan(input0[i]) ? 0 : input0[i]) * (isNan(input1[i]) ? 0 : input1[i]);
101 __kernel void DynamicKernel(
102 __global fp_t* result, __global fp_t* input0, __global fp_t* input1, __global fp_t* input2, __global fp_t* input3)
104 int gid0 = get_global_id(0);
105 fp_t tmp0 = fAverage(input0);
106 fp_t tmp1 = fMin(input1) * fSoP(input2, input3);
107 result[gid0] = fsum(tmp0, tmp1);
111size_t sourceSize[] = { strlen(source) };
114double random(
double min,
double max)
116 if (rtl::math::approxEqual(
min,
max))
122void populateInput(std::unique_ptr<LibreOfficeDeviceEvaluationIO>
const & testData)
124 double* input0 = testData->input0.data();
125 double* input1 = testData->input1.data();
126 double* input2 = testData->input2.data();
127 double* input3 = testData->input3.data();
130 input0[
i] = random(0,
i);
131 input1[
i] = random(0,
i);
132 input2[
i] = random(0,
i);
133 input3[
i] = random(0,
i);
138ds_status evaluateScoreForDevice(
ds_device& rDevice, std::unique_ptr<LibreOfficeDeviceEvaluationIO>
const & testData)
143 SAL_INFO(
"opencl.device",
"Device: \"" << rDevice.
sDeviceName <<
"\" (OpenCL) evaluation...");
147 std::unique_ptr<char[]> aExtInfo;
149 size_t aDevExtInfoSize = 0;
152 clStatus = clGetDeviceInfo(rDevice.
aDeviceID, CL_DEVICE_EXTENSIONS, 0,
nullptr, &aDevExtInfoSize);
153 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clGetDeviceInfo");
155 aExtInfo.reset(
new char[aDevExtInfoSize]);
156 clStatus = clGetDeviceInfo(rDevice.
aDeviceID, CL_DEVICE_EXTENSIONS,
sizeof(
char) * aDevExtInfoSize, aExtInfo.get(),
nullptr);
157 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clGetDeviceInfo");
160 bool bKhrFp64Flag =
false;
161 bool bAmdFp64Flag =
false;
162 const char* buildOption =
nullptr;
163 std::string tmpStr(
"-Dfp_t=double -Dfp_t4=double4 -Dfp_t16=double16 -DINPUTSIZE=");
164 std::ostringstream tmpOStrStr;
166 tmpStr.append(tmpOStrStr.str());
168 if ((std::string(aExtInfo.get())).find(
"cl_khr_fp64") != std::string::npos)
172 tmpStr.append(
" -DKHR_DP_EXTENSION");
173 buildOption = tmpStr.c_str();
174 SAL_INFO(
"opencl.device",
"... has cl_khr_fp64");
176 else if ((std::string(aExtInfo.get())).find(
"cl_amd_fp64") != std::string::npos)
180 tmpStr.append(
" -DAMD_DP_EXTENSION");
181 buildOption = tmpStr.c_str();
182 SAL_INFO(
"opencl.device",
"... has cl_amd_fp64");
185 if (!bKhrFp64Flag && !bAmdFp64Flag)
188 rDevice.
fTime = DBL_MAX;
190 SAL_INFO(
"opencl.device",
"... no fp64 support");
199 cl_context clContext = clCreateContext(
nullptr, 1, &rDevice.
aDeviceID,
nullptr,
nullptr, &clStatus);
200 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateContext");
201 cl_command_queue clQueue = clCreateCommandQueue(clContext, rDevice.
aDeviceID, 0, &clStatus);
202 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateCommandQueue");
205 cl_program clProgram = clCreateProgramWithSource(clContext, 1, &source, sourceSize, &clStatus);
206 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateProgramWithSource");
207 clStatus = clBuildProgram(clProgram, 1, &rDevice.
aDeviceID, buildOption,
nullptr,
nullptr);
208 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clBuildProgram");
209 if (CL_SUCCESS != clStatus)
214 clStatus = clGetProgramBuildInfo(clProgram, rDevice.
aDeviceID, CL_PROGRAM_BUILD_LOG, 0,
nullptr, &
length);
215 buildLog =
static_cast<char*
>(malloc(
length));
216 clGetProgramBuildInfo(clProgram, rDevice.
aDeviceID, CL_PROGRAM_BUILD_LOG,
length, buildLog, &
length);
217 SAL_INFO(
"opencl.device",
"Build Errors:\n" << buildLog);
220 rDevice.
fTime = DBL_MAX;
229 cl_kernel clKernel = clCreateKernel(clProgram,
"DynamicKernel", &clStatus);
230 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateKernel");
231 cl_mem clResult = clCreateBuffer(clContext, CL_MEM_WRITE_ONLY | CL_MEM_USE_HOST_PTR,
sizeof(cl_double) * testData->outputSize, testData->output.data(), &clStatus);
232 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateBuffer::clResult");
233 cl_mem clInput0 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
sizeof(cl_double) * testData->inputSize, testData->input0.data(), &clStatus);
234 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateBuffer::clInput0");
235 cl_mem clInput1 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
sizeof(cl_double) * testData->inputSize, testData->input1.data(), &clStatus);
236 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateBuffer::clInput1");
237 cl_mem clInput2 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
sizeof(cl_double) * testData->inputSize, testData->input2.data(), &clStatus);
238 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateBuffer::clInput2");
239 cl_mem clInput3 = clCreateBuffer(clContext, CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
sizeof(cl_double) * testData->inputSize, testData->input3.data(), &clStatus);
240 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clCreateBuffer::clInput3");
241 clStatus = clSetKernelArg(clKernel, 0,
sizeof(cl_mem),
static_cast<void*
>(&clResult));
242 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clSetKernelArg::clResult");
243 clStatus = clSetKernelArg(clKernel, 1,
sizeof(cl_mem),
static_cast<void*
>(&clInput0));
244 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clSetKernelArg::clInput0");
245 clStatus = clSetKernelArg(clKernel, 2,
sizeof(cl_mem),
static_cast<void*
>(&clInput1));
246 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clSetKernelArg::clInput1");
247 clStatus = clSetKernelArg(clKernel, 3,
sizeof(cl_mem),
static_cast<void*
>(&clInput2));
248 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clSetKernelArg::clInput2");
249 clStatus = clSetKernelArg(clKernel, 4,
sizeof(cl_mem),
static_cast<void*
>(&clInput3));
250 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clSetKernelArg::clInput3");
251 size_t globalWS[1] = { testData->outputSize };
252 size_t const localSize[1] = { 64 };
253 clStatus = clEnqueueNDRangeKernel(clQueue, clKernel, 1,
nullptr, globalWS, localSize, 0,
nullptr,
nullptr);
254 DS_CHECK_STATUS(clStatus,
"evaluateScoreForDevice::clEnqueueNDRangeKernel");
256 clReleaseMemObject(clInput3);
257 clReleaseMemObject(clInput2);
258 clReleaseMemObject(clInput1);
259 clReleaseMemObject(clInput0);
260 clReleaseMemObject(clResult);
261 clReleaseKernel(clKernel);
267 clReleaseProgram(clProgram);
268 clReleaseCommandQueue(clQueue);
269 clReleaseContext(clContext);
275 SAL_INFO(
"opencl.device",
"Device: \"CPU\" (Native) evaluation...");
279 for (j = 0; j < testData->outputSize; j++)
281 double fAverage = 0.0f;
282 double fMin = DBL_MAX;
286 fAverage += testData->input0[
i];
287 fMin = std::min(fMin, testData->input1[
i]);
288 fSoP += testData->input2[
i] * testData->input3[
i];
290 fAverage /= testData->inputSize;
291 testData->output[j] = fAverage + (fMin * fSoP);
293 if (j > 0 && j % 100 == 0)
296 if (rDevice.
fTime >= 1)
304 rDevice.
fTime /= (
static_cast<double>(j) / testData->outputSize);
310 rDevice.
fTime *= 10.0;
316ds_status profileDevices(std::unique_ptr<ds_profile>
const & pProfile, std::unique_ptr<LibreOfficeDeviceEvaluationIO>
const & pTestData)
323 for (
ds_device& rDevice : pProfile->devices)
325 ds_status evaluatorStatus = evaluateScoreForDevice(rDevice, pTestData);
328 status = evaluatorStatus;
336int pickBestDevice(std::unique_ptr<ds_profile>
const & profile)
338 double bestScore = DBL_MAX;
340 int nBestDeviceIndex = -1;
342 for (std::vector<ds_device>::size_type
d = 0;
d < profile->devices.size();
365 SAL_INFO(
"opencl.device",
"Device[" <<
d <<
"] " << device.
sDeviceName <<
" is denylisted or not allowlisted");
366 device.
fTime = DBL_MAX;
371 double fScore = DBL_MAX;
372 if (device.
fTime >= 0.0
373 || rtl::math::approxEqual(device.
fTime, DBL_MAX))
375 fScore = device.
fTime;
379 SAL_INFO(
"opencl.device",
"Unusual null score");
384 SAL_INFO(
"opencl.device",
"Device[" <<
d <<
"] " << device.
sDeviceName <<
" (OpenCL) score is " << fScore);
388 SAL_INFO(
"opencl.device",
"Device[" <<
d <<
"] CPU (Native) score is " << fScore);
390 if (fScore < bestScore)
393 nBestDeviceIndex =
d;
398 SAL_INFO(
"opencl.device",
"Selected Device[" << nBestDeviceIndex <<
"]: " << profile->devices[nBestDeviceIndex].sDeviceName <<
"(OpenCL).");
402 SAL_INFO(
"opencl.device",
"Selected Device[" << nBestDeviceIndex <<
"]: CPU (Native).");
404 return nBestDeviceIndex;
408int matchDevice(std::unique_ptr<ds_profile>
const & profile,
const char* deviceName)
410 int deviceMatch = -1;
411 for (
size_t d = 0;
d < profile->devices.size() - 1;
d++)
413 if (profile->devices[
d].sDeviceName.indexOf(deviceName) != -1)
416 if (std::string(
"NATIVE_CPU").
find(deviceName) != std::string::npos)
417 deviceMatch = profile->devices.size() - 1;
426 explicit LogWriter(OUString
const & aFileName)
427 : maStream(aFileName, StreamMode::WRITE)
430 void text(std::string_view rText)
436 void log(std::string_view rKey, std::string_view rValue)
444 void log(std::string_view rKey,
int rValue)
446 log(rKey, OString::number(rValue));
449 void log(std::string_view rKey,
bool rValue)
451 log(rKey, OString::boolean(rValue));
456void writeDevicesLog(std::unique_ptr<ds_profile>
const & rProfile, std::u16string_view sProfilePath,
int nSelectedIndex)
458 OUString aCacheFile(OUString::Concat(sProfilePath) +
"opencl_devices.log");
459 LogWriter aWriter(aCacheFile);
463 for (
const ds_device& rDevice : rProfile->devices)
467 aWriter.log(
"Device Index",
nIndex);
468 aWriter.log(
" Selected",
nIndex == nSelectedIndex);
495 std::u16string_view sProfilePath,
bool bForceSelection)
498 if (!bIsDeviceSelected || bForceSelection)
501 std::unique_ptr<ds_profile> aProfile;
509 return selectedDevice;
513 OUString sFilePath = OUString::Concat(sProfilePath) +
"opencl_profile.xml";
515 if (!bForceSelection)
522 SAL_INFO(
"opencl.device",
"Performing forced profiling.");
526 if (!bForceSelection)
528 SAL_INFO(
"opencl.device",
"Profile file not available (" << sFilePath <<
"); performing profiling.");
532 std::unique_ptr<LibreOfficeDeviceEvaluationIO> testData(
new LibreOfficeDeviceEvaluationIO);
535 testData->input0.resize(testData->inputSize);
536 testData->input1.resize(testData->inputSize);
537 testData->input2.resize(testData->inputSize);
538 testData->input3.resize(testData->inputSize);
539 testData->output.resize(testData->outputSize);
540 populateInput(testData);
543 status = profileDevices(aProfile, testData);
551 SAL_INFO(
"opencl.device",
"Scores written to file (" << sFilePath <<
").");
555 SAL_INFO(
"opencl.device",
"Error saving scores to file (" << sFilePath <<
"); scores not written to file.");
560 SAL_INFO(
"opencl.device",
"Unable to evaluate performance; scores not written to file.");
565 SAL_INFO(
"opencl.device",
"Profile read from file (" << sFilePath <<
").");
569 int bestDeviceIdx = pickBestDevice(aProfile);
572 char* overrideDeviceStr = getenv(
"SC_OPENCL_DEVICE_OVERRIDE");
573 if (
nullptr != overrideDeviceStr)
575 int overrideDeviceIdx = matchDevice(aProfile, overrideDeviceStr);
576 if (-1 != overrideDeviceIdx)
578 SAL_INFO(
"opencl.device",
"Overriding Device Selection (SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr <<
").");
579 bestDeviceIdx = overrideDeviceIdx;
582 SAL_INFO(
"opencl.device",
"Selected Device[" << bestDeviceIdx <<
"]: " << aProfile->devices[bestDeviceIdx].sDeviceName <<
" (OpenCL).");
586 SAL_INFO(
"opencl.device",
"Selected Device[" << bestDeviceIdx <<
"]: CPU (Native).");
591 SAL_INFO(
"opencl.device",
"Ignoring invalid SC_OPENCL_DEVICE_OVERRIDE=" << overrideDeviceStr <<
").");
598 selectedDevice = aProfile->devices[bestDeviceIdx];
599 bIsDeviceSelected =
true;
601 writeDevicesLog(aProfile, sProfilePath, bestDeviceIdx);
606 return selectedDevice;
SvStream & WriteOString(std::string_view rStr)
SvStream & WriteChar(char nChar)
#define SAL_INFO(area, stream)
double uniform_real_distribution(double a=0.0, double b=1.0)
OSQLColumns::const_iterator find(const OSQLColumns::const_iterator &first, const OSQLColumns::const_iterator &last, std::u16string_view _rVal, const ::comphelper::UStringMixEqual &_rCase)
constexpr std::enable_if_t< std::is_signed_v< T >, std::make_unsigned_t< T > > make_unsigned(T value)
SwNodeOffset min(const SwNodeOffset &a, const SwNodeOffset &b)
ds_device const & getDeviceSelection(std::u16string_view sProfilePath, bool bForceSelection)
ds_status writeProfile(const OUString &rStreamName, std::unique_ptr< ds_profile > const &pProfile)
ds_status readProfile(const OUString &rStreamName, std::unique_ptr< ds_profile > const &pProfile)
ds_status initDSProfile(std::unique_ptr< ds_profile > &rProfile, OString const &rVersion)
static OpenCLConfig get()
OString sDeviceExtensions
bool bDeviceCompilerAvailable
OString sPlatformExtensions
OString sDeviceOpenCLVersion
bool bDeviceLinkerAvailable