LibreOffice Module sc (master)  1
arraysumAVX512.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #define LO_ARRAYSUM_SPACE AVX512
12 #include "arraysum.hxx"
13 
15 
16 #include <tools/simd.hxx>
17 #include <tools/simdsupport.hxx>
18 
19 #include <stdlib.h>
20 
21 namespace sc::op
22 {
23 #ifdef LO_AVX512F_AVAILABLE
24 
25 bool hasAVX512FCode() { return true; }
26 
27 using namespace AVX512;
28 
31 static inline void sumAVX512(__m512d& sum, __m512d& err, const __m512d& value)
32 {
33  // Temporal parameter
34  __m512d t = _mm512_add_pd(sum, value);
35  // Absolute value of the total sum
36  __m512d asum = _mm512_abs_pd(sum);
37  // Absolute value of the value to add
38  __m512d avalue = _mm512_abs_pd(value);
39  // Compare the absolute values sum >= value
40  __mmask8 mask = _mm512_cmp_pd_mask(avalue, asum, _CMP_GE_OQ);
41  // The following code has this form ( a - t + b)
42  // Case 1: a = sum b = value
43  // Case 2: a = value b = sum
44  __m512d a = _mm512_mask_blend_pd(mask, sum, value);
45  __m512d b = _mm512_mask_blend_pd(mask, value, sum);
46  err = _mm512_add_pd(err, _mm512_add_pd(_mm512_sub_pd(a, t), b));
47  // Store result
48  sum = t;
49 }
50 
53 KahanSumSimple executeAVX512F(size_t& i, size_t nSize, const double* pCurrent)
54 {
55  // Make sure we don't fall out of bounds.
56  // This works by sums of 8 terms.
57  // So the 8'th term is i+7
58  // If we iterate until nSize won't fall out of bounds
59  if (nSize > i + 7)
60  {
61  // Setup sums and errors as 0
62  __m512d sum = _mm512_setzero_pd();
63  __m512d err = _mm512_setzero_pd();
64 
65  // Sum the stuff
66  for (; i + 7 < nSize; i += 8)
67  {
68  // Kahan sum
69  __m512d load = _mm512_loadu_pd(pCurrent);
70  sumAVX512(sum, err, load);
71  pCurrent += 8;
72  }
73 
74  // Store result
75  static_assert(sizeof(double) == 8);
76  double sums[8];
77  double errs[8];
78  _mm512_storeu_pd(&sums[0], sum);
79  _mm512_storeu_pd(&errs[0], err);
80 
81  // First Kahan & pairwise summation
82  // 0+1 1+2 3+4 4+5 6+7 -> 0, 2, 4, 6
83  sumNeumanierNormal(sums[0], errs[0], sums[1]);
84  sumNeumanierNormal(sums[2], errs[2], sums[3]);
85  sumNeumanierNormal(sums[4], errs[4], sums[5]);
86  sumNeumanierNormal(sums[6], errs[6], sums[7]);
87  sumNeumanierNormal(sums[0], errs[0], errs[1]);
88  sumNeumanierNormal(sums[2], errs[2], errs[3]);
89  sumNeumanierNormal(sums[4], errs[4], errs[5]);
90  sumNeumanierNormal(sums[6], errs[6], errs[7]);
91 
92  // Second Kahan & pairwise summation
93  // 0+2 4+6 -> 0, 4
94  sumNeumanierNormal(sums[0], errs[0], sums[2]);
95  sumNeumanierNormal(sums[4], errs[4], sums[6]);
96  sumNeumanierNormal(sums[0], errs[0], errs[2]);
97  sumNeumanierNormal(sums[4], errs[4], errs[6]);
98 
99  // Third Kahan & pairwise summation
100  // 0+4 -> 0
101  sumNeumanierNormal(sums[0], errs[0], sums[4]);
102  sumNeumanierNormal(sums[0], errs[0], errs[4]);
103 
104  // Return final result
105  return { sums[0], errs[0] };
106  }
107  return { 0.0, 0.0 };
108 }
109 
110 #else // LO_AVX512F_AVAILABLE
111 
112 bool hasAVX512FCode() { return false; }
113 
114 KahanSumSimple executeAVX512F(size_t&, size_t, const double*) { abort(); }
115 
116 #endif
117 
118 } // end namespace sc::op
119 
120 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
KahanSumSimple executeAVX512F(size_t &, size_t, const double *)
bool hasAVX512FCode()
err
int i
uno_Any a
XPropertyListType t
Any value
INLINE void sumNeumanierNormal(double &sum, double &err, const double &value)
Performs one step of the Neumanier sum between doubles Overwrites the summand and error sum...
Definition: arraysum.hxx:37