LibreOffice Module sc (master)  1
arraysumSSE2.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #define LO_ARRAYSUM_SPACE SSE2
12 #include "arraysum.hxx"
13 
15 
16 #include <tools/simd.hxx>
17 #include <tools/simdsupport.hxx>
18 
19 #include <cstdlib>
20 
21 namespace sc::op
22 {
23 #ifdef LO_SSE2_AVAILABLE // Old processors
24 
25 using namespace SSE2;
26 
29 static inline void sumSSE2(__m128d& sum, __m128d& err, const __m128d& value)
30 {
31  const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
32  // Temporal parameter
33  __m128d t = _mm_add_pd(sum, value);
34  // Absolute value of the total sum
35  __m128d asum = _mm_and_pd(sum, ANNULATE_SIGN_BIT);
36  // Absolute value of the value to add
37  __m128d avalue = _mm_and_pd(value, ANNULATE_SIGN_BIT);
38  // Compare the absolute values sum >= value
39  __m128d mask = _mm_cmpge_pd(asum, avalue);
40  // The following code has this form ( a - t + b)
41  // Case 1: a = sum b = value
42  // Case 2: a = value b = sum
43  __m128d a = _mm_add_pd(_mm_and_pd(mask, sum), _mm_andnot_pd(mask, value));
44  __m128d b = _mm_add_pd(_mm_and_pd(mask, value), _mm_andnot_pd(mask, sum));
45  err = _mm_add_pd(err, _mm_add_pd(_mm_sub_pd(a, t), b));
46  // Store result
47  sum = t;
48 }
49 
50 #endif
51 
54 KahanSumSimple executeSSE2(size_t& i, size_t nSize, const double* pCurrent)
55 {
56 #ifdef LO_SSE2_AVAILABLE
57  // Make sure we don't fall out of bounds.
58  // This works by sums of 8 terms.
59  // So the 8'th term is i+7
60  // If we iterate until nSize won't fall out of bounds
61  if (nSize > i + 7)
62  {
63  // Setup sums and errors as 0
64  __m128d sum1 = _mm_setzero_pd();
65  __m128d err1 = _mm_setzero_pd();
66  __m128d sum2 = _mm_setzero_pd();
67  __m128d err2 = _mm_setzero_pd();
68  __m128d sum3 = _mm_setzero_pd();
69  __m128d err3 = _mm_setzero_pd();
70  __m128d sum4 = _mm_setzero_pd();
71  __m128d err4 = _mm_setzero_pd();
72 
73  for (; i + 7 < nSize; i += 8)
74  {
75  // Kahan sum 1
76  __m128d load1 = _mm_loadu_pd(pCurrent);
77  sumSSE2(sum1, err1, load1);
78  pCurrent += 2;
79 
80  // Kahan sum 2
81  __m128d load2 = _mm_loadu_pd(pCurrent);
82  sumSSE2(sum2, err2, load2);
83  pCurrent += 2;
84 
85  // Kahan sum 3
86  __m128d load3 = _mm_loadu_pd(pCurrent);
87  sumSSE2(sum3, err3, load3);
88  pCurrent += 2;
89 
90  // Kahan sum 4
91  __m128d load4 = _mm_loadu_pd(pCurrent);
92  sumSSE2(sum4, err4, load4);
93  pCurrent += 2;
94  }
95 
96  // Now we combine pairwise summation with Kahan summation
97 
98  // 1+2 3+4 -> 1, 3
99  sumSSE2(sum1, err1, sum2);
100  sumSSE2(sum1, err1, err2);
101  sumSSE2(sum3, err3, sum4);
102  sumSSE2(sum3, err3, err4);
103 
104  // 1+3 -> 1
105  sumSSE2(sum1, err1, sum3);
106  sumSSE2(sum1, err1, err3);
107 
108  // Store results
109  double sums[2];
110  double errs[2];
111  _mm_storeu_pd(&sums[0], sum1);
112  _mm_storeu_pd(&errs[0], err1);
113 
114  // First Kahan & pairwise summation
115  // 0+1 -> 0
116  sumNeumanierNormal(sums[0], errs[0], sums[1]);
117  sumNeumanierNormal(sums[0], errs[0], errs[1]);
118 
119  // Store result
120  return { sums[0], errs[0] };
121  }
122  return { 0.0, 0.0 };
123 #else
124  (void)i;
125  (void)nSize;
126  (void)pCurrent;
127  abort();
128 #endif
129 }
130 }
131 
132 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
err
int i
if(aStr!=aBuf) UpdateName_Impl(m_xFollowLb.get()
Any value
INLINE void sumNeumanierNormal(double &sum, double &err, const double &value)
Performs one step of the Neumanier sum between doubles Overwrites the summand and error sum...
Definition: arraysum.hxx:37
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo