25static inline void sumSSE2(__m128d& sum, __m128d& err, 
const __m128d& value)
 
   27    const __m128d ANNULATE_SIGN_BIT = _mm_castsi128_pd(_mm_set1_epi64x(0x7FFF'FFFF'FFFF'FFFF));
 
   29    __m128d 
t = _mm_add_pd(sum, value);
 
   31    __m128d asum = _mm_and_pd(sum, ANNULATE_SIGN_BIT);
 
   33    __m128d avalue = _mm_and_pd(value, ANNULATE_SIGN_BIT);
 
   35    __m128d mask = _mm_cmpge_pd(asum, avalue);
 
   39    __m128d 
a = _mm_add_pd(_mm_and_pd(mask, sum), _mm_andnot_pd(mask, value));
 
   40    __m128d b = _mm_add_pd(_mm_and_pd(mask, value), _mm_andnot_pd(mask, sum));
 
   41    err = _mm_add_pd(err, _mm_add_pd(_mm_sub_pd(a, t), b));
 
   57        __m128d sum1 = _mm_setzero_pd();
 
   58        __m128d err1 = _mm_setzero_pd();
 
   59        __m128d sum2 = _mm_setzero_pd();
 
   60        __m128d err2 = _mm_setzero_pd();
 
   61        __m128d sum3 = _mm_setzero_pd();
 
   62        __m128d err3 = _mm_setzero_pd();
 
   63        __m128d sum4 = _mm_setzero_pd();
 
   64        __m128d err4 = _mm_setzero_pd();
 
   66        for (; 
i + 7 < nSize; 
i += 8)
 
   69            __m128d load1 = _mm_loadu_pd(pCurrent);
 
   70            sumSSE2(sum1, err1, load1);
 
   74            __m128d load2 = _mm_loadu_pd(pCurrent);
 
   75            sumSSE2(sum2, err2, load2);
 
   79            __m128d load3 = _mm_loadu_pd(pCurrent);
 
   80            sumSSE2(sum3, err3, load3);
 
   84            __m128d load4 = _mm_loadu_pd(pCurrent);
 
   85            sumSSE2(sum4, err4, load4);
 
   92        sumSSE2(sum1, err1, sum2);
 
   93        sumSSE2(sum1, err1, err2);
 
   94        sumSSE2(sum3, err3, sum4);
 
   95        sumSSE2(sum3, err3, err4);
 
   98        sumSSE2(sum1, err1, sum3);
 
   99        sumSSE2(sum1, err1, err3);
 
  104        _mm_storeu_pd(&sums[0], sum1);
 
  105        _mm_storeu_pd(&errs[0], err1);
 
  113        return { sums[0], errs[0] };
 
This class provides LO with Kahan summation algorithm About this algorithm: https://en....
 
KahanSum executeSSE2(size_t &i, size_t nSize, const double *pCurrent)
 
void sumNeumanierNormal(double &sum, double &err, const double &value)
Performs one step of the Neumanier sum between doubles Overwrites the summand and error @parma sum.