18 #if defined(LO_SSE2_AVAILABLE)
20 size_t nRealSize =
mnSize - i;
21 size_t nUnrolledSize = nRealSize - (nRealSize % 8);
23 if (nUnrolledSize > 0)
25 __m128d sum1 = _mm_setzero_pd();
26 __m128d sum2 = _mm_setzero_pd();
27 __m128d sum3 = _mm_setzero_pd();
28 __m128d sum4 = _mm_setzero_pd();
30 for (; i < nUnrolledSize; i += 8)
32 __m128d load1 = _mm_load_pd(pCurrent);
33 sum1 = _mm_add_pd(sum1, load1);
36 __m128d load2 = _mm_load_pd(pCurrent);
37 sum2 = _mm_add_pd(sum2, load2);
40 __m128d load3 = _mm_load_pd(pCurrent);
41 sum3 = _mm_add_pd(sum3, load3);
44 __m128d load4 = _mm_load_pd(pCurrent);
45 sum4 = _mm_add_pd(sum4, load4);
48 sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
52 _mm_storel_pd(&temp, sum1);
55 _mm_storeh_pd(&temp, sum1);
double executeSSE2(size_t &i, const double *pCurrent) const