LibreOffice Module sc (master)  1
arraysumSSE2.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #include <arraysumfunctor.hxx>
12 #include <tools/simdsupport.hxx>
13 
14 namespace sc
15 {
16 double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
17 {
18 #if defined(LO_SSE2_AVAILABLE)
19  double fSum = 0.0;
20  size_t nRealSize = mnSize - i;
21  size_t nUnrolledSize = nRealSize - (nRealSize % 8);
22 
23  if (nUnrolledSize > 0)
24  {
25  __m128d sum1 = _mm_setzero_pd();
26  __m128d sum2 = _mm_setzero_pd();
27  __m128d sum3 = _mm_setzero_pd();
28  __m128d sum4 = _mm_setzero_pd();
29 
30  for (; i < nUnrolledSize; i += 8)
31  {
32  __m128d load1 = _mm_load_pd(pCurrent);
33  sum1 = _mm_add_pd(sum1, load1);
34  pCurrent += 2;
35 
36  __m128d load2 = _mm_load_pd(pCurrent);
37  sum2 = _mm_add_pd(sum2, load2);
38  pCurrent += 2;
39 
40  __m128d load3 = _mm_load_pd(pCurrent);
41  sum3 = _mm_add_pd(sum3, load3);
42  pCurrent += 2;
43 
44  __m128d load4 = _mm_load_pd(pCurrent);
45  sum4 = _mm_add_pd(sum4, load4);
46  pCurrent += 2;
47  }
48  sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
49 
50  double temp;
51 
52  _mm_storel_pd(&temp, sum1);
53  fSum += temp;
54 
55  _mm_storeh_pd(&temp, sum1);
56  fSum += temp;
57  }
58  return fSum;
59 #else
60  (void)i;
61  (void)pCurrent;
62  return 0.0;
63 #endif
64 }
65 }
double executeSSE2(size_t &i, const double *pCurrent) const
typedef void(CALLTYPE *GetFuncDataPtr)(sal_uInt16 &nNo