LibreOffice Module sc (master)  1
RegressionDialog.cxx
Go to the documentation of this file.
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3  * This file is part of the LibreOffice project.
4  *
5  * This Source Code Form is subject to the terms of the Mozilla Public
6  * License, v. 2.0. If a copy of the MPL was not distributed with this
7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8  *
9  */
10 
11 #include <sal/config.h>
12 
13 #include <string_view>
14 
15 #include <document.hxx>
16 #include <reffact.hxx>
18 #include <RegressionDialog.hxx>
19 #include <scresid.hxx>
20 #include <strings.hrc>
21 
22 /*
23  Some regression basics
24  ----------------------
25 
26  1. Linear regression fits using data, a linear function between the dependent variable and the independent variable(s).
27  The basic form of this function is :-
28 
29  y = b + m_1*x_1 + m_2*x_2 + ... + m_k*x_k
30 
31  where y is the dependent variable
32  x_1, x_2, ..., x_k are the k independent variables
33  b is the intercept
34  m_1, m_2, ..., m_k are the slopes corresponding to the variables x_1, x_2, ..., x_k respectively.
35 
36 
37  This equation for n observations can be compactly written using matrices as :-
38 
39  y = X*A
40 
41  where y is the n dimensional column vector containing dependent variable observations.
42  where X is matrix of shape n*(k+1) where a row looks like [ 1 x_1 x_2 ... x_k ]
43  A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
44 
45  Calc formula LINEST(Y_array ; X_array) can be used to compute all entries in "A" along with many other statistics.
46 
47 
48  2. Logarithmic regression is basically used to find a linear function between the dependent variable and
49  the natural logarithm of the independent variable(s).
50  So the basic form of this functions is :-
51 
52  y = b + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k)
53 
54  This can be again written in a compact matrix form for n observations.
55 
56  y = ln(X)*A
57 
58  where y is the n dimensional column vector containing dependent variable observations.
59  where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
60  A is the k+1 dimensional column vector [ b m_1 m_2 ... m_k ]
61 
62  To estimate A, we use the formula =LINEST(Y_array ; LN(X_array))
63 
64 
65  3. Power regression is used to fit the following model :-
66 
67  y = b * (x_1 ^ m_1) * (x_2 ^ m_2) * ... * (x_k ^ m_k)
68 
69  To reduce this to a linear function(so that we can still use LINEST()), we take natural logarithm on both sides
70 
71  ln(y) = c + m_1*ln(x_1) + m_2*ln(x_2) + ... + m_k*ln(x_k) ; where c = ln(b)
72 
73 
74  This again can be written compactly in matrix form as :-
75 
76  ln(y) = ln(X)*A
77 
78  where y is the n dimensional column vector containing dependent variable observations.
79  where X is matrix of shape n*(k+1) where a row looks like [ e x_1 x_2 ... x_k ]
80  A is the k+1 dimensional column vector [ c m_1 m_2 ... m_k ]
81 
82  To estimate A, we use the formula =LINEST(LN(Y_array) ; LN(X_array))
83 
84  Once we get A, to get back y from x's we use the formula :-
85 
86  y = exp( ln(X)*A )
87 
88 
89 
90  Some references for computing confidence interval for the regression coefficients :-
91 
92  [1] https://en.wikipedia.org/wiki/Student%27s_t-test#Slope_of_a_regression_line
93  [2] https://en.wikipedia.org/wiki/Simple_linear_regression#Normality_assumption
94  [3] https://onlinecourses.science.psu.edu/stat414/node/280
95 
96  */
97 
98 namespace
99 {
100  enum class ScRegType {
101  LINEAR,
102  LOGARITHMIC,
103  POWER
104  };
105 
106  const char* constRegressionModel[] =
107  {
108  STR_LABEL_LINEAR,
109  STR_LABEL_LOGARITHMIC,
110  STR_LABEL_POWER
111  };
112 
113  OUString constTemplateLINEST[] =
114  {
115  "=LINEST(%VARIABLE2_RANGE% ; %VARIABLE1_RANGE% ; %CALC_INTERCEPT% ; TRUE)",
116  "=LINEST(%VARIABLE2_RANGE% ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)",
117  "=LINEST(LN(%VARIABLE2_RANGE%) ; LN(%VARIABLE1_RANGE%) ; %CALC_INTERCEPT% ; TRUE)"
118  };
119 
120  OUString constRegressionFormula[] =
121  {
122  "=MMULT(%XDATAMATRIX_RANGE% ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
123  "=MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%",
124  "=EXP(MMULT(LN(%XDATAMATRIX_RANGE%) ; %SLOPES_RANGE%) + %INTERCEPT_ADDR%)"
125  };
126 
127 } // end anonymous namespace
128 
129 static size_t lcl_GetNumRowsColsInRange(const ScRange& rRange, bool bRows)
130 {
131  if (bRows)
132  return rRange.aEnd.Row() - rRange.aStart.Row() + 1;
133 
134  return rRange.aEnd.Col() - rRange.aStart.Col() + 1;
135 }
136 
138  SfxBindings* pSfxBindings, SfxChildWindow* pChildWindow,
139  weld::Window* pParent, ScViewData& rViewData )
141  pSfxBindings, pChildWindow, pParent, rViewData,
142  "modules/scalc/ui/regressiondialog.ui", "RegressionDialog")
143  , mbUnivariate(true)
144  , mnNumIndependentVars(1)
145  , mnNumObservations(0)
146  , mbUse3DAddresses(false)
147  , mbCalcIntercept(true)
148  , mxWithLabelsCheckBox(m_xBuilder->weld_check_button("withlabels-check"))
149  , mxLinearRadioButton(m_xBuilder->weld_radio_button("linear-radio"))
150  , mxLogarithmicRadioButton(m_xBuilder->weld_radio_button("logarithmic-radio"))
151  , mxPowerRadioButton(m_xBuilder->weld_radio_button("power-radio"))
152  , mxErrorMessage(m_xBuilder->weld_label("error-message"))
153  , mxConfidenceLevelField(m_xBuilder->weld_spin_button("confidencelevel-spin"))
154  , mxCalcResidualsCheckBox(m_xBuilder->weld_check_button("calcresiduals-check"))
155  , mxNoInterceptCheckBox(m_xBuilder->weld_check_button("nointercept-check"))
156 {
157  mxWithLabelsCheckBox->connect_toggled(LINK(this, ScRegressionDialog, CheckBoxHdl));
158  mxConfidenceLevelField->connect_value_changed(LINK(this, ScRegressionDialog, NumericFieldHdl));
159 }
160 
162 {
163 }
164 
166 {
168 }
169 
171 {
172  return STR_REGRESSION_UNDO_NAME;
173 }
174 
176 {
177  AddressWalkerWriter aOutput(mOutputAddress, pDocShell, mDocument,
179  FormulaTemplate aTemplate(&mDocument);
181  mbCalcIntercept = !mxNoInterceptCheckBox->get_active();
182 
183  // max col of our output should account for
184  // 1. constant term column,
185  // 2. mnNumIndependentVars columns
186  // 3. Actual Y column
187  // 4. Predicted Y column
188  // 5. Residual Column
189  SCCOL nOutputMaxCol = mOutputAddress.Col() + mnNumIndependentVars + 3;
190 
191  ScRange aXDataRange(GetDataRange(mVariable1Range));
192  ScRange aYDataRange(GetDataRange(mVariable2Range));
193 
194  aTemplate.autoReplaceRange("%VARIABLE1_RANGE%", aXDataRange);
195  aTemplate.autoReplaceRange("%VARIABLE2_RANGE%", aYDataRange);
196  size_t nRegressionIndex = GetRegressionTypeIndex();
197  ScRegType eRegType = static_cast<ScRegType>(nRegressionIndex);
198  bool bTakeLogX = eRegType == ScRegType::LOGARITHMIC || eRegType == ScRegType::POWER;
199 
200  WriteRawRegressionResults(aOutput, aTemplate, nRegressionIndex);
201  WriteRegressionStatistics(aOutput, aTemplate);
202  WriteRegressionANOVAResults(aOutput, aTemplate);
203  WriteRegressionEstimatesWithCI(aOutput, aTemplate, bTakeLogX);
204  if (mxCalcResidualsCheckBox->get_active())
205  WritePredictionsWithResiduals(aOutput, aTemplate, nRegressionIndex);
206 
207  ScAddress aMaxAddress(aOutput.mMaximumAddress);
208  aMaxAddress.SetCol(std::max(aMaxAddress.Col(), nOutputMaxCol));
209  return ScRange(aOutput.mMinimumAddress, aMaxAddress);
210 }
211 
213 {
214  if (!mVariable1Range.IsValid())
215  {
216  mxErrorMessage->set_label(ScResId(STR_MESSAGE_XINVALID_RANGE));
217  return false;
218  }
219 
220  if (!mVariable2Range.IsValid())
221  {
222  mxErrorMessage->set_label(ScResId(STR_MESSAGE_YINVALID_RANGE));
223  return false;
224  }
225 
226  if (!mOutputAddress.IsValid())
227  {
228  mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_OUTPUT_ADDR));
229  return false;
230  }
231 
232  {
233  double fConfidenceLevel = mxConfidenceLevelField->get_value();
234  if ( fConfidenceLevel <= 0.0 || fConfidenceLevel >= 100.0 )
235  {
236  mxErrorMessage->set_label(ScResId(STR_MESSAGE_INVALID_CONFIDENCE_LEVEL));
237  return false;
238  }
239  }
240 
243 
244  bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
245 
246  bool bYHasSingleDim = (
247  (bGroupedByColumn &&
249  (!bGroupedByColumn &&
251 
252  if (!bYHasSingleDim)
253  {
254  if (bGroupedByColumn)
255  mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_COLUMN));
256  else
257  mxErrorMessage->set_label(ScResId(STR_MESSAGE_YVARIABLE_MULTI_ROW));
258  return false;
259  }
260 
261  bool bWithLabels = mxWithLabelsCheckBox->get_active();
262 
263  size_t nYObs = lcl_GetNumRowsColsInRange(mVariable2Range, bGroupedByColumn);
264  size_t nNumXVars = lcl_GetNumRowsColsInRange(mVariable1Range, !bGroupedByColumn);
265  mbUnivariate = nNumXVars == 1;
266  // Observation count mismatch check
267  if (lcl_GetNumRowsColsInRange(mVariable1Range, bGroupedByColumn) != nYObs)
268  {
269  if (mbUnivariate)
270  mxErrorMessage->set_label(ScResId(STR_MESSAGE_UNIVARIATE_NUMOBS_MISMATCH));
271  else
272  mxErrorMessage->set_label(ScResId(STR_MESSAGE_MULTIVARIATE_NUMOBS_MISMATCH));
273  return false;
274  }
275 
276  mnNumIndependentVars = nNumXVars;
277  mnNumObservations = bWithLabels ? nYObs - 1 : nYObs;
278 
281 
282  mxErrorMessage->set_label("");
283 
284  return true;
285 }
286 
288 {
289  if (mxLinearRadioButton->get_active())
290  return 0;
291  if (mxLogarithmicRadioButton->get_active())
292  return 1;
293  return 2;
294 }
295 
297 {
298  if (!mxWithLabelsCheckBox->get_active())
299  return rRange;
300 
301  ScRange aDataRange(rRange);
302  if (mGroupedBy == BY_COLUMN)
303  aDataRange.aStart.IncRow(1);
304  else
305  aDataRange.aStart.IncCol(1);
306 
307  return aDataRange;
308 }
309 
310 OUString ScRegressionDialog::GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
311 {
312  if (bXVar && nIndex == 0)
313  return "=\"" + ScResId(STR_LABEL_INTERCEPT) + "\"";
314 
315  if (mxWithLabelsCheckBox->get_active())
316  {
318  if (mGroupedBy == BY_COLUMN)
319  aAddr.IncCol(nIndex - 1);
320  else
321  aAddr.IncRow(nIndex - 1);
322 
324  return bWithLog ? OUString("=CONCAT(\"LN(\";" +
325  aAddr.Format(eAddrFlag, &mDocument, mDocument.GetAddressConvention()) + ";\")\")") :
326  OUString("=" + aAddr.Format(eAddrFlag, &mDocument, mDocument.GetAddressConvention()));
327  }
328 
329  OUString aDefaultVarName;
330 
331  if (bXVar)
332  aDefaultVarName = "X" + OUString::number(nIndex);
333  else
334  aDefaultVarName = "Y";
335 
336  return bWithLog ? OUString("=\"LN(" + aDefaultVarName + ")\"") :
337  OUString("=\"" + aDefaultVarName + "\"");
338 }
339 
340 OUString ScRegressionDialog::GetXVariableNameFormula(size_t nIndex, bool bWithLog)
341 {
342  assert(nIndex <= mnNumIndependentVars);
343  return GetVariableNameFormula(true, nIndex, bWithLog);
344 }
345 
347 {
348  return GetVariableNameFormula(false, 1, bWithLog);
349 }
350 
352  size_t nRegressionIndex)
353 {
354  rOutput.writeBoldString(ScResId(STR_REGRESSION));
355  rOutput.newLine();
356  // REGRESSION MODEL
357  rOutput.writeString(ScResId(STR_LABEL_REGRESSION_MODEL));
358  rOutput.nextColumn();
359  rOutput.writeString(ScResId(constRegressionModel[nRegressionIndex]));
360  rOutput.newLine();
361  rOutput.newLine();
362 
363  rOutput.writeString(ScResId(STR_LINEST_RAW_OUTPUT_TITLE));
364  rOutput.newLine();
365  rOutput.push();
366 
367  rTemplate.setTemplate(constTemplateLINEST[nRegressionIndex].
368  replaceFirst("%CALC_INTERCEPT%",
369  mbCalcIntercept ? std::u16string_view(u"TRUE") : std::u16string_view(u"FALSE")));
370  rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1 + mnNumIndependentVars, 5);
371  // Add LINEST result components to template
372  // 1. Add ranges for coefficients and standard errors for indep. vars and the intercept.
373  // Note that these two are in the reverse order(m_n, m_n-1, ..., m_1, b) w.r.t what we expect.
374  rTemplate.autoReplaceRange("%COEFFICIENTS_REV_RANGE%", ScRange(rOutput.current(), rOutput.current(mnNumIndependentVars)));
375  rTemplate.autoReplaceRange("%SERRORSX_REV_RANGE%", ScRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars, 1)));
376 
377  // 2. Add R-squared and standard error for y estimate.
378  rTemplate.autoReplaceAddress("%RSQUARED_ADDR%", rOutput.current(0, 2));
379  rTemplate.autoReplaceAddress("%SERRORY_ADDR%", rOutput.current(1, 2));
380 
381  // 3. Add F statistic and degrees of freedom
382  rTemplate.autoReplaceAddress("%FSTATISTIC_ADDR%", rOutput.current(0, 3));
383  rTemplate.autoReplaceAddress("%DoFRESID_ADDR%", rOutput.current(1, 3));
384 
385  // 4. Add regression sum of squares and residual sum of squares
386  rTemplate.autoReplaceAddress("%SSREG_ADDR%", rOutput.current(0, 4));
387  rTemplate.autoReplaceAddress("%SSRESID_ADDR%", rOutput.current(1, 4));
388 
389  rOutput.push(0, 4);
390  rOutput.newLine();
391 }
392 
394 {
395  rOutput.newLine();
396  rOutput.writeString(ScResId(STR_LABEL_REGRESSION_STATISTICS));
397  rOutput.newLine();
398 
399  const char* aMeasureNames[] =
400  {
401  STR_LABEL_RSQUARED,
402  STRID_CALC_STD_ERROR,
403  STR_LABEL_XVARIABLES_COUNT,
404  STR_OBSERVATIONS_LABEL,
405  STR_LABEL_ADJUSTED_RSQUARED
406  };
407 
408  OUString aMeasureFormulas[] =
409  {
410  "=%RSQUARED_ADDR%",
411  "=%SERRORY_ADDR%",
412  "=" + OUString::number(mnNumIndependentVars),
413  "=" + OUString::number(mnNumObservations),
414  OUString::Concat(
415  "=1 - (1 - %RSQUARED_ADDR%)*(%NUMOBS_ADDR% - 1)/(%NUMOBS_ADDR% - %NUMXVARS_ADDR%") +
416  (mbCalcIntercept ? std::u16string_view(u" - 1)") : std::u16string_view(u")"))
417  };
418 
419  rTemplate.autoReplaceAddress("%NUMXVARS_ADDR%", rOutput.current(1, 2));
420  rTemplate.autoReplaceAddress("%NUMOBS_ADDR%", rOutput.current(1, 3));
421 
422  for (size_t nIdx = 0; nIdx < SAL_N_ELEMENTS(aMeasureNames); ++nIdx)
423  {
424  rOutput.writeString(ScResId(aMeasureNames[nIdx]));
425  rOutput.nextColumn();
426  rTemplate.setTemplate(aMeasureFormulas[nIdx]);
427  rOutput.writeFormula(rTemplate.getTemplate());
428  rOutput.newLine();
429  }
430 }
431 
433 {
434  rOutput.newLine();
435  rOutput.writeString(ScResId(STR_LABEL_ANOVA));
436  rOutput.newLine();
437 
438  const size_t nColsInTable = 6;
439  const size_t nRowsInTable = 4;
440  OUString aTable[nRowsInTable][nColsInTable] =
441  {
442  {
443  "",
444  ScResId(STR_ANOVA_LABEL_DF),
445  ScResId(STR_ANOVA_LABEL_SS),
446  ScResId(STR_ANOVA_LABEL_MS),
447  ScResId(STR_ANOVA_LABEL_F),
448  ScResId(STR_ANOVA_LABEL_SIGNIFICANCE_F)
449  },
450  {
451  ScResId(STR_REGRESSION),
452  "=%NUMXVARS_ADDR%",
453  "=%SSREG_ADDR%",
454  "=%SSREG_ADDR% / %DoFREG_ADDR%",
455  "=%FSTATISTIC_ADDR%",
456  "=FDIST(%FSTATISTIC_ADDR% ; %DoFREG_ADDR% ; %DoFRESID_ADDR%)"
457  },
458  {
459  ScResId(STR_LABEL_RESIDUAL),
460  "=%DoFRESID_ADDR%",
461  "=%SSRESID_ADDR%",
462  "=%SSRESID_ADDR% / %DoFRESID_ADDR%",
463  "",
464  ""
465  },
466  {
467  ScResId(STR_ANOVA_LABEL_TOTAL),
468  "=%DoFREG_ADDR% + %DoFRESID_ADDR%",
469  "=%SSREG_ADDR% + %SSRESID_ADDR%",
470  "",
471  "",
472  ""
473  }
474  };
475 
476  rTemplate.autoReplaceAddress("%DoFREG_ADDR%", rOutput.current(1, 1));
477 
478  // Cell getter lambda
479  std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
480  {
481  return aTable[nRowIdx][nColIdx];
482  };
483 
484  // Cell writer lambda
485  std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate]
486  (const OUString& rContent, size_t /*nRowIdx*/, size_t /*nColIdx*/)
487  {
488  if (!rContent.isEmpty())
489  {
490  if (rContent.startsWith("="))
491  {
492  rTemplate.setTemplate(rContent);
493  rOutput.writeFormula(rTemplate.getTemplate());
494  }
495  else
496  rOutput.writeString(rContent);
497  }
498  };
499 
500  WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
501 
502  // User given confidence level
503  rOutput.newLine();
504  rOutput.writeString(ScResId(STR_LABEL_CONFIDENCE_LEVEL));
505  rOutput.nextColumn();
506  rOutput.writeValue(mxConfidenceLevelField->get_value() / 100.0);
507  rTemplate.autoReplaceAddress("%CONFIDENCE_LEVEL_ADDR%", rOutput.current());
508  rOutput.newLine();
509 }
510 
511 // Write slopes, intercept, their standard errors, t-statistics, p-value, confidence intervals
513  bool bTakeLogX)
514 {
515  rOutput.newLine();
516  SCROW nLastRow = rOutput.current(0, 1 + mnNumIndependentVars).Row();
517 
518  // Coefficients & Std.Errors ranges (column vectors) in this table (yet to populate).
519  rTemplate.autoReplaceRange("%COEFFICIENTS_RANGE%",
520  ScRange(rOutput.current(1, 1),
521  rOutput.current(1, 1 + mnNumIndependentVars)));
522  rTemplate.autoReplaceRange("%SLOPES_RANGE%", // Excludes the intercept
523  ScRange(rOutput.current(1, 2),
524  rOutput.current(1, 1 + mnNumIndependentVars)));
525  rTemplate.autoReplaceAddress("%INTERCEPT_ADDR%", rOutput.current(1, 1));
526  rTemplate.autoReplaceRange("%SERRORSX_RANGE%",
527  ScRange(rOutput.current(2, 1),
528  rOutput.current(2, 1 + mnNumIndependentVars)));
529  // t-Statistics range in this table (yet to populate)
530  rTemplate.autoReplaceRange("%TSTAT_RANGE%",
531  ScRange(rOutput.current(3, 1),
532  rOutput.current(3, 1 + mnNumIndependentVars)));
533 
534  const size_t nColsInTable = 7;
535  const size_t nRowsInTable = 2;
536  OUString aTable[nRowsInTable][nColsInTable] =
537  {
538  {
539  "",
540  ScResId(STR_LABEL_COEFFICIENTS),
541  ScResId(STRID_CALC_STD_ERROR),
542  ScResId(STR_LABEL_TSTATISTIC),
543  ScResId(STR_P_VALUE_LABEL),
544 
545  "=CONCAT(\"" + ScResId(STR_LABEL_LOWER) +
546  " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
547 
548  "=CONCAT(\"" + ScResId(STR_LABEL_UPPER) +
549  " \" ; INT(%CONFIDENCE_LEVEL_ADDR% * 100) ; \"%\")",
550  },
551 
552  // Following are matrix formulas of size numcols = 1, numrows = (mnNumIndependentVars + 1)
553  {
554  "",
555  // This puts the coefficients in the reverse order compared to that in LINEST output.
556  "=INDEX(%COEFFICIENTS_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
557  // This puts the standard errors in the reverse order compared to that in LINEST output.
558  "=INDEX(%SERRORSX_REV_RANGE%; 1 ; " + OUString::number(nLastRow + 2) + " - ROW())",
559  // t-Statistic
560  "=%COEFFICIENTS_RANGE% / %SERRORSX_RANGE%",
561  // p-Value
562  "=TDIST(ABS(%TSTAT_RANGE%) ; %DoFRESID_ADDR% ; 2 )",
563  // Lower limit of confidence interval
564  "=%COEFFICIENTS_RANGE% - %SERRORSX_RANGE% * "
565  "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)",
566  // Upper limit of confidence interval
567  "=%COEFFICIENTS_RANGE% + %SERRORSX_RANGE% * "
568  "TINV(1 - %CONFIDENCE_LEVEL_ADDR% ; %DoFRESID_ADDR%)"
569  }
570  };
571 
572  // Cell getter lambda
573  std::function<CellValueGetter> aCellGetterFunc = [&aTable](size_t nRowIdx, size_t nColIdx) -> const OUString&
574  {
575  return aTable[nRowIdx][nColIdx];
576  };
577 
578  // Cell writer lambda
579  size_t nNumIndependentVars = mnNumIndependentVars;
580  std::function<CellWriter> aCellWriterFunc = [&rOutput, &rTemplate, nNumIndependentVars]
581  (const OUString& rContent, size_t nRowIdx, size_t /*nColIdx*/)
582  {
583  if (!rContent.isEmpty())
584  {
585  if (rContent.startsWith("="))
586  {
587  rTemplate.setTemplate(rContent);
588  if (nRowIdx == 0)
589  rOutput.writeFormula(rTemplate.getTemplate());
590  else
591  rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, 1 + nNumIndependentVars);
592  }
593  else
594  rOutput.writeString(rContent);
595  }
596  };
597 
598  WriteTable(aCellGetterFunc, nRowsInTable, nColsInTable, rOutput, aCellWriterFunc);
599 
600  // Go back to the second row and first column of the table to
601  // fill the names of variables + intercept
602  rOutput.push(0, -1);
603 
604  for (size_t nXvarIdx = 0; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
605  {
606  rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, bTakeLogX));
607  rOutput.newLine();
608  }
609 
610 }
611 
612 // Re-write all observations in group-by column mode with predictions and residuals
614  size_t nRegressionIndex)
615 {
616  bool bGroupedByColumn = mGroupedBy == BY_COLUMN;
617  rOutput.newLine();
618  rOutput.push();
619 
620  // Range of X variables with rows as observations and columns as variables.
621  ScRange aDataMatrixRange(rOutput.current(0, 1), rOutput.current(mnNumIndependentVars - 1, mnNumObservations));
622  rTemplate.autoReplaceRange("%XDATAMATRIX_RANGE%", aDataMatrixRange);
623 
624  // Write X variable names
625  for (size_t nXvarIdx = 1; nXvarIdx <= mnNumIndependentVars; ++nXvarIdx)
626  {
627  // Here we write the X variables without any transformation(LN)
628  rOutput.writeFormula(GetXVariableNameFormula(nXvarIdx, false));
629  rOutput.nextColumn();
630  }
631  rOutput.reset();
632 
633  // Write the X data matrix
634  rOutput.nextRow();
635  OUString aDataMatrixFormula = bGroupedByColumn ? OUString("=%VARIABLE1_RANGE%") : OUString("=TRANSPOSE(%VARIABLE1_RANGE%)");
636  rTemplate.setTemplate(aDataMatrixFormula);
638 
639  // Write predicted values
640  rOutput.push(mnNumIndependentVars, -1);
641  rOutput.writeString(ScResId(STR_LABEL_PREDICTEDY));
642  rOutput.nextRow();
643  rTemplate.setTemplate(constRegressionFormula[nRegressionIndex]);
644  rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
645  rTemplate.autoReplaceRange("%PREDICTEDY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
646 
647  // Write actual Y
648  rOutput.push(1, -1);
649  rOutput.writeFormula(GetYVariableNameFormula(false));
650  rOutput.nextRow();
651  OUString aYVectorFormula = bGroupedByColumn ? OUString("=%VARIABLE2_RANGE%") : OUString("=TRANSPOSE(%VARIABLE2_RANGE%)");
652  rTemplate.setTemplate(aYVectorFormula);
653  rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
654  rTemplate.autoReplaceRange("%ACTUALY_RANGE%", ScRange(rOutput.current(), rOutput.current(0, mnNumObservations - 1)));
655 
656  // Write residual
657  rOutput.push(1, -1);
658  rOutput.writeString(ScResId(STR_LABEL_RESIDUAL));
659  rOutput.nextRow();
660  rTemplate.setTemplate("=%ACTUALY_RANGE% - %PREDICTEDY_RANGE%");
661  rOutput.writeMatrixFormula(rTemplate.getTemplate(), 1, mnNumObservations);
662 }
663 
664 // Generic table writer
665 void ScRegressionDialog::WriteTable(const std::function<CellValueGetter>& rCellGetter,
666  size_t nRowsInTable, size_t nColsInTable,
667  AddressWalkerWriter& rOutput,
668  const std::function<CellWriter>& rFunc)
669 {
670  for (size_t nRowIdx = 0; nRowIdx < nRowsInTable; ++nRowIdx)
671  {
672  for (size_t nColIdx = 0; nColIdx < nColsInTable; ++nColIdx)
673  {
674  rFunc(rCellGetter(nRowIdx, nColIdx), nRowIdx, nColIdx);
675  rOutput.nextColumn();
676  }
677  rOutput.newLine();
678  }
679 }
680 
682 {
684 }
685 
687 {
689 }
690 
691 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
SC_DLLPUBLIC void Format(OStringBuffer &r, ScRefFlags nFlags, const ScDocument *pDocument=nullptr, const Details &rDetails=detailsOOOa1) const
Definition: address.cxx:2116
ScAddress aStart
Definition: address.hxx:499
virtual void Close() override
void writeFormula(const OUString &aFormula)
SCROW Row() const
Definition: address.hxx:261
void writeString(const OUString &aString)
std::unique_ptr< weld::CheckButton > mxNoInterceptCheckBox
std::unique_ptr< weld::SpinButton > mxConfidenceLevelField
ScAddress aEnd
Definition: address.hxx:500
OUString GetYVariableNameFormula(bool bWithLog)
void WritePredictionsWithResiduals(AddressWalkerWriter &rOutput, FormulaTemplate &rTemplate, size_t nRegressionIndex)
void push(SCCOL aRelativeCol=0, SCROW aRelativeRow=0, SCTAB aRelativeTab=0)
SC_DLLPUBLIC formula::FormulaGrammar::AddressConvention GetAddressConvention() const
Definition: documen3.cxx:494
size_t GetRegressionTypeIndex() const
static size_t lcl_GetNumRowsColsInRange(const ScRange &rRange, bool bRows)
void WriteRawRegressionResults(AddressWalkerWriter &rOutput, FormulaTemplate &rTemplate, size_t nRegressionIndex)
formula::FormulaGrammar::AddressConvention eConv
Definition: address.hxx:212
void writeBoldString(const OUString &aString)
std::unique_ptr< weld::Label > mxErrorMessage
void writeMatrixFormula(const OUString &aFormula, SCCOL nCols=1, SCROW nRows=1)
static Grammar mergeToGrammar(const Grammar eGrammar, const AddressConvention eConv)
SCTAB Tab() const
Definition: address.hxx:270
std::unique_ptr< weld::CheckButton > mxWithLabelsCheckBox
void SetCol(SCCOL nColP)
Definition: address.hxx:278
#define SAL_N_ELEMENTS(arr)
bool IsValid() const
Definition: address.hxx:292
ScAddress current(SCCOL aRelativeCol=0, SCROW aRelativeRow=0, SCTAB aRelativeTab=0)
void IncCol(SCCOL nDelta=1)
Definition: address.hxx:303
sal_Int16 SCCOL
Definition: types.hxx:21
LINEAR
void IncRow(SCROW nDelta=1)
Definition: address.hxx:299
float u
void autoReplaceRange(const OUString &aVariable, const ScRange &rRange)
bool IsValid() const
Definition: address.hxx:546
SCCOL Col() const
Definition: address.hxx:266
virtual bool InputRangesValid() override
std::unique_ptr< weld::CheckButton > mxCalcResidualsCheckBox
void WriteRegressionStatistics(AddressWalkerWriter &rOutput, FormulaTemplate &rTemplate)
sal_Int32 SCROW
Definition: types.hxx:17
void PutInOrder()
Definition: address.cxx:1582
OUString GetVariableNameFormula(bool bXVar, size_t nIndex, bool bWithLog)
ScRegressionDialog(SfxBindings *pB, SfxChildWindow *pCW, weld::Window *pParent, ScViewData &rViewData)
OUString ScResId(std::string_view aId)
Definition: scdll.cxx:89
void autoReplaceAddress(const OUString &aVariable, ScAddress const &aAddress)
virtual ScRange ApplyOutput(ScDocShell *pDocShell) override
void WriteRegressionANOVAResults(AddressWalkerWriter &rOutput, FormulaTemplate &rTemplate)
virtual ~ScRegressionDialog() override
virtual const char * GetUndoNameId() override
IMPL_LINK_NOARG(ScRegressionDialog, CheckBoxHdl, weld::Toggleable &, void)
void WriteRegressionEstimatesWithCI(AddressWalkerWriter &rOutput, FormulaTemplate &rTemplate, bool bTakeLogX)
ScRefFlags
Definition: address.hxx:144
void setTemplate(const OUString &aTemplate)
static void WriteTable(const std::function< CellValueGetter > &rCellGetter, size_t nRowsInTable, size_t nColsInTable, AddressWalkerWriter &rOutput, const std::function< CellWriter > &rFunc)
OUString GetXVariableNameFormula(size_t nIndex, bool bWithLog)
ScRange GetDataRange(const ScRange &rRange)
bool DoClose(sal_uInt16 nId)
Definition: anyrefdg.cxx:695
std::unique_ptr< weld::RadioButton > mxLinearRadioButton
std::unique_ptr< weld::RadioButton > mxLogarithmicRadioButton