Commit 01142b35 authored and committed by Andreas J. Guelzow

change all occurrences of extra_stat.df to extra_stat.df_resid

2002-02-20  Andreas J. Guelzow <aguelzow@taliesin.ca>

	* fn_stat.c : change all occurrences of extra_stat.df to
	  extra_stat.df_resid

2002-02-20  Andreas J. Guelzow <aguelzow@taliesin.ca>

	* src/analysis-tools.c (regression_tool) : use only values
	  from extra_stat (renamed to regression_stat)
	* src/regression.c (general_linear_regression) : calculate more
	  regression_stat
	(regression_stat_new) : new
	(regression_stat_destroy) : new
	* src/regression.h : add fields to regression_stat_t and add
	  declarations of regression_stat_new and regression_stat_destroy
parent bafcd158
2002-02-20 Andreas J. Guelzow <aguelzow@taliesin.ca>
* src/analysis-tools.c (regression_tool) : use only values
from extra_stat (renamed to regression_stat)
* src/regression.c (general_linear_regression) : calculate more
regression_stat
(regression_stat_new) : new
(regression_stat_destroy) : new
* src/regression.h : add fields to regression_stat_t and add
declarations of regression_stat_new and regression_stat_destroy
2002-02-20 Morten Welinder <terra@diku.dk>
* src/format.c (format_value): Missing g_strdup added for sanity
......
......@@ -19,6 +19,7 @@ Andreas:
widget and improve filter mechanism
* Port the plugin manager dialog to gnome2
* Improve numerical precision in single factor ANOVA
* Correct regression analysis tool (intercept == 0 case)
Jody:
* Initial port to gnome2.
......
2002-02-20 Andreas J. Guelzow <aguelzow@taliesin.ca>
* src/analysis-tools.c (regression_tool) : use only values
from extra_stat (renamed to regression_stat)
* src/regression.c (general_linear_regression) : calculate more
regression_stat
(regression_stat_new) : new
(regression_stat_destroy) : new
* src/regression.h : add fields to regression_stat_t and add
declarations of regression_stat_new and regression_stat_destroy
2002-02-20 Morten Welinder <terra@diku.dk>
* src/format.c (format_value): Missing g_strdup added for sanity
......
2002-02-20 Andreas J. Guelzow <aguelzow@taliesin.ca>
* src/analysis-tools.c (regression_tool) : use only values
from extra_stat (renamed to regression_stat)
* src/regression.c (general_linear_regression) : calculate more
regression_stat
(regression_stat_new) : new
(regression_stat_destroy) : new
* src/regression.h : add fields to regression_stat_t and add
declarations of regression_stat_new and regression_stat_destroy
2002-02-20 Morten Welinder <terra@diku.dk>
* src/format.c (format_value): Missing g_strdup added for sanity
......
......@@ -3786,7 +3786,7 @@ gnumeric_linest (FunctionEvalInfo *ei, Value *argv[])
value_array_set (result, 0, 3,
value_new_float (extra_stat.F));
value_array_set (result, 1, 3,
value_new_float (extra_stat.df));
value_new_float (extra_stat.df_resid));
value_array_set (result, 0, 4,
value_new_float (extra_stat.ss_reg));
value_array_set (result, 1, 4,
......@@ -4152,7 +4152,7 @@ gnumeric_logest (FunctionEvalInfo *ei, Value *argv[])
value_array_set (result, 0, 3,
value_new_float (extra_stat.F));
value_array_set (result, 1, 3,
value_new_float (extra_stat.df));
value_new_float (extra_stat.df_resid));
value_array_set (result, 0, 4,
value_new_float (extra_stat.ss_reg));
value_array_set (result, 1, 4,
......
......@@ -2262,17 +2262,14 @@ regression_tool (WorkbookControl *wbc, Sheet *sheet,
data_set_t *y_data = NULL;
GArray *cleaned = NULL;
char *text = NULL;
regression_stat_t extra_stat;
gnum_float mean_y;
gnum_float ss_yy;
regression_stat_t *regression_stat = NULL;
gnum_float r;
gnum_float *res, **xss;
guint i;
guint xdim = 0;
int err = 0;
int cor_err = 0;
int av_err = 0;
int sumsq_err = 0;
/* read the data and check for consistency */
x_input_range = x_input;
......@@ -2332,10 +2329,12 @@ regression_tool (WorkbookControl *wbc, Sheet *sheet,
(x_data, i))->data->data);
}
regression_stat = regression_stat_new ();
err = linear_regression (xss, xdim, (gnum_float *)(y_data->data->data),
y_data->data->len, intercept, res, &extra_stat);
y_data->data->len, intercept, res, regression_stat);
if (err) {
regression_stat_destroy (regression_stat);
destroy_data_set (y_data);
destroy_data_set_list (x_data);
range_list_destroy (x_input_range);
......@@ -2386,62 +2385,56 @@ regression_tool (WorkbookControl *wbc, Sheet *sheet,
set_italic (dao, 1, 15, 6, 15);
g_free (text);
av_err = range_average ((gnum_float *)(y_data->data->data), y_data->data->len, &mean_y);
sumsq_err = range_sumsq ((gnum_float *)(y_data->data->data), y_data->data->len, &ss_yy);
ss_yy -= y_data->data->len * mean_y * mean_y;
if (xdim == 1)
cor_err = range_correl_pop (xss[0], (gnum_float *)(y_data->data->data),
y_data->data->len, &r);
else r = sqrt (extra_stat.sqr_r);
else r = sqrt (regression_stat->sqr_r);
/* Multiple R */
set_cell_float_na (dao, 1, 3, r, cor_err == 0);
/* R Square */
set_cell_float (dao, 1, 4, extra_stat.sqr_r);
set_cell_float (dao, 1, 4, regression_stat->sqr_r);
/* Adjusted R Square */
set_cell_float (dao, 1, 5, extra_stat.adj_sqr_r);
set_cell_float (dao, 1, 5, regression_stat->adj_sqr_r);
/* Standard Error */
set_cell_float (dao, 1, 6, sqrt (extra_stat.var));
set_cell_float (dao, 1, 6, sqrt (regression_stat->var));
/* Observations */
set_cell_float (dao, 1, 7, y_data->data->len);
/* Regression / df */
set_cell_float (dao, 1, 11, xdim);
set_cell_float (dao, 1, 11, regression_stat->df_reg);
/* Residual / df */
set_cell_float (dao, 1, 12, y_data->data->len - intercept - xdim);
set_cell_float (dao, 1, 12, regression_stat->df_resid);
/* Total / df */
set_cell_float (dao, 1, 13, y_data->data->len - intercept);
set_cell_float (dao, 1, 13, regression_stat->df_total);
/* Residual / SS */
set_cell_float (dao, 2, 12, extra_stat.ss_resid);
set_cell_float (dao, 2, 12, regression_stat->ss_resid);
/* Total / SS */
set_cell_float_na (dao, 2, 13, ss_yy, (sumsq_err == 0) && (av_err == 0));
set_cell_float (dao, 2, 13, regression_stat->ss_total);
/* Regression / SS */
set_cell_float_na (dao, 2, 11, ss_yy - extra_stat.ss_resid,
(sumsq_err == 0) && (av_err == 0));
set_cell_float (dao, 2, 11, regression_stat->ss_reg);
/* Regression / MS */
set_cell_float_na (dao, 3, 11, (ss_yy - extra_stat.ss_resid) / xdim,
(sumsq_err == 0) && (av_err == 0));
set_cell_float (dao, 3, 11, regression_stat->ms_reg);
/* Residual / MS */
set_cell_float (dao, 3, 12, extra_stat.ss_resid / (y_data->data->len - 1 - xdim));
set_cell_float (dao, 3, 12, regression_stat->ms_resid);
/* F */
set_cell_float (dao, 4, 11, extra_stat.F);
set_cell_float (dao, 4, 11, regression_stat->F);
/* Significance of F */
set_cell_float (dao, 5, 11, 1 - pf (extra_stat.F, xdim - intercept,
y_data->data->len - xdim));
set_cell_float (dao, 5, 11, 1 - pf (regression_stat->F, regression_stat->df_reg,
regression_stat->df_resid));
/* Intercept / Coefficient */
set_cell_float (dao, 1, 16, res[0]);
......@@ -2455,20 +2448,20 @@ regression_tool (WorkbookControl *wbc, Sheet *sheet,
t = qt (1 - alpha/2, y_data->data->len - xdim - 1);
/* Intercept / Standard Error */
set_cell_float (dao, 2, 16, extra_stat.se[0]);
set_cell_float (dao, 2, 16, regression_stat->se[0]);
/* Intercept / t Stat */
set_cell_float (dao, 3, 16, extra_stat.t[0]);
set_cell_float (dao, 3, 16, regression_stat->t[0]);
/* Intercept / p values */
set_cell_float (dao, 4, 16, 2.0 * (1.0 - pt (extra_stat.t[0],
set_cell_float (dao, 4, 16, 2.0 * (1.0 - pt (regression_stat->t[0],
y_data->data->len - xdim - 1)));
/* Intercept / Lower 95% */
set_cell_float (dao, 5, 16, res[0] - t * extra_stat.se[0]);
set_cell_float (dao, 5, 16, res[0] - t * regression_stat->se[0]);
/* Intercept / Upper 95% */
set_cell_float (dao, 6, 16, res[0] + t * extra_stat.se[0]);
set_cell_float (dao, 6, 16, res[0] + t * regression_stat->se[0]);
}
/* Slopes */
......@@ -2481,27 +2474,28 @@ regression_tool (WorkbookControl *wbc, Sheet *sheet,
/* Slopes / Standard Error */
/*With no intercept se[0] is for the first slope variable; with
intercept, se[1] is the first slope se */
set_cell_float (dao, 2, 17 + i, extra_stat.se[intercept + i]);
set_cell_float (dao, 2, 17 + i, regression_stat->se[intercept + i]);
/* Slopes / t Stat */
set_cell_float (dao, 3, 17 + i, extra_stat.t[intercept + i]);
set_cell_float (dao, 3, 17 + i, regression_stat->t[intercept + i]);
/* Slopes / p values */
set_cell_float (dao, 4, 17 + i,
2.0 * (1.0 - pt (extra_stat.t[intercept + i],
2.0 * (1.0 - pt (regression_stat->t[intercept + i],
y_data->data->len - xdim - intercept)));
t = qt (1 - alpha/2, y_data->data->len - xdim - intercept);
/* Slope / Lower 95% */
set_cell_float (dao, 5, 17 + i,
res[i + 1] - t * extra_stat.se[intercept + i]);
res[i + 1] - t * regression_stat->se[intercept + i]);
/* Slope / Upper 95% */
set_cell_float (dao, 6, 17 + i,
res[i + 1] + t * extra_stat.se[intercept + i]);
res[i + 1] + t * regression_stat->se[intercept + i]);
}
regression_stat_destroy (regression_stat);
autofit_columns (dao, 0, 6);
destroy_data_set (y_data);
destroy_data_set_list (x_data);
......
2002-02-20 Andreas J. Guelzow <aguelzow@taliesin.ca>
* fn_stat.c : change all occurrences of extra_stat.df to
extra_stat.df_resid
2002-02-19 Morten Welinder <terra@diku.dk>
* fn-math.c (gnumeric_randbetween): use gnum_float, not double.
......
......@@ -3786,7 +3786,7 @@ gnumeric_linest (FunctionEvalInfo *ei, Value *argv[])
value_array_set (result, 0, 3,
value_new_float (extra_stat.F));
value_array_set (result, 1, 3,
value_new_float (extra_stat.df));
value_new_float (extra_stat.df_resid));
value_array_set (result, 0, 4,
value_new_float (extra_stat.ss_reg));
value_array_set (result, 1, 4,
......@@ -4152,7 +4152,7 @@ gnumeric_logest (FunctionEvalInfo *ei, Value *argv[])
value_array_set (result, 0, 3,
value_new_float (extra_stat.F));
value_array_set (result, 1, 3,
value_new_float (extra_stat.df));
value_new_float (extra_stat.df_resid));
value_array_set (result, 0, 4,
value_new_float (extra_stat.ss_reg));
value_array_set (result, 1, 4,
......
......@@ -170,14 +170,14 @@ static int
general_linear_regression (gnum_float **xss, int xdim,
const gnum_float *ys, int n,
gnum_float *res,
regression_stat_t *extra_stat, int affine)
regression_stat_t *regression_stat, int affine)
{
gnum_float *xTy, **xTx;
int i,j;
int err;
if (extra_stat)
memset (extra_stat, 0, sizeof (regression_stat_t));
if (regression_stat)
memset (regression_stat, 0, sizeof (regression_stat_t));
if (xdim > n || n < 1)
return 1; /* Too few points. */
......@@ -225,11 +225,10 @@ general_linear_regression (gnum_float **xss, int xdim,
err = linear_solve (xTx, xTy, xdim, res);
if (extra_stat && err == 0) {
if (regression_stat && err == 0) {
int err2;
gnum_float *residuals = g_new (gnum_float, n);
gnum_float **LU;
gnum_float ss_total;
int *P;
gnum_float *e, *inv;
gnum_float ybar;
......@@ -238,23 +237,23 @@ general_linear_regression (gnum_float **xss, int xdim,
/* This should not fail since n >= 1. */
err = range_average (ys, n, &ybar);
g_assert (err == 0);
extra_stat->ybar = ybar;
regression_stat->ybar = ybar;
/* FIXME: we ought to have a devsq variant that does not
recompute the mean. */
if (xss[0])
err = range_sumsq (ys, n, &ss_total);
if (affine)
err = range_devsq (ys, n, &regression_stat->ss_total);
else
err = range_devsq (ys, n, &ss_total);
err = range_sumsq (ys, n, &regression_stat->ss_total);
g_assert (err == 0);
extra_stat->xbar = g_new (gnum_float, n);
regression_stat->xbar = g_new (gnum_float, n);
for (i = 0; i < xdim; i++) {
if (xss[i]) {
int err = range_average (xss[i], n, &extra_stat->xbar[i]);
int err = range_average (xss[i], n, &regression_stat->xbar[i]);
g_assert (err == 0);
} else {
extra_stat->xbar[i] = 1;
regression_stat->xbar[i] = 1;
}
}
......@@ -269,19 +268,20 @@ general_linear_regression (gnum_float **xss, int xdim,
residuals[i] = ys[i] - residuals[i];
}
err = range_sumsq (residuals, n, &extra_stat->ss_resid);
err = range_sumsq (residuals, n, &regression_stat->ss_resid);
g_assert (err == 0);
/* FIXME: we want to guard against division by zero. */
extra_stat->sqr_r = 1 - (extra_stat->ss_resid / ss_total);
extra_stat->adj_sqr_r = 1 - extra_stat->ss_resid * (n - 1) / ((n - xdim) * ss_total);
extra_stat->var = (extra_stat->ss_resid / (n - xdim));
regression_stat->sqr_r = 1 - (regression_stat->ss_resid / regression_stat->ss_total);
regression_stat->adj_sqr_r = 1 - regression_stat->ss_resid * (n - 1) /
((n - xdim) * regression_stat->ss_total);
regression_stat->var = (regression_stat->ss_resid / (n - xdim));
ALLOC_MATRIX (LU, xdim, xdim);
P = g_new (int, n);
err2 = LUPDecomp (xTx, LU, P, xdim);
extra_stat->se = g_new (gnum_float, xdim);
regression_stat->se = g_new (gnum_float, xdim);
if (err2 == 0) {
e = g_new (gnum_float, xdim); /* Elmentary vector */
inv = g_new (gnum_float, xdim);
......@@ -290,7 +290,7 @@ general_linear_regression (gnum_float **xss, int xdim,
for (i = 0; i < xdim; i++) {
e[i] = 1;
backsolve (LU, P, e, xdim, inv);
extra_stat->se[i] = sqrt (extra_stat->var * inv[i]);
regression_stat->se[i] = sqrt (regression_stat->var * inv[i]);
e[i] = 0;
}
g_free (e);
......@@ -298,22 +298,28 @@ general_linear_regression (gnum_float **xss, int xdim,
} else {
/* FIXME: got any better idea? */
for (i = 0; i < xdim; i++)
extra_stat->se[i] = 1;
regression_stat->se[i] = 1;
}
FREE_MATRIX (LU, xdim, xdim);
g_free (P);
extra_stat->t = g_new (gnum_float, xdim);
regression_stat->t = g_new (gnum_float, xdim);
for (i = 0; i < xdim; i++)
extra_stat->t[i] = res[i] / extra_stat->se[i];
regression_stat->t[i] = res[i] / regression_stat->se[i];
extra_stat->F = (extra_stat->sqr_r / (xdim - affine)) /
((1 - extra_stat->sqr_r) / (n - xdim));
regression_stat->df_resid = n - xdim;
regression_stat->df_reg = xdim - (affine ? 1 : 0);
regression_stat->df_total = regression_stat->df_resid + regression_stat->df_reg;
regression_stat->F = (regression_stat->sqr_r / regression_stat->df_reg) /
((1 - regression_stat->sqr_r) / regression_stat->df_resid);
regression_stat->ss_reg = regression_stat->ss_total - regression_stat->ss_resid;
regression_stat->se_y = sqrt (regression_stat->ss_total / n);
regression_stat->ms_reg = regression_stat->ss_reg / regression_stat->df_reg;
regression_stat->ms_resid = regression_stat->ss_resid / regression_stat->df_resid;
extra_stat->df = n - xdim;
extra_stat->ss_reg = ss_total - extra_stat->ss_resid;
extra_stat->se_y = sqrt (ss_total / n);
g_free (residuals);
}
......@@ -331,7 +337,7 @@ linear_regression (gnum_float **xss, int dim,
const gnum_float *ys, int n,
int affine,
gnum_float *res,
regression_stat_t *extra_stat)
regression_stat_t *regression_stat)
{
int result;
......@@ -342,12 +348,12 @@ linear_regression (gnum_float **xss, int dim,
memcpy (xss2 + 1, xss, dim * sizeof (gnum_float *));
result = general_linear_regression (xss2, dim + 1, ys, n,
res, extra_stat, affine);
res, regression_stat, affine);
g_free (xss2);
} else {
res[0] = 0;
result = general_linear_regression (xss, dim, ys, n,
res + 1, extra_stat, affine);
res + 1, regression_stat, affine);
}
return result;
}
......@@ -360,7 +366,7 @@ exponential_regression (gnum_float **xss, int dim,
const gnum_float *ys, int n,
int affine,
gnum_float *res,
regression_stat_t *extra_stat)
regression_stat_t *regression_stat)
{
gnum_float *log_ys;
int result;
......@@ -382,12 +388,12 @@ exponential_regression (gnum_float **xss, int dim,
memcpy (xss2 + 1, xss, dim * sizeof (gnum_float *));
result = general_linear_regression (xss2, dim + 1, log_ys,
n, res, extra_stat, affine);
n, res, regression_stat, affine);
g_free (xss2);
} else {
res[0] = 0;
result = general_linear_regression (xss, dim, log_ys, n,
res + 1, extra_stat, affine);
res + 1, regression_stat, affine);
}
if (result == 0)
......@@ -400,3 +406,30 @@ exponential_regression (gnum_float **xss, int dim,
}
/* ------------------------------------------------------------------------- */
regression_stat_t *
regression_stat_new (void)
{
regression_stat_t * regression_stat = g_new0 (regression_stat_t, 1);
regression_stat->se = NULL;
regression_stat->t = NULL;
regression_stat->xbar = NULL;
return regression_stat;
}
/**
 * regression_stat_destroy:
 * @regression_stat: the structure to release; must not be NULL.
 *
 * Frees the se, t and xbar arrays held by @regression_stat and then the
 * structure itself.  A NULL @regression_stat is rejected by
 * g_return_if_fail () and nothing is freed.
 */
void
regression_stat_destroy (regression_stat_t *regression_stat)
{
	g_return_if_fail (regression_stat != NULL);

	/* g_free () ignores NULL, so members that were never allocated
	 * need no guard. */
	g_free (regression_stat->se);
	g_free (regression_stat->t);
	g_free (regression_stat->xbar);
	g_free (regression_stat);
}
......@@ -30,15 +30,23 @@ typedef struct {
gnum_float adj_sqr_r;
gnum_float se_y; /* The Standard Error of Y */
gnum_float F;
int df;
int df_reg;
int df_resid;
int df_total;
gnum_float ss_reg;
gnum_float ss_resid;
gnum_float ss_total;
gnum_float ms_reg;
gnum_float ms_resid;
gnum_float ybar;
gnum_float *xbar;
gnum_float var; /* The variance of the entire regression:
sum(errors^2)/(n-xdim) */
} regression_stat_t;
regression_stat_t * regression_stat_new (void);
void regression_stat_destroy (regression_stat_t *regression_stat);
int linear_regression (gnum_float **xss, int dim,
const gnum_float *ys, int n,
int affine,
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment