Commit cfe75406 authored by Morten Welinder

Implement most of LOGEST.

parent efe1e93d
1999-09-21 Morten Welinder <terra@diku.dk>
* src/functions/fn-stat.c (gnumeric_logest): Add some meat.
* src/regression.c (general_linear_regression): Renamed from
general_regression.
(exponential_regression): New function.
1999-09-21 Jukka-Pekka Iivonen <iivonen@iki.fi> 1999-09-21 Jukka-Pekka Iivonen <iivonen@iki.fi>
* samples/statfuns.xls: Updated the second PERCENTRANK test. * samples/statfuns.xls: Updated the second PERCENTRANK test.
......
1999-09-21 Morten Welinder <terra@diku.dk>
* src/functions/fn-stat.c (gnumeric_logest): Add some meat.
* src/regression.c (general_linear_regression): Renamed from
general_regression.
(exponential_regression): New function.
1999-09-21 Jukka-Pekka Iivonen <iivonen@iki.fi> 1999-09-21 Jukka-Pekka Iivonen <iivonen@iki.fi>
* samples/statfuns.xls: Updated the second PERCENTRANK test. * samples/statfuns.xls: Updated the second PERCENTRANK test.
......
...@@ -3985,18 +3985,18 @@ static char *help_logest = { ...@@ -3985,18 +3985,18 @@ static char *help_logest = {
"@SYNTAX=LOGEST(known_y's[,known_x's,const,stat])\n" "@SYNTAX=LOGEST(known_y's[,known_x's,const,stat])\n"
"@DESCRIPTION=" "@DESCRIPTION="
"LOGEST function applies the ``least squares'' method to fit " "The LOGEST function applies the ``least squares'' method to fit "
"an exponential curve of the form " "an exponential curve of the form "
"y = b*m{1}^x{1}+m{2}^x{2}... to your data." "y = b * m{1}^x{1} * m{2}^x{2}... to your data."
"\n" "\n"
"If @known_x's is omitted, an array {1, 2, 3, ...} is used. " "If @known_x's is omitted, an array {1, 2, 3, ...} is used. "
"LOGEST returns an array { m{n},m{n-1}, ...,m{1},b }. " "LOGEST returns an array { m{n},m{n-1}, ...,m{1},b }."
"\n" "\n"
"If @known_y's and @known_x's have unequal number of data points, " "If @known_y's and @known_x's have unequal number of data points, "
"LOGEST returns #NUM! error." "LOGEST returns #NUM! error."
"\n" "\n"
"If @const is FALSE, the line will be forced to go through the " "If @const is FALSE, the line will be forced to go through (0,1),"
"origin, i.e., b will be zero. The default is TRUE." "i.e., b will be one. The default is TRUE."
"\n" "\n"
"If @stat is TRUE, extra statistical information will be returned. " "If @stat is TRUE, extra statistical information will be returned. "
"The default is FALSE." "The default is FALSE."
...@@ -4008,8 +4008,79 @@ static char *help_logest = { ...@@ -4008,8 +4008,79 @@ static char *help_logest = {
static Value * static Value *
gnumeric_logest (FunctionEvalInfo *ei, Value *argv []) gnumeric_logest (FunctionEvalInfo *ei, Value *argv [])
{ {
/* Does nothing yet; look examples in samples/statfuns.xls */ float_t *xs = NULL, *ys = NULL;
return value_new_float (0); Value *result = NULL;
int nx, ny, dim;
float_t expres[2];
gboolean affine, stat, err;
ys = collect_floats_value (argv[0], &ei->pos,
COLLECT_IGNORE_STRINGS |
COLLECT_IGNORE_BOOLS,
&ny, &result);
if (result)
goto out;
if (argv[1] != NULL) {
xs = collect_floats_value (argv[1], &ei->pos,
COLLECT_IGNORE_STRINGS |
COLLECT_IGNORE_BOOLS,
&nx, &result);
if (result)
goto out;
} else {
xs = g_new(float_t, ny);
for (nx=0; nx<ny; nx++)
xs[nx] = nx+1;
}
if (nx != ny) {
result = value_new_error (&ei->pos, gnumeric_err_NUM);
goto out;
}
if (argv[2]) {
affine = value_get_as_bool (argv[2], &err);
if (err) {
result = value_new_error (&ei->pos, gnumeric_err_VALUE);
goto out;
}
} else
affine = TRUE;
if (argv[3]) {
stat = value_get_as_bool (argv[3], &err);
if (err) {
result = value_new_error (&ei->pos, gnumeric_err_VALUE);
goto out;
}
} else
stat = TRUE;
if (exponential_regression (xs, ys, nx, affine, expres)) {
result = value_new_error (&ei->pos, gnumeric_err_NUM);
goto out;
}
/* FIXME: we should handle multi-dimensional data, but we do not. */
dim = 1;
if (stat) {
int y, x;
result = value_new_array (dim + 1, 5);
for (y = 0; y < 5; y++)
for (x = 0; x < dim + 1; x++)
value_array_set (result, x, y, value_new_error (&ei->pos, gnumeric_err_NA));
/* FIXME: lots of stuff goes here. */
} else {
result = value_new_array (dim + 1, 1);
}
value_array_set (result, dim, 0, value_new_float (expres[0]));
value_array_set (result, 0, 0, value_new_float (expres[1]));
out:
g_free (xs);
g_free (ys);
return result;
} }
/***************************************************************************/ /***************************************************************************/
...@@ -4290,7 +4361,7 @@ stat_functions_init (void) ...@@ -4290,7 +4361,7 @@ stat_functions_init (void)
&help_linest, gnumeric_linest); &help_linest, gnumeric_linest);
function_add_args (cat, "logest", "A|Abb", function_add_args (cat, "logest", "A|Abb",
"known_y's[,known_x's,const,stat]", "known_y's[,known_x's,const,stat]",
&help_logest, gnumeric_linest); &help_logest, gnumeric_logest);
function_add_args (cat, "loginv", "fff", "", function_add_args (cat, "loginv", "fff", "",
&help_loginv, gnumeric_loginv); &help_loginv, gnumeric_loginv);
function_add_args (cat, "lognormdist", "fff", "", function_add_args (cat, "lognormdist", "fff", "",
......
...@@ -3985,18 +3985,18 @@ static char *help_logest = { ...@@ -3985,18 +3985,18 @@ static char *help_logest = {
"@SYNTAX=LOGEST(known_y's[,known_x's,const,stat])\n" "@SYNTAX=LOGEST(known_y's[,known_x's,const,stat])\n"
"@DESCRIPTION=" "@DESCRIPTION="
"LOGEST function applies the ``least squares'' method to fit " "The LOGEST function applies the ``least squares'' method to fit "
"an exponential curve of the form " "an exponential curve of the form "
"y = b*m{1}^x{1}+m{2}^x{2}... to your data." "y = b * m{1}^x{1} * m{2}^x{2}... to your data."
"\n" "\n"
"If @known_x's is omitted, an array {1, 2, 3, ...} is used. " "If @known_x's is omitted, an array {1, 2, 3, ...} is used. "
"LOGEST returns an array { m{n},m{n-1}, ...,m{1},b }. " "LOGEST returns an array { m{n},m{n-1}, ...,m{1},b }."
"\n" "\n"
"If @known_y's and @known_x's have unequal number of data points, " "If @known_y's and @known_x's have unequal number of data points, "
"LOGEST returns #NUM! error." "LOGEST returns #NUM! error."
"\n" "\n"
"If @const is FALSE, the line will be forced to go through the " "If @const is FALSE, the line will be forced to go through (0,1),"
"origin, i.e., b will be zero. The default is TRUE." "i.e., b will be one. The default is TRUE."
"\n" "\n"
"If @stat is TRUE, extra statistical information will be returned. " "If @stat is TRUE, extra statistical information will be returned. "
"The default is FALSE." "The default is FALSE."
...@@ -4008,8 +4008,79 @@ static char *help_logest = { ...@@ -4008,8 +4008,79 @@ static char *help_logest = {
static Value * static Value *
gnumeric_logest (FunctionEvalInfo *ei, Value *argv []) gnumeric_logest (FunctionEvalInfo *ei, Value *argv [])
{ {
/* Does nothing yet; look examples in samples/statfuns.xls */ float_t *xs = NULL, *ys = NULL;
return value_new_float (0); Value *result = NULL;
int nx, ny, dim;
float_t expres[2];
gboolean affine, stat, err;
ys = collect_floats_value (argv[0], &ei->pos,
COLLECT_IGNORE_STRINGS |
COLLECT_IGNORE_BOOLS,
&ny, &result);
if (result)
goto out;
if (argv[1] != NULL) {
xs = collect_floats_value (argv[1], &ei->pos,
COLLECT_IGNORE_STRINGS |
COLLECT_IGNORE_BOOLS,
&nx, &result);
if (result)
goto out;
} else {
xs = g_new(float_t, ny);
for (nx=0; nx<ny; nx++)
xs[nx] = nx+1;
}
if (nx != ny) {
result = value_new_error (&ei->pos, gnumeric_err_NUM);
goto out;
}
if (argv[2]) {
affine = value_get_as_bool (argv[2], &err);
if (err) {
result = value_new_error (&ei->pos, gnumeric_err_VALUE);
goto out;
}
} else
affine = TRUE;
if (argv[3]) {
stat = value_get_as_bool (argv[3], &err);
if (err) {
result = value_new_error (&ei->pos, gnumeric_err_VALUE);
goto out;
}
} else
stat = TRUE;
if (exponential_regression (xs, ys, nx, affine, expres)) {
result = value_new_error (&ei->pos, gnumeric_err_NUM);
goto out;
}
/* FIXME: we should handle multi-dimensional data, but we do not. */
dim = 1;
if (stat) {
int y, x;
result = value_new_array (dim + 1, 5);
for (y = 0; y < 5; y++)
for (x = 0; x < dim + 1; x++)
value_array_set (result, x, y, value_new_error (&ei->pos, gnumeric_err_NA));
/* FIXME: lots of stuff goes here. */
} else {
result = value_new_array (dim + 1, 1);
}
value_array_set (result, dim, 0, value_new_float (expres[0]));
value_array_set (result, 0, 0, value_new_float (expres[1]));
out:
g_free (xs);
g_free (ys);
return result;
} }
/***************************************************************************/ /***************************************************************************/
...@@ -4290,7 +4361,7 @@ stat_functions_init (void) ...@@ -4290,7 +4361,7 @@ stat_functions_init (void)
&help_linest, gnumeric_linest); &help_linest, gnumeric_linest);
function_add_args (cat, "logest", "A|Abb", function_add_args (cat, "logest", "A|Abb",
"known_y's[,known_x's,const,stat]", "known_y's[,known_x's,const,stat]",
&help_logest, gnumeric_linest); &help_logest, gnumeric_logest);
function_add_args (cat, "loginv", "fff", "", function_add_args (cat, "loginv", "fff", "",
&help_loginv, gnumeric_loginv); &help_loginv, gnumeric_loginv);
function_add_args (cat, "lognormdist", "fff", "", function_add_args (cat, "lognormdist", "fff", "",
......
...@@ -8,6 +8,7 @@ ...@@ -8,6 +8,7 @@
#include <config.h> #include <config.h>
#include "regression.h" #include "regression.h"
#include <glib.h> #include <glib.h>
#include <math.h>
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
...@@ -47,9 +48,9 @@ linear_solve (float_t **A, float_t *b, int n, ...@@ -47,9 +48,9 @@ linear_solve (float_t **A, float_t *b, int n,
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
static int static int
general_regression (const float_t *const *xss, int xdim, general_linear_regression (const float_t *const *xss, int xdim,
const float_t *ys, int n, const float_t *ys, int n,
float_t *res) float_t *res)
{ {
float_t *xTy, **xTx; float_t *xTy, **xTx;
int i; int i;
...@@ -113,12 +114,7 @@ general_regression (const float_t *const *xss, int xdim, ...@@ -113,12 +114,7 @@ general_regression (const float_t *const *xss, int xdim,
} }
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
/* Fit the best possible line (y = ax + b) through a set of data points. /* Please refer to description in regression.h. */
*
* If affine is false, force b = 0.
*
* "Best" means minimum total squared vertical distance.
*/
int int
linear_regression (const float_t *xs, const float_t *ys, int n, linear_regression (const float_t *xs, const float_t *ys, int n,
...@@ -131,11 +127,53 @@ linear_regression (const float_t *xs, const float_t *ys, int n, ...@@ -131,11 +127,53 @@ linear_regression (const float_t *xs, const float_t *ys, int n,
xss[0] = NULL; /* Substitute for 1-vector. */ xss[0] = NULL; /* Substitute for 1-vector. */
xss[1] = xs; xss[1] = xs;
return general_regression (xss, 2, ys, n, res); return general_linear_regression (xss, 2, ys, n, res);
} else {
res[0] = 0;
return general_linear_regression (&xs, 1, ys, n, res + 1);
}
}
/* ------------------------------------------------------------------------- */
/*
 * Please refer to description in regression.h.
 *
 * Implementation: take the logarithm of every y value, run the linear
 * regression on (x, log y), then exponentiate the two resulting
 * coefficients so that res[0] is the multiplier and res[1] the root.
 * Returns 0 on success and non-zero on failure; any y <= 0 is rejected
 * before the regression is attempted.
 */
int
exponential_regression (const float_t *xs, const float_t *ys, int n,
			int affine,
			float_t *res)
{
	float_t *log_ys;
	int result;
	int i;

	log_ys = g_new (float_t, n);
	for (i = 0; i < n; i++) {
		if (ys[i] > 0)
			log_ys[i] = log (ys[i]);
		else {
			result = 1; /* Bad data. */
			goto out;
		}
	}

	if (affine) {
		const float_t *xss[2];

		xss[0] = NULL;	/* Substitute for 1-vector. */
		xss[1] = xs;
		result = general_linear_regression (xss, 2, log_ys, n, res);
	} else {
		/* log b = 0, i.e. b = 1 after the exp below. */
		res[0] = 0;
		result = general_linear_regression (&xs, 1, log_ys, n, res + 1);
	}

	if (result == 0) {
		/*
		 * res holds exactly two coefficients, so iterate over
		 * those and not over the n data points.
		 */
		for (i = 0; i < 2; i++)
			res[i] = exp (res[i]);
	}

out:
	g_free (log_ys);
	return result;
}
/* ------------------------------------------------------------------------- */ /* ------------------------------------------------------------------------- */
...@@ -22,4 +22,24 @@ int linear_regression (const float_t *xs, const float_t *ys, int n, ...@@ -22,4 +22,24 @@ int linear_regression (const float_t *xs, const float_t *ys, int n,
int affine, int affine,
float_t *res); float_t *res);
/**
 * exponential_regression:
 * @xs: x-vector.  (I.e., independent data.)
 * @ys: y-vector.  (Dependent data.)  Every value must be positive.
 * @n: number of data points.
 * @affine: if true, a non-one multiplier b is allowed; if false, b is
 *          forced to one.
 * @res: output place for the constant b in res[0] and the root m in
 *       res[1].
 *
 * This performs a one-dimensional linear regression on the points
 * (x, log y).
 * Fits to "y = b * m^x" or equivalently to "log y = log b + x * log m".
 *
 * Returns 0 for ok, non-zero otherwise.  (Errors: less than two points,
 * all points on a vertical line, non-positive y data.)
 */
int exponential_regression (const float_t *xs, const float_t *ys, int n,
int affine,
float_t *res);
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment