rangefunc.c 8.62 KB
Newer Older
1 2 3 4
/*
 * rangefunc.c: Functions on ranges (data sets).
 *
 * Authors:
Morten Welinder's avatar
Morten Welinder committed
5
 *   Morten Welinder <terra@gnome.org>
6 7 8
 *   Andreas J. Guelzow  <aguelzow@taliesin.ca>
 */

9 10
#include <gnumeric-config.h>
#include "gnumeric.h"
11
#include "rangefunc.h"
12

13 14 15
#include "mathfunc.h"
#include <math.h>
#include <stdlib.h>
16
#include <string.h>
17

Morten Welinder's avatar
Morten Welinder committed
18
int
19
gnm_range_count (gnm_float const *xs, int n, gnm_float *res)
Morten Welinder's avatar
Morten Welinder committed
20 21 22 23 24
{
	*res = n;
	return 0;
}

25
int
26
gnm_range_hypot (gnm_float const *xs, int n, gnm_float *res)
Morten Welinder's avatar
Morten Welinder committed
27 28 29
{
	switch (n) {
	case 0: *res = 0; return 0;
Morten Welinder's avatar
Morten Welinder committed
30
	case 1: *res = gnm_abs (xs[0]); return 0;
31
	case 2: *res = gnm_hypot (xs[0], xs[1]); return 0;
Morten Welinder's avatar
Morten Welinder committed
32
	default:
33
		if (gnm_range_sumsq (xs, n, res))
Morten Welinder's avatar
Morten Welinder committed
34
			return 1;
35
		*res = gnm_sqrt (*res);
Morten Welinder's avatar
Morten Welinder committed
36 37 38 39
		return 0;
	}
}

40 41
/* Minimum absolute element.  */
int
42
gnm_range_minabs (gnm_float const *xs, int n, gnm_float *res)
43 44
{
	if (n > 0) {
Morten Welinder's avatar
Morten Welinder committed
45
		gnm_float min = gnm_abs (xs[0]);
46 47 48
		int i;

		for (i = 1; i < n; i++)
Morten Welinder's avatar
Morten Welinder committed
49 50
			if (gnm_abs (xs[i]) < min)
				min = gnm_abs (xs[i]);
51 52 53 54 55 56
		*res = min;
		return 0;
	} else
		return 1;
}

57 58
/* Average absolute deviation from mean.  */
int
59
gnm_range_avedev (gnm_float const *xs, int n, gnm_float *res)
60 61
{
	if (n > 0) {
62
		gnm_float m, s = 0;
63 64
		int i;

65
		gnm_range_average (xs, n, &m);
66
		for (i = 0; i < n; i++)
Morten Welinder's avatar
Morten Welinder committed
67
			s += gnm_abs (xs[i] - m);
68 69 70 71 72 73 74 75
		*res = s / n;
		return 0;
	} else
		return 1;
}

/* Variance with weight N.  */
int
76
gnm_range_var_pop (gnm_float const *xs, int n, gnm_float *res)
77 78
{
	if (n > 0) {
79
		gnm_float q;
80

81
		gnm_range_devsq (xs, n, &q);
82 83 84 85 86 87 88 89
		*res = q / n;
		return 0;
	} else
		return 1;
}

/* Variance with weight N-1.  */
int
90
gnm_range_var_est (gnm_float const *xs, int n, gnm_float *res)
91 92
{
	if (n > 1) {
93
		gnm_float q;
94

95
		gnm_range_devsq (xs, n, &q);
96 97 98 99 100 101 102 103
		*res = q / (n - 1);
		return 0;
	} else
		return 1;
}

/* Standard deviation with weight N.  */
int
104
gnm_range_stddev_pop (gnm_float const *xs, int n, gnm_float *res)
105
{
106
	if (gnm_range_var_pop (xs, n, res))
107 108
		return 1;
	else {
109
		*res = gnm_sqrt (*res);
110 111 112 113 114 115
		return 0;
	}
}

/* Standard deviation with weight N-1.  */
int
116
gnm_range_stddev_est (gnm_float const *xs, int n, gnm_float *res)
117
{
118
	if (gnm_range_var_est (xs, n, res))
119 120
		return 1;
	else {
121
		*res = gnm_sqrt (*res);
122 123 124 125 126 127
		return 0;
	}
}

/* Population skew.  */
int
128
gnm_range_skew_pop (gnm_float const *xs, int n, gnm_float *res)
129
{
130
	gnm_float m, s, dxn, x3 = 0;
131 132
	int i;

133
	if (n < 1 || gnm_range_average (xs, n, &m) || gnm_range_stddev_pop (xs, n, &s))
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x3 += dxn * dxn *dxn;
	}

	*res = x3 / n;
	return 0;
}

/* Maximum-likelyhood estimator for skew.  */
int
149
gnm_range_skew_est (gnm_float const *xs, int n, gnm_float *res)
150
{
151
	gnm_float m, s, dxn, x3 = 0;
152 153
	int i;

154
	if (n < 3 || gnm_range_average (xs, n, &m) || gnm_range_stddev_est (xs, n, &s))
155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x3 += dxn * dxn *dxn;
	}

	*res = ((x3 * n) / (n - 1)) / (n - 2);
	return 0;
}

/* Population kurtosis (with offset 3).  */
int
170
gnm_range_kurtosis_m3_pop (gnm_float const *xs, int n, gnm_float *res)
171
{
172
	gnm_float m, s, dxn, x4 = 0;
173 174
	int i;

175
	if (n < 1 || gnm_range_average (xs, n, &m) || gnm_range_stddev_pop (xs, n, &s))
176 177 178 179 180 181 182 183 184 185 186 187 188 189 190
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x4 += (dxn * dxn) * (dxn * dxn);
	}

	*res = x4 / n - 3;
	return 0;
}

/* Unbiased, I hope, estimator for kurtosis (with offset 3).  */
int
191
gnm_range_kurtosis_m3_est (gnm_float const *xs, int n, gnm_float *res)
192
{
193 194
	gnm_float m, s, dxn, x4 = 0;
	gnm_float common_den, nth, three;
195 196
	int i;

197
	if (n < 4 || gnm_range_average (xs, n, &m) || gnm_range_stddev_est (xs, n, &s))
198 199 200 201 202 203 204 205 206
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x4 += (dxn * dxn) * (dxn * dxn);
	}

207 208
	common_den = (gnm_float)(n - 2) * (n - 3);
	nth = (gnm_float)n * (n + 1) / ((n - 1) * common_den);
209 210 211 212 213 214 215 216
	three = 3.0 * (n - 1) * (n - 1) / common_den;

	*res = x4 * nth - three;
	return 0;
}

/* Harmonic mean of positive numbers.  */
int
217
gnm_range_harmonic_mean (gnm_float const *xs, int n, gnm_float *res)
218 219
{
	if (n > 0) {
220
		gnm_float invsum = 0;
221 222 223 224 225 226 227 228 229 230 231 232 233
		int i;

		for (i = 0; i < n; i++) {
			if (xs[i] <= 0)
				return 1;
			invsum += 1 / xs[i];
		}
		*res = n / invsum;
		return 0;
	} else
		return 1;
}

234
static void
235
product_helper (gnm_float const *xs, int n,
236
		gnm_float *res, int *exp2,
237 238
		gboolean *zerop, gboolean *anynegp)
{
239
	gnm_float x0 = xs[0];
240 241 242 243 244 245 246 247
	*zerop = (x0 == 0);
	*anynegp = (x0 < 0);

	if (n == 1 || *zerop) {
		*res = x0;
		*exp2 = 0;
	} else {
		int e;
248
		gnm_float mant = gnm_frexp (x0, &e);
249 250 251 252
		int i;

		for (i = 1; i < n; i++) {
			int thise;
253
			gnm_float x = xs[i];
254 255 256 257 258 259 260 261 262

			if (x == 0) {
				*zerop = TRUE;
				*res = 0;
				*exp2 = 0;
				return;
			}
			if (x < 0) *anynegp = TRUE;

263
			mant *= gnm_frexp (x, &thise);
264 265 266
			e += thise;

			/* Keep 0.5 < |mant| <= 1.  */
Morten Welinder's avatar
Morten Welinder committed
267
			if (gnm_abs (mant) <= 0.5) {
268 269 270 271 272 273 274 275 276 277 278
				mant *= 2;
				e--;
			}
		}

		*exp2 = e;
		*res = mant;
	}
}


279 280
/* Geometric mean of positive numbers.  */
int
281
gnm_range_geometric_mean (gnm_float const *xs, int n, gnm_float *res)
282
{
283 284
	int exp2;
	gboolean zerop, anynegp;
285

286 287
	/* FIXME: check empty case.  */
	if (n < 1)
288
		return 1;
289 290 291 292 293 294 295 296

	/* FIXME: check zero case.  */
	product_helper (xs, n, res, &exp2, &zerop, &anynegp);
	if (zerop || anynegp)
		return 1;

	/* Now compute (res * 2^exp2) ^ (1/n).  */
	if (exp2 >= 0)
297
		*res = gnm_pow (*res * gnm_pow2 (exp2 % n), 1.0 / n) * gnm_pow2 (exp2 / n);
298
	else
299
		*res = gnm_pow (*res / gnm_pow2 ((-exp2) % n), 1.0 / n) / gnm_pow2 ((-exp2) / n);
300 301

	return 0;
302 303 304 305 306
}


/* Product.  */
int
307
gnm_range_product (gnm_float const *xs, int n, gnm_float *res)
308
{
309
	if (n == 0) {
Morten Welinder's avatar
Morten Welinder committed
310
		*res = 1;
311 312 313 314 315 316
	} else {
		int exp2;
		gboolean zerop, anynegp;

		product_helper (xs, n, res, &exp2, &zerop, &anynegp);
		if (exp2)
317
			*res = gnm_ldexp (*res, exp2);
318
	}
319

320 321 322 323
	return 0;
}

int
324
gnm_range_multinomial (gnm_float const *xs, int n, gnm_float *res)
325
{
326
	gnm_float result = 1;
327 328 329 330
	int sum = 0;
	int i;

	for (i = 0; i < n; i++) {
331
		gnm_float x = xs[i];
332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358
		int xi;

		if (x < 0)
			return 1;

		xi = (int)x;
		if (sum == 0 || xi == 0)
			; /* Nothing.  */
		else if (xi < 20) {
			int j;
			int f = sum + xi;

			result *= f--;
			for (j = 2; j <= xi; j++)
				result = result * f-- / j;
		} else {
			/* Same as above, only faster.  */
			result *= combin (sum + xi, xi);
		}

		sum += xi;
	}

	*res = result;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
359
/* Co-variance.  */
360
int
361
gnm_range_covar (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
362
{
363
	gnm_float ux, uy, s = 0;
364 365
	int i;

366
	if (n <= 0 || gnm_range_average (xs, n, &ux) || gnm_range_average (ys, n, &uy))
367 368 369 370 371 372 373 374
		return 1;

	for (i = 0; i < n; i++)
		s += (xs[i] - ux) * (ys[i] - uy);
	*res = s / n;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
375
/* Population correlation coefficient.  */
376
int
377
gnm_range_correl_pop (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
378
{
379
	gnm_float sx, sy, vxy;
380

381 382 383
	if (gnm_range_stddev_pop (xs, n, &sx) || sx == 0 ||
	    gnm_range_stddev_pop (ys, n, &sy) || sy == 0 ||
	    gnm_range_covar (xs, ys, n, &vxy))
384 385 386 387 388 389
		return 1;

	*res = vxy / (sx * sy);
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
390
/* Maximum-likelyhood correlation coefficient.  */
391
int
392
gnm_range_correl_est (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
393
{
394
	gnm_float sx, sy, vxy;
395

396 397 398
	if (gnm_range_stddev_est (xs, n, &sx) || sx == 0 ||
	    gnm_range_stddev_est (ys, n, &sy) || sy == 0 ||
	    gnm_range_covar (xs, ys, n, &vxy))
399 400 401 402 403 404
		return 1;

	*res = vxy / (sx * sy);
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
405
/* Population R-squared.  */
406
int
407
gnm_range_rsq_pop (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
408
{
409
	if (gnm_range_correl_pop (xs, ys, n, res))
410 411 412 413 414 415
		return 1;

	*res *= *res;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
416
/* Maximum-likelyhood R-squared.  */
417
int
418
gnm_range_rsq_est (gnm_float const *xs, const gnm_float *ys, int n, gnm_float *res)
419
{
420
	if (gnm_range_correl_est (xs, ys, n, res))
421 422 423 424 425 426
		return 1;

	*res *= *res;
	return 0;
}

427 428
/* Most-common element.  (The one whose first occurrence comes first in
   case of several equally common.  */
429
int
430
gnm_range_mode (gnm_float const *xs, int n, gnm_float *res)
431 432 433
{
	GHashTable *h;
	int i;
434
	gnm_float mode = 0;
435
	gconstpointer mode_key = NULL;
436 437 438 439
	int dups = 0;

	if (n <= 1) return 1;

440 441
	h = g_hash_table_new_full ((GHashFunc)gnm_float_hash,
				   (GCompareFunc)gnm_float_equal,
442 443
				   NULL,
				   (GDestroyNotify)g_free);
444
	for (i = 0; i < n; i++) {
445 446 447
		gpointer rkey, rval;
		gboolean found = g_hash_table_lookup_extended (h, &xs[i], &rkey, &rval);
		int *pdups;
448

449 450
		if (found) {
			pdups = (int *)rval;
451
			(*pdups)++;
452 453 454 455 456
			if (*pdups == dups && rkey < mode_key) {
				mode = xs[i];
				mode_key = rkey;
			}
		} else {
457 458
			pdups = g_new (int, 1);
			*pdups = 1;
459 460
			rkey = (gpointer)(xs + i);
			g_hash_table_insert (h, rkey, pdups);
461 462 463 464 465
		}

		if (*pdups > dups) {
			dups = *pdups;
			mode = xs[i];
466
			mode_key = rkey;
467 468 469 470 471 472 473 474 475 476
		}
	}
	g_hash_table_destroy (h);

	if (dups <= 1)
		return 1;

	*res = mode;
	return 0;
}