rangefunc.c 13 KB
Newer Older
1 2 3 4
/*
 * rangefunc.c: Functions on ranges (data sets).
 *
 * Authors:
Morten Welinder's avatar
Morten Welinder committed
5
 *   Morten Welinder <terra@gnome.org>
6 7 8
 *   Andreas J. Guelzow  <aguelzow@taliesin.ca>
 */

9 10
#include <gnumeric-config.h>
#include "gnumeric.h"
11
#include "rangefunc.h"
12

13 14 15
#include "mathfunc.h"
#include <math.h>
#include <stdlib.h>
16
#include <string.h>
17

Morten Welinder's avatar
Morten Welinder committed
18
int
19
range_count (const gnm_float *xs, int n, gnm_float *res)
Morten Welinder's avatar
Morten Welinder committed
20 21 22 23 24 25
{
	*res = n;
	return 0;
}


26 27
/* Arithmetic sum.  */
int
28
range_sum (const gnm_float *xs, int n, gnm_float *res)
29
{
30
	/* http://bugzilla.gnome.org/show_bug.cgi?id=131588 */
31
#ifdef HAVE_LONG_DOUBLE
32
	long double sum = 0;
33
#else
34
	gnm_float sum = 0;
35
#endif
36 37
	int i;

38
	for (i = 0; i < n; i++)
39 40 41
		sum += xs[i];

	*res = sum;
42 43 44 45 46
	return 0;
}

/* Arithmetic sum of squares.  */
int
47
range_sumsq (const gnm_float *xs, int n, gnm_float *res)
48
{
49
	/* http://bugzilla.gnome.org/show_bug.cgi?id=131588 */
50
#ifdef HAVE_LONG_DOUBLE
51
	long double sum = 0;
52
#else
53
	gnm_float sum = 0;
54
#endif
55 56
	int i;

57
	for (i = 0; i < n; i++)
58 59 60
		sum += xs[i] * xs[i];

	*res = sum;
61 62 63
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
64 65 66 67 68
int
range_hypot (const gnm_float *xs, int n, gnm_float *res)
{
	switch (n) {
	case 0: *res = 0; return 0;
Morten Welinder's avatar
Morten Welinder committed
69
	case 1: *res = gnm_abs (xs[0]); return 0;
70
	case 2: *res = gnm_hypot (xs[0], xs[1]); return 0;
Morten Welinder's avatar
Morten Welinder committed
71 72 73
	default:
		if (range_sumsq (xs, n, res))
			return 1;
74
		*res = gnm_sqrt (*res);
Morten Welinder's avatar
Morten Welinder committed
75 76 77 78 79
		return 0;
	}
}


80 81
/* Arithmetic average.  */
int
82
range_average (const gnm_float *xs, int n, gnm_float *res)
83 84 85 86 87 88 89 90
{
	if (n <= 0 || range_sum (xs, n, res))
		return 1;

	*res /= n;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
91
/* Minimum element.  */
92
int
93
range_min (const gnm_float *xs, int n, gnm_float *res)
94 95
{
	if (n > 0) {
96
		gnm_float min = xs[0];
97 98 99 100 101 102 103 104 105 106 107
		int i;

		for (i = 1; i < n; i++)
			if (xs[i] < min)
				min = xs[i];
		*res = min;
		return 0;
	} else
		return 1;
}

Morten Welinder's avatar
Morten Welinder committed
108
/* Maximum element.  */
109
int
110
range_max (const gnm_float *xs, int n, gnm_float *res)
111 112
{
	if (n > 0) {
113
		gnm_float max = xs[0];
114 115 116 117 118 119 120 121 122 123 124
		int i;

		for (i = 1; i < n; i++)
			if (xs[i] > max)
				max = xs[i];
		*res = max;
		return 0;
	} else
		return 1;
}

125 126
/* Minimum absolute element.  */
int
127
range_minabs (const gnm_float *xs, int n, gnm_float *res)
128 129
{
	if (n > 0) {
Morten Welinder's avatar
Morten Welinder committed
130
		gnm_float min = gnm_abs (xs[0]);
131 132 133
		int i;

		for (i = 1; i < n; i++)
Morten Welinder's avatar
Morten Welinder committed
134 135
			if (gnm_abs (xs[i]) < min)
				min = gnm_abs (xs[i]);
136 137 138 139 140 141 142 143
		*res = min;
		return 0;
	} else
		return 1;
}

/* Maximum absolute element.  */
int
144
range_maxabs (const gnm_float *xs, int n, gnm_float *res)
145 146
{
	if (n > 0) {
Morten Welinder's avatar
Morten Welinder committed
147
		gnm_float max = gnm_abs (xs[0]);
148 149 150
		int i;

		for (i = 1; i < n; i++)
Morten Welinder's avatar
Morten Welinder committed
151 152
			if (gnm_abs (xs[i]) > max)
				max = gnm_abs (xs[i]);
153 154 155 156 157 158
		*res = max;
		return 0;
	} else
		return 1;
}

159 160 161

/* Average absolute deviation from mean.  */
int
162
range_avedev (const gnm_float *xs, int n, gnm_float *res)
163 164
{
	if (n > 0) {
165
		gnm_float m, s = 0;
166 167 168 169
		int i;

		range_average (xs, n, &m);
		for (i = 0; i < n; i++)
Morten Welinder's avatar
Morten Welinder committed
170
			s += gnm_abs (xs[i] - m);
171 172 173 174 175 176 177 178 179
		*res = s / n;
		return 0;
	} else
		return 1;
}


/* Sum of square deviations from mean.  */
int
180
range_devsq (const gnm_float *xs, int n, gnm_float *res)
181
{
182
	gnm_float m, dx, q = 0;
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197
	if (n > 0) {
		int i;

		range_average (xs, n, &m);
		for (i = 0; i < n; i++) {
			dx = xs[i] - m;
			q += dx * dx;
		}
	}
	*res = q;
	return 0;
}

/* Variance with weight N.  */
int
198
range_var_pop (const gnm_float *xs, int n, gnm_float *res)
199 200
{
	if (n > 0) {
201
		gnm_float q;
202 203 204 205 206 207 208 209 210 211

		range_devsq (xs, n, &q);
		*res = q / n;
		return 0;
	} else
		return 1;
}

/* Variance with weight N-1.  */
int
212
range_var_est (const gnm_float *xs, int n, gnm_float *res)
213 214
{
	if (n > 1) {
215
		gnm_float q;
216 217 218 219 220 221 222 223 224 225

		range_devsq (xs, n, &q);
		*res = q / (n - 1);
		return 0;
	} else
		return 1;
}

/* Standard deviation with weight N.  */
int
226
range_stddev_pop (const gnm_float *xs, int n, gnm_float *res)
227 228 229 230
{
	if (range_var_pop (xs, n, res))
		return 1;
	else {
231
		*res = gnm_sqrt (*res);
232 233 234 235 236 237
		return 0;
	}
}

/* Standard deviation with weight N-1.  */
int
238
range_stddev_est (const gnm_float *xs, int n, gnm_float *res)
239 240 241 242
{
	if (range_var_est (xs, n, res))
		return 1;
	else {
243
		*res = gnm_sqrt (*res);
244 245 246 247 248 249
		return 0;
	}
}

/* Population skew.  */
int
250
range_skew_pop (const gnm_float *xs, int n, gnm_float *res)
251
{
252
	gnm_float m, s, dxn, x3 = 0;
253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
	int i;

	if (n < 1 || range_average (xs, n, &m) || range_stddev_pop (xs, n, &s))
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x3 += dxn * dxn *dxn;
	}

	*res = x3 / n;
	return 0;
}

/* Maximum-likelyhood estimator for skew.  */
int
271
range_skew_est (const gnm_float *xs, int n, gnm_float *res)
272
{
273
	gnm_float m, s, dxn, x3 = 0;
274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291
	int i;

	if (n < 3 || range_average (xs, n, &m) || range_stddev_est (xs, n, &s))
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x3 += dxn * dxn *dxn;
	}

	*res = ((x3 * n) / (n - 1)) / (n - 2);
	return 0;
}

/* Population kurtosis (with offset 3).  */
int
292
range_kurtosis_m3_pop (const gnm_float *xs, int n, gnm_float *res)
293
{
294
	gnm_float m, s, dxn, x4 = 0;
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312
	int i;

	if (n < 1 || range_average (xs, n, &m) || range_stddev_pop (xs, n, &s))
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x4 += (dxn * dxn) * (dxn * dxn);
	}

	*res = x4 / n - 3;
	return 0;
}

/* Unbiased, I hope, estimator for kurtosis (with offset 3).  */
int
313
range_kurtosis_m3_est (const gnm_float *xs, int n, gnm_float *res)
314
{
315 316
	gnm_float m, s, dxn, x4 = 0;
	gnm_float common_den, nth, three;
317 318 319 320 321 322 323 324 325 326 327 328
	int i;

	if (n < 4 || range_average (xs, n, &m) || range_stddev_est (xs, n, &s))
		return 1;
	if (s == 0)
		return 1;

	for (i = 0; i < n; i++) {
		dxn = (xs[i] - m) / s;
		x4 += (dxn * dxn) * (dxn * dxn);
	}

329 330
	common_den = (gnm_float)(n - 2) * (n - 3);
	nth = (gnm_float)n * (n + 1) / ((n - 1) * common_den);
331 332 333 334 335 336 337 338
	three = 3.0 * (n - 1) * (n - 1) / common_den;

	*res = x4 * nth - three;
	return 0;
}

/* Harmonic mean of positive numbers.  */
int
339
range_harmonic_mean (const gnm_float *xs, int n, gnm_float *res)
340 341
{
	if (n > 0) {
342
		gnm_float invsum = 0;
343 344 345 346 347 348 349 350 351 352 353 354 355
		int i;

		for (i = 0; i < n; i++) {
			if (xs[i] <= 0)
				return 1;
			invsum += 1 / xs[i];
		}
		*res = n / invsum;
		return 0;
	} else
		return 1;
}

356
static void
357 358
product_helper (const gnm_float *xs, int n,
		gnm_float *res, int *exp2,
359 360
		gboolean *zerop, gboolean *anynegp)
{
361
	gnm_float x0 = xs[0];
362 363 364 365 366 367 368 369
	*zerop = (x0 == 0);
	*anynegp = (x0 < 0);

	if (n == 1 || *zerop) {
		*res = x0;
		*exp2 = 0;
	} else {
		int e;
370
		gnm_float mant = gnm_frexp (x0, &e);
371 372 373 374
		int i;

		for (i = 1; i < n; i++) {
			int thise;
375
			gnm_float x = xs[i];
376 377 378 379 380 381 382 383 384

			if (x == 0) {
				*zerop = TRUE;
				*res = 0;
				*exp2 = 0;
				return;
			}
			if (x < 0) *anynegp = TRUE;

385
			mant *= gnm_frexp (x, &thise);
386 387 388
			e += thise;

			/* Keep 0.5 < |mant| <= 1.  */
Morten Welinder's avatar
Morten Welinder committed
389
			if (gnm_abs (mant) <= 0.5) {
390 391 392 393 394 395 396 397 398 399 400
				mant *= 2;
				e--;
			}
		}

		*exp2 = e;
		*res = mant;
	}
}


401 402
/* Geometric mean of positive numbers.  */
int
403
range_geometric_mean (const gnm_float *xs, int n, gnm_float *res)
404
{
405 406
	int exp2;
	gboolean zerop, anynegp;
407

408 409
	/* FIXME: check empty case.  */
	if (n < 1)
410
		return 1;
411 412 413 414 415 416 417 418

	/* FIXME: check zero case.  */
	product_helper (xs, n, res, &exp2, &zerop, &anynegp);
	if (zerop || anynegp)
		return 1;

	/* Now compute (res * 2^exp2) ^ (1/n).  */
	if (exp2 >= 0)
419
		*res = gnm_pow (*res * gnm_pow2 (exp2 % n), 1.0 / n) * gnm_pow2 (exp2 / n);
420
	else
421
		*res = gnm_pow (*res / gnm_pow2 ((-exp2) % n), 1.0 / n) / gnm_pow2 ((-exp2) / n);
422 423

	return 0;
424 425 426 427 428
}


/* Product.  */
int
429
range_product (const gnm_float *xs, int n, gnm_float *res)
430
{
431
	if (n == 0) {
Morten Welinder's avatar
Morten Welinder committed
432
		*res = 1;
433 434 435 436 437 438
	} else {
		int exp2;
		gboolean zerop, anynegp;

		product_helper (xs, n, res, &exp2, &zerop, &anynegp);
		if (exp2)
439
			*res = gnm_ldexp (*res, exp2);
440
	}
441

442 443 444 445
	return 0;
}

int
446
range_multinomial (const gnm_float *xs, int n, gnm_float *res)
447
{
448
	gnm_float result = 1;
449 450 451 452
	int sum = 0;
	int i;

	for (i = 0; i < n; i++) {
453
		gnm_float x = xs[i];
454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480
		int xi;

		if (x < 0)
			return 1;

		xi = (int)x;
		if (sum == 0 || xi == 0)
			; /* Nothing.  */
		else if (xi < 20) {
			int j;
			int f = sum + xi;

			result *= f--;
			for (j = 2; j <= xi; j++)
				result = result * f-- / j;
		} else {
			/* Same as above, only faster.  */
			result *= combin (sum + xi, xi);
		}

		sum += xi;
	}

	*res = result;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
481
/* Co-variance.  */
482
int
483
range_covar (const gnm_float *xs, const gnm_float *ys, int n, gnm_float *res)
484
{
485
	gnm_float ux, uy, s = 0;
486 487 488 489 490 491 492 493 494 495 496
	int i;

	if (n <= 0 || range_average (xs, n, &ux) || range_average (ys, n, &uy))
		return 1;

	for (i = 0; i < n; i++)
		s += (xs[i] - ux) * (ys[i] - uy);
	*res = s / n;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
497
/* Population correlation coefficient.  */
498
int
499
range_correl_pop (const gnm_float *xs, const gnm_float *ys, int n, gnm_float *res)
500
{
501
	gnm_float sx, sy, vxy;
502 503 504 505 506 507 508 509 510 511

	if (range_stddev_pop (xs, n, &sx) || sx == 0 ||
	    range_stddev_pop (ys, n, &sy) || sy == 0 ||
	    range_covar (xs, ys, n, &vxy))
		return 1;

	*res = vxy / (sx * sy);
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
512
/* Maximum-likelyhood correlation coefficient.  */
513
int
514
range_correl_est (const gnm_float *xs, const gnm_float *ys, int n, gnm_float *res)
515
{
516
	gnm_float sx, sy, vxy;
517 518 519 520 521 522 523 524 525 526

	if (range_stddev_est (xs, n, &sx) || sx == 0 ||
	    range_stddev_est (ys, n, &sy) || sy == 0 ||
	    range_covar (xs, ys, n, &vxy))
		return 1;

	*res = vxy / (sx * sy);
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
527
/* Population R-squared.  */
528
int
529
range_rsq_pop (const gnm_float *xs, const gnm_float *ys, int n, gnm_float *res)
530 531 532 533 534 535 536 537
{
	if (range_correl_pop (xs, ys, n, res))
		return 1;

	*res *= *res;
	return 0;
}

Morten Welinder's avatar
Morten Welinder committed
538
/* Maximum-likelyhood R-squared.  */
539
int
540
range_rsq_est (const gnm_float *xs, const gnm_float *ys, int n, gnm_float *res)
541 542 543 544 545 546 547 548 549
{
	if (range_correl_est (xs, ys, n, res))
		return 1;

	*res *= *res;
	return 0;
}

static guint
550
float_hash (const gnm_float *d)
551
{
552
	int expt;
Morten Welinder's avatar
Morten Welinder committed
553
	gnm_float mant = gnm_frexp (gnm_abs (*d), &expt);
554 555 556 557
	guint h = ((guint)(0x80000000u * mant)) ^ expt;
	if (*d >= 0)
		h ^= 0x55555555;
	return h;
558 559 560
}

static gint
561
float_equal (const gnm_float *a, const gnm_float *b)
562 563 564 565 566 567
{
	if (*a == *b)
	        return 1;
	return 0;
}

568 569
/* Most-common element.  (The one whose first occurrence comes first in
   case of several equally common.  */
570
int
571
range_mode (const gnm_float *xs, int n, gnm_float *res)
572 573 574
{
	GHashTable *h;
	int i;
575
	gnm_float mode = 0;
576
	gconstpointer mode_key = NULL;
577 578 579 580
	int dups = 0;

	if (n <= 1) return 1;

581 582 583 584
	h = g_hash_table_new_full ((GHashFunc)float_hash,
				   (GCompareFunc)float_equal,
				   NULL,
				   (GDestroyNotify)g_free);
585
	for (i = 0; i < n; i++) {
586 587 588
		gpointer rkey, rval;
		gboolean found = g_hash_table_lookup_extended (h, &xs[i], &rkey, &rval);
		int *pdups;
589

590 591
		if (found) {
			pdups = (int *)rval;
592
			(*pdups)++;
593 594 595 596 597
			if (*pdups == dups && rkey < mode_key) {
				mode = xs[i];
				mode_key = rkey;
			}
		} else {
598 599
			pdups = g_new (int, 1);
			*pdups = 1;
600 601
			rkey = (gpointer)(xs + i);
			g_hash_table_insert (h, rkey, pdups);
602 603 604 605 606
		}

		if (*pdups > dups) {
			dups = *pdups;
			mode = xs[i];
607
			mode_key = rkey;
608 609 610 611 612 613 614 615 616 617 618 619 620
		}
	}
	g_hash_table_destroy (h);

	if (dups <= 1)
		return 1;

	*res = mode;
	return 0;
}


static gint
621
float_compare (const gnm_float *a, const gnm_float *b)
622 623 624 625 626 627 628 629 630
{
        if (*a < *b)
                return -1;
	else if (*a == *b)
	        return 0;
	else
	        return 1;
}

631 632
static gnm_float *
range_sort (const gnm_float *xs, int n)
Morten Welinder's avatar
Morten Welinder committed
633 634 635 636
{
	if (n <= 0)
		return NULL;
	else {
637 638
		gnm_float *ys = g_new (gnm_float, n);
		memcpy (ys, xs, n * sizeof (gnm_float));
Morten Welinder's avatar
Morten Welinder committed
639 640 641 642 643 644 645 646
		qsort (ys, n, sizeof (ys[0]), (int (*) (const void *, const void *))&float_compare);
		return ys;
	}
}


/* This requires sorted data.  */
static int
647
range_fractile_inter_sorted (const gnm_float *xs, int n, gnm_float *res, gnm_float f)
Morten Welinder's avatar
Morten Welinder committed
648
{
649
	gnm_float fpos, residual;
Morten Welinder's avatar
Morten Welinder committed
650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668
	int pos;

	if (n <= 0 || f < 0.0 || f > 1.0)
		return 1;

	fpos = (n - 1) * f;
	pos = (int)fpos;
	residual = fpos - pos;

	if (residual == 0.0 || pos + 1 >= n)
		*res = xs[pos];
	else
		*res = (1 - residual) * xs[pos] + residual * xs[pos + 1];

	return 0;
}

/* Interpolative fractile.  */
int
669
range_fractile_inter (const gnm_float *xs, int n, gnm_float *res, gnm_float f)
Morten Welinder's avatar
Morten Welinder committed
670
{
671
	gnm_float *ys = range_sort (xs, n);
Morten Welinder's avatar
Morten Welinder committed
672 673 674 675 676 677 678 679
	int error = range_fractile_inter_sorted (ys, n, res, f);
	g_free (ys);
	return error;
}

/* Interpolative fractile.  */
/* This version may reorder data points.  */
int
680
range_fractile_inter_nonconst (gnm_float *xs, int n, gnm_float *res, gnm_float f)
Morten Welinder's avatar
Morten Welinder committed
681 682 683 684 685
{
	qsort (xs, n, sizeof (xs[0]), (int (*) (const void *, const void *))&float_compare);
	return range_fractile_inter_sorted (xs, n, res, f);
}

686 687
/* Interpolative median.  */
int
688
range_median_inter (const gnm_float *xs, int n, gnm_float *res)
689
{
Morten Welinder's avatar
Morten Welinder committed
690
	return range_fractile_inter (xs, n, res, 0.5);
691 692
}

Morten Welinder's avatar
Morten Welinder committed
693 694 695
/* Interpolative median.  */
/* This version may reorder data points.  */
int
696
range_median_inter_nonconst (gnm_float *xs, int n, gnm_float *res)
Morten Welinder's avatar
Morten Welinder committed
697 698 699
{
	return range_fractile_inter_nonconst (xs, n, res, 0.5);
}
Morten Welinder's avatar
Morten Welinder committed
700 701 702

/* k-th smallest.  Note: k is zero-based.  */
int
703
range_min_k (const gnm_float *xs, int n, gnm_float *res, int k)
Morten Welinder's avatar
Morten Welinder committed
704
{
705
	gnm_float *ys;
Morten Welinder's avatar
Morten Welinder committed
706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722

	if (k < 0 || k >= n)
		return 1;
	if (k == 0)
		return range_min (xs, n, res);
	if (k == n - 1)
		return range_max (xs, n, res);

	ys = range_sort (xs, n);
	*res = ys[k];
	g_free (ys);
	return 0;
}

/* k-th smallest.  Note: k is zero-based.  */
/* This version may reorder data points.  */
int
723
range_min_k_nonconst (gnm_float *xs, int n, gnm_float *res, int k)
Morten Welinder's avatar
Morten Welinder committed
724 725 726 727 728 729 730 731 732 733 734 735
{
	if (k < 0 || k >= n)
		return 1;
	if (k == 0)
		return range_min (xs, n, res);
	if (k == n - 1)
		return range_max (xs, n, res);

	qsort (xs, n, sizeof (xs[0]), (int (*) (const void *, const void *))&float_compare);
	*res = xs[k];
	return 0;
}