GitLab repository storage has been migrated to hashed layout. Please contact Infrastructure team if you notice any issues with repositories or hooks.

Commit 496e868a authored by Jukka-Pekka Iivonen's avatar Jukka-Pekka Iivonen Committed by jpekka

New file for data analysis tools. Currently only correlation and covariance tools implemented.

1999-07-02  Jukka-Pekka Iivonen  <iivonen@iki.fi>

	* src/analysis-tools.c: New file for data analysis tools.
  	Currently only correlation and covariance tools implemented.

	* src/dialog-analysis-tools.c: New file.

	* src/dialogs.h: Added some definitions for data analysis tools.

	* src/workbook.c: Added data analysis menu entry.
parent db3d65b8
......@@ -22,5 +22,5 @@ Financial functions code:
TXT plugin for simple text import/export:
Takashi Matsuda <matsu@arch.comp.kyutech.ac.jp>
Numerous functions:
Numerous functions and tools:
Jukka-Pekka Iivonen <iivonen@iki.fi>
......@@ -5,9 +5,10 @@ Miguel:
Pattern selector works again.
Jukka:
* Started the implementation of Data Analysis tools
- Implemented Correlation and Covariance tools
* Added the following functions:
- FORECAST() and INTERCEPT().
* And fixed these functions:
- SUMIF() and N().
......
1999-07-02 Jukka-Pekka Iivonen <iivonen@iki.fi>
* src/analysis-tools.c: New file for data analysis tools.
Currently only correlation and covariance tools implemented.
* src/dialog-analysis-tools.c: New file.
* src/dialogs.h: Added some definitions for data analysis tools.
* src/workbook.c: Added data analysis menu entry.
1999-06-26 Morten Welinder <terra@diku.dk>
* src/fn-string.c (gnumeric_dollar): Avoid warning overflows.
......
1999-07-02 Jukka-Pekka Iivonen <iivonen@iki.fi>
* src/analysis-tools.c: New file for data analysis tools.
Currently only correlation and covariance tools implemented.
* src/dialog-analysis-tools.c: New file.
* src/dialogs.h: Added some definitions for data analysis tools.
* src/workbook.c: Added data analysis menu entry.
1999-06-26 Morten Welinder <terra@diku.dk>
* src/fn-string.c (gnumeric_dollar): Avoid warning overflows.
......
1999-07-02 Jukka-Pekka Iivonen <iivonen@iki.fi>
* src/analysis-tools.c: New file for data analysis tools.
Currently only correlation and covariance tools implemented.
* src/dialog-analysis-tools.c: New file.
* src/dialogs.h: Added some definitions for data analysis tools.
* src/workbook.c: Added data analysis menu entry.
1999-06-26 Morten Welinder <terra@diku.dk>
* src/fn-string.c (gnumeric_dollar): Avoid warning overflows.
......
1999-07-02 Jukka-Pekka Iivonen <iivonen@iki.fi>
* src/analysis-tools.c: New file for data analysis tools.
Currently only correlation and covariance tools implemented.
* src/dialog-analysis-tools.c: New file.
* src/dialogs.h: Added some definitions for data analysis tools.
* src/workbook.c: Added data analysis menu entry.
1999-06-26 Morten Welinder <terra@diku.dk>
* src/fn-string.c (gnumeric_dollar): Avoid warning overflows.
......
......@@ -53,6 +53,7 @@ INCLUDES = \
GNUMERIC_BASE_SOURCES = \
about.c \
analysis-tools.c \
cell.h \
cell.c \
cell-draw.c \
......@@ -70,6 +71,7 @@ GNUMERIC_BASE_SOURCES = \
cursors.h \
dates.c \
dates.h \
dialog-analysis-tools.c \
dialog-cell-comment.c \
dialog-cell-format.c \
dialog-cell-sort.c \
......
......@@ -34,7 +34,7 @@ dialog_about (Workbook *wb)
N_("Vincent Renardias, CSV support."),
N_("Vladimir Vuksan, financial functions."),
N_("Takashi Matsuda, simple text plugin."),
N_("Jukka-Pekka Iivonen, numerous functions."),
N_("Jukka-Pekka Iivonen, numerous functions and tools."),
N_("Morten Welinder, Gnumeric hacker."),
NULL
};
......
/*
* analysis-tools.c:
*
* Author:
* Jukka-Pekka Iivonen <iivonen@iki.fi>
*/
#include <config.h>
#include <gnome.h>
#include <string.h>
#include <math.h>
#include "gnumeric.h"
#include "gnumeric-util.h"
#include "dialogs.h"
/*
 * Values and running totals collected from one row or one column of the
 * input range.  Filled in by get_data_groupped_by_columns() /
 * get_data_groupped_by_rows() and released with free_data_set().
 */
typedef struct {
GSList *array; /* one g_new()-allocated float_t per collected cell */
float_t sum; /* sum of the collected values */
float_t sum2; /* square of the sum */
float_t sqrsum; /* sum of the squared values */
int n; /* number of values collected */
} data_set_t;
/***** Some general routines ***********************************************/
/*
 * Put TEXT into the cell at (col, row) of SHEET.  When sheet_cell_get()
 * finds no cell at that position a new one is created first.
 *
 * Returns the cell that received the text.
 */
static Cell *
set_cell (Sheet *sheet, int col, int row, char *text)
{
	Cell *target = sheet_cell_get (sheet, col, row);

	if (!target)
		target = sheet_cell_new (sheet, col, row);

	cell_set_text (target, text);

	return target;
}
/*
 * Collect one column of RANGE into DATA.
 *
 * sheet: sheet the cells are read from.
 * range: supplies the rows to scan (start_row..end_row, inclusive) and
 *        the first column of the input area.
 * col:   zero-based index of the column inside RANGE; the sheet column
 *        actually read is range->start_col + col.  (Both callers pass
 *        such an index; using it as an absolute column read the wrong
 *        cells for every range that does not start in the first column.)
 * data:  output; every field is overwritten.
 *
 * Cells that do not exist or carry no value are skipped.  Cells whose
 * value is not a number are counted with the value 0.  Each collected
 * value is stored in its own g_new() allocation; release the set with
 * free_data_set().
 */
static void
get_data_groupped_by_columns(Sheet *sheet, Range *range, int col,
			     data_set_t *data)
{
	const int sheet_col = range->start_col + col;
	int row;

	data->sum = 0;
	data->sum2 = 0;
	data->sqrsum = 0;
	data->n = 0;
	data->array = NULL;

	for (row = range->start_row; row <= range->end_row; row++) {
		Cell *cell = sheet_cell_get(sheet, sheet_col, row);
		Value *v;
		float_t *slot;
		float_t x;

		if (cell == NULL || cell->value == NULL)
			continue;

		v = cell->value;
		if (VALUE_IS_NUMBER(v))
			x = value_get_as_float (v);
		else
			x = 0;

		slot = g_new(float_t, 1);
		*slot = x;
		/* Prepend now and reverse once below: appending walks the
		 * whole list on every insertion. */
		data->array = g_slist_prepend(data->array, slot);

		data->sum += x;
		data->sqrsum += x * x;
		data->n++;
	}

	/* Restore top-to-bottom order. */
	data->array = g_slist_reverse(data->array);
	data->sum2 = data->sum * data->sum;
}
/*
 * Collect one row of RANGE into DATA.
 *
 * sheet: sheet the cells are read from.
 * range: supplies the columns to scan (start_col..end_col, inclusive)
 *        and the first row of the input area.
 * row:   zero-based index of the row inside RANGE; the sheet row
 *        actually read is range->start_row + row.  (Both callers pass
 *        such an index; using it as an absolute row read the wrong
 *        cells for every range that does not start in the first row.)
 * data:  output; every field is overwritten.
 *
 * Cells that do not exist or carry no value are skipped.  Cells whose
 * value is not a number are counted with the value 0.  Each collected
 * value is stored in its own g_new() allocation; release the set with
 * free_data_set().
 */
static void
get_data_groupped_by_rows(Sheet *sheet, Range *range, int row,
			  data_set_t *data)
{
	const int sheet_row = range->start_row + row;
	int col;

	data->sum = 0;
	data->sum2 = 0;
	data->sqrsum = 0;
	data->n = 0;
	data->array = NULL;

	for (col = range->start_col; col <= range->end_col; col++) {
		Cell *cell = sheet_cell_get(sheet, col, sheet_row);
		Value *v;
		float_t *slot;
		float_t x;

		if (cell == NULL || cell->value == NULL)
			continue;

		v = cell->value;
		if (VALUE_IS_NUMBER(v))
			x = value_get_as_float (v);
		else
			x = 0;

		slot = g_new(float_t, 1);
		*slot = x;
		/* Prepend now and reverse once below: appending walks the
		 * whole list on every insertion. */
		data->array = g_slist_prepend(data->array, slot);

		data->sum += x;
		data->sqrsum += x * x;
		data->n++;
	}

	/* Restore left-to-right order. */
	data->array = g_slist_reverse(data->array);
	data->sum2 = data->sum * data->sum;
}
/*
 * Release everything hanging off DATA->array: first the payload of each
 * list node, then the list itself.  The data_set_t structure is not
 * freed and its fields are left as they were.
 */
static void
free_data_set(data_set_t *data)
{
	GSList *node;

	for (node = data->array; node != NULL; node = node->next)
		g_free(node->data);

	g_slist_free(data->array);
}
/************* Correlation Tool *******************************************
*
* The correlation tool calculates the correlation coefficient of two
* data sets. The two data sets can be grouped by rows or by columns.
* The results are given in a table which can be printed out in a new
* sheet, in a new workbook, or simply into an existing sheet.
*
* TODO: a new workbook output and output to an existing sheet
*
**/
/*
 * Correlation coefficient of two data sets.
 *
 * set_one, set_two: the data sets; only read.
 * error_flag:       output, always written:
 *                     0  success,
 *                     1  the two value lists have different lengths,
 *                     2  the coefficient is undefined (no data, or one
 *                        of the sets has no spread).
 *
 * Returns the coefficient on success and 0 whenever *error_flag is set.
 */
static float_t
correl(data_set_t *set_one, data_set_t *set_two, int *error_flag)
{
	GSList *one = set_one->array;
	GSList *two = set_two->array;
	float_t sum_xy = 0;
	float_t denom;

	*error_flag = 0;

	while (one != NULL && two != NULL) {
		sum_xy += *((float_t *) one->data) *
			*((float_t *) two->data);
		one = one->next;
		two = two->next;
	}

	/* One list ran out before the other. */
	if (one != NULL || two != NULL) {
		*error_flag = 1;
		return 0;
	}

	/* Nothing to correlate; also keeps the divisions below away
	 * from n == 0. */
	if (set_one->n <= 0 || set_two->n <= 0) {
		*error_flag = 2;
		return 0;
	}

	denom = (set_one->sqrsum - (set_one->sum2 / set_one->n)) *
		(set_two->sqrsum - (set_two->sum2 / set_two->n));

	/* Written as !(denom > 0) so that zero, a value pushed slightly
	 * negative by rounding, and NaN are all rejected before sqrt(). */
	if (!(denom > 0)) {
		*error_flag = 2;
		return 0;
	}

	return (sum_xy - (set_one->sum * set_two->sum / set_one->n)) /
		sqrt(denom);
}
/*
 * Build a table of pairwise correlation coefficients for INPUT_RANGE.
 *
 * wb:            workbook that receives a new sheet named "Correlations".
 * current_sheet: sheet the input data is read from.
 * input_range:   area holding the data.
 * columns_flag:  non-zero: every column of the range is one variable;
 *                zero: every row of the range is one variable.
 *
 * The new sheet gets "Column N" / "Row N" labels down column 0 and
 * across row 0, and the lower triangle of the matrix (diagonal included,
 * always "1").  A pair whose coefficient cannot be computed is shown as
 * "--".
 *
 * If the range contains no variables (end before start) nothing is
 * created; previously that case handed a negative count to g_new().
 */
void
correlation_tool (Workbook *wb, Sheet *current_sheet,
		  Range *input_range, int columns_flag)
{
	const char *kind = columns_flag ? "Column" : "Row";
	data_set_t *data_sets;
	Sheet *sheet;
	char buf[256];
	int vars, col, row, i;
	int error;

	if (columns_flag)
		vars = input_range->end_col - input_range->start_col + 1;
	else
		vars = input_range->end_row - input_range->start_row + 1;

	if (vars < 1)
		return;

	sheet = sheet_new(wb, "Correlations");
	workbook_attach_sheet(wb, sheet);

	/* Header labels: the same text goes down the left edge and across
	 * the top. */
	set_cell (sheet, 0, 0, "");
	for (i = 0; i < vars; i++) {
		snprintf(buf, sizeof buf, "%s %d", kind, i + 1);
		set_cell (sheet, 0, i + 1, buf);
		set_cell (sheet, i + 1, 0, buf);
	}

	data_sets = g_new(data_set_t, vars);
	for (i = 0; i < vars; i++) {
		if (columns_flag)
			get_data_groupped_by_columns(current_sheet,
						     input_range, i,
						     &data_sets[i]);
		else
			get_data_groupped_by_rows(current_sheet,
						  input_range, i,
						  &data_sets[i]);
	}

	/* Lower triangle only; the matrix is symmetric. */
	for (row = 0; row < vars; row++) {
		for (col = 0; col < row; col++) {
			float_t r = correl(&data_sets[col],
					   &data_sets[row],
					   &error);

			if (error) {
				set_cell (sheet, col + 1, row + 1, "--");
			} else {
				snprintf(buf, sizeof buf, "%f", (double) r);
				set_cell (sheet, col + 1, row + 1, buf);
			}
		}
		set_cell (sheet, row + 1, row + 1, "1");
	}

	for (i = 0; i < vars; i++)
		free_data_set(&data_sets[i]);
	/* The array of sets itself was leaked before. */
	g_free(data_sets);
}
/************* Covariance Tool ********************************************
*
* The covariance tool calculates the covariance of two data sets.
* The two data sets can be grouped by rows or by columns. The
* results are given in a table which can be printed out in a new
* sheet, in a new workbook, or simply into an existing sheet.
*
* TODO: a new workbook output and output to an existing sheet
*
**/
/*
 * Covariance of two data sets: the mean of (x - mean_x) * (y - mean_y)
 * over the paired values, divided by the number of values in set_one.
 *
 * set_one, set_two: the data sets; only read.
 * error_flag:       output, always written:
 *                     0  success,
 *                     1  the two sets have different numbers of values,
 *                     2  both sets are empty.
 *
 * Returns the covariance on success and 0 whenever *error_flag is set.
 */
static float_t
covar(data_set_t *set_one, data_set_t *set_two, int *error_flag)
{
	GSList *one = set_one->array;
	GSList *two = set_two->array;
	float_t acc = 0;
	float_t mean1, mean2;

	*error_flag = 0;

	/* The means below divide by n; an empty set has none. */
	if (set_one->n <= 0 || set_two->n <= 0) {
		*error_flag = (set_one->n == set_two->n) ? 2 : 1;
		return 0;
	}

	mean1 = set_one->sum / set_one->n;
	mean2 = set_two->sum / set_two->n;

	while (one != NULL && two != NULL) {
		float_t x = *((float_t *) one->data);
		float_t y = *((float_t *) two->data);

		acc += (x - mean1) * (y - mean2);
		one = one->next;
		two = two->next;
	}

	/* One list ran out before the other. */
	if (one != NULL || two != NULL) {
		*error_flag = 1;
		return 0;
	}

	return acc / set_one->n;
}
/*
 * Build a table of pairwise covariances for INPUT_RANGE.
 *
 * wb:            workbook that receives a new sheet named "Covariances".
 * current_sheet: sheet the input data is read from.
 * input_range:   area holding the data.
 * columns_flag:  non-zero: every column of the range is one variable;
 *                zero: every row of the range is one variable.
 *
 * The new sheet gets "Column N" / "Row N" labels down column 0 and
 * across row 0, and the lower triangle of the matrix.  The diagonal
 * holds each variable's covariance with itself (its variance); it used
 * to be hard-coded to "1", which is only right for correlations.  An
 * entry that cannot be computed is shown as "--".
 *
 * If the range contains no variables (end before start) nothing is
 * created; previously that case handed a negative count to g_new().
 */
void
covariance_tool (Workbook *wb, Sheet *current_sheet,
		 Range *input_range, int columns_flag)
{
	const char *kind = columns_flag ? "Column" : "Row";
	data_set_t *data_sets;
	Sheet *sheet;
	/* Large enough for any double printed with "%f"; the bounded
	 * snprintf() calls protect it regardless. */
	char buf[512];
	int vars, col, row, i;
	int error;

	if (columns_flag)
		vars = input_range->end_col - input_range->start_col + 1;
	else
		vars = input_range->end_row - input_range->start_row + 1;

	if (vars < 1)
		return;

	sheet = sheet_new(wb, "Covariances");
	workbook_attach_sheet(wb, sheet);

	/* Header labels: the same text goes down the left edge and across
	 * the top. */
	set_cell (sheet, 0, 0, "");
	for (i = 0; i < vars; i++) {
		snprintf(buf, sizeof buf, "%s %d", kind, i + 1);
		set_cell (sheet, 0, i + 1, buf);
		set_cell (sheet, i + 1, 0, buf);
	}

	data_sets = g_new(data_set_t, vars);
	for (i = 0; i < vars; i++) {
		if (columns_flag)
			get_data_groupped_by_columns(current_sheet,
						     input_range, i,
						     &data_sets[i]);
		else
			get_data_groupped_by_rows(current_sheet,
						  input_range, i,
						  &data_sets[i]);
	}

	/* Lower triangle including the diagonal; the matrix is symmetric. */
	for (row = 0; row < vars; row++) {
		for (col = 0; col <= row; col++) {
			float_t c = covar(&data_sets[col],
					  &data_sets[row],
					  &error);

			if (error) {
				set_cell (sheet, col + 1, row + 1, "--");
			} else {
				snprintf(buf, sizeof buf, "%f", (double) c);
				set_cell (sheet, col + 1, row + 1, buf);
			}
		}
	}

	for (i = 0; i < vars; i++)
		free_data_set(&data_sets[i]);
	/* The array of sets itself was leaked before. */
	g_free(data_sets);
}
/*
* dialog-analysis-tools.c:
*
* Author:
* Jukka-Pekka Iivonen <iivonen@iki.fi>
*/
#include <config.h>
#include <gnome.h>
#include <string.h>
#include "gnumeric.h"
#include "gnumeric-util.h"
#include "dialogs.h"
#include "utils.h"
/* Entry points of the individual tool dialogs, defined further down. */
void correlation_dialog (Workbook *wb, Sheet *sheet);
void covariance_dialog (Workbook *wb, Sheet *sheet);
/* Signature shared by every tool dialog entry point. */
typedef void (*tool_fun_ptr_t)(Workbook *wb, Sheet *sheet);
/*
 * Display strings for one tool.
 * NOTE(review): presumably the texts for a two-column list of tools; the
 * code that consumes them is not visible here, and col2 is NULL for
 * every entry below.
 */
typedef struct {
char *col1;
char *col2;
} tool_name_t;
/* One entry of the tool table: display strings plus the dialog to run. */
typedef struct {
tool_name_t name;
tool_fun_ptr_t fun;
} tool_list_t;
/* Table of available tools; the all-NULL entry terminates it. */
tool_list_t tools[] = {
{ { "Correlation", NULL }, correlation_dialog },
{ { "Covariance", NULL }, covariance_dialog },
{ { NULL, NULL }, NULL }
};
/* NOTE(review): presumably the index into tools[] of the currently
 * selected entry; its users are not visible in this part of the file. */
static int selected_row;
/*
 * Split a range written as "<cell>:<cell>" and parse both halves with
 * parse_cell_name().
 *
 * text:                 the range text; not modified.
 * start_col, start_row: receive the position of the cell before the ':'.
 * end_col, end_row:     receive the position of the cell after the ':'.
 *
 * Returns 1 on success.  Returns 0 when text is NULL, does not fit the
 * local work buffer, contains no ':', or either half is rejected by
 * parse_cell_name().  The outputs may be partly written on failure.
 */
static int
parse_range (char *text, int *start_col, int *start_row,
	     int *end_col, int *end_row)
{
	char buf[256];
	char *sep;
	size_t len;

	if (text == NULL)
		return 0;

	/* The text is copied so it can be cut at the ':'; refuse input
	 * that would overrun the copy instead of writing past it. */
	len = strlen(text);
	if (len >= sizeof buf)
		return 0;
	memcpy(buf, text, len + 1);

	sep = strchr(buf, ':');
	if (sep == NULL)
		return 0;
	*sep = '\0';

	if (!parse_cell_name (buf, start_col, start_row))
		return 0;
	if (!parse_cell_name (sep + 1, end_col, end_row))
		return 0;

	return 1;
}
/*
 * Labels of the "Groupped By:" radio buttons, NULL-terminated.  They are
 * only marked with N_() here and run through _() when the buttons are
 * created.
 */
static char *groupped_ops [] = {
N_("Columns"),
N_("Rows"),
NULL
};
/*
 * Show the "Correlation" dialog and, when the user confirms a valid
 * input range, run correlation_tool() on it.
 *
 * wb:    workbook the dialog belongs to and the result sheet is added to.
 * sheet: sheet the input range refers to.
 *
 * The dialog is built on the first call and kept (hidden) for later
 * calls.  An unparsable range brings up an error notice and the dialog
 * is run again.  The Cancel button, or closing the dialog without
 * pressing a button, leaves without doing anything; the latter used to
 * be mistaken for OK.
 */
void correlation_dialog (Workbook *wb, Sheet *sheet)
{
	/* Widgets that have to survive between calls. */
	static GtkWidget *dialog;
	static GtkWidget *input_range;
	static GSList *group_ops;
	static Range range_input;
	char *text;
	int selection;
	int i;

	if (!dialog) {
		GtkWidget *box, *hbox_x, *group_box;
		GtkWidget *input_range_label, *groupped_label;
		GtkWidget *r;

		dialog = gnome_dialog_new (_("Correlation"),
					   _("OK"),
					   GNOME_STOCK_BUTTON_CANCEL,
					   NULL);
		gnome_dialog_close_hides (GNOME_DIALOG (dialog), TRUE);
		gnome_dialog_set_parent (GNOME_DIALOG (dialog),
					 GTK_WINDOW (wb->toplevel));

		box = gtk_vbox_new (FALSE, 0);
		hbox_x = gtk_hbox_new (FALSE, 0);
		group_box = gtk_vbox_new (FALSE, 0);

		gtk_box_pack_start_defaults (GTK_BOX (box), hbox_x);
		gtk_box_pack_start_defaults (GTK_BOX (GNOME_DIALOG
						      (dialog)->vbox), box);

		input_range = gtk_entry_new_with_max_length (20);
		input_range_label = gtk_label_new ("Input Range:");
		gtk_box_pack_start_defaults (GTK_BOX (hbox_x),
					     input_range_label);
		gtk_box_pack_start_defaults (GTK_BOX (hbox_x),
					     input_range);

		groupped_label = gtk_label_new ("Groupped By:");
		gtk_box_pack_start_defaults (GTK_BOX (GNOME_DIALOG
						      (dialog)->vbox),
					     groupped_label);

		group_ops = NULL;
		for (i = 0; groupped_ops [i]; i++) {
			r = gtk_radio_button_new_with_label (group_ops,
							     _(groupped_ops[i])
							     );
			group_ops = GTK_RADIO_BUTTON (r)->group;
			gtk_box_pack_start_defaults (GTK_BOX (group_box), r);
		}
		gtk_box_pack_start_defaults (GTK_BOX (GNOME_DIALOG
						      (dialog)->vbox),
					     group_box);
	}

	gtk_widget_show_all (dialog);
	gtk_widget_grab_focus (input_range);

	for (;;) {
		selection = gnome_dialog_run (GNOME_DIALOG (dialog));

		/* -1: the dialog went away without a button press. */
		if (selection == -1)
			return;
		/* Anything but button 0 (OK) is a cancel. */
		if (selection != 0) {
			gnome_dialog_close (GNOME_DIALOG (dialog));
			return;
		}

		i = gtk_radio_group_get_selected (group_ops);
		text = gtk_entry_get_text (GTK_ENTRY (input_range));

		if (parse_range (text, &range_input.start_col,
				 &range_input.start_row,
				 &range_input.end_col,
				 &range_input.end_row))
			break;

		gnumeric_notice (wb, GNOME_MESSAGE_BOX_ERROR,
				 _("You should introduce a valid cell range "
				   "in 'Input Range:'"));
		/* Hand the entry back with its whole text selected so it
		 * can be retyped directly. */
		gtk_widget_grab_focus (input_range);
		gtk_entry_set_position(GTK_ENTRY (input_range), 0);
		gtk_entry_select_region(GTK_ENTRY (input_range), 0,
					GTK_ENTRY(input_range)->text_length);
	}

	/* The selected index is negated to form the tool's columns_flag. */
	correlation_tool (wb, sheet, &range_input, !i);

	gnome_dialog_close (GNOME_DIALOG (dialog));
}
void covariance_dialog (Workbook *wb, Sheet *sheet)
{
static GtkWidget *dialog;
static GtkWidget *box, *hbox_x, *group_box;
static GtkWidget *input_range, *groupped_label;
static GtkWidget *input_range_label;
static GSList *group_ops;
static GtkWidget *r;
char *text;
int selection;
static Range range_input;
int i;
if (!dialog) {
dialog = gnome_dialog_new (_("Covariance"),
_("OK"),
GNOME_STOCK_BUTTON_CANCEL,
NULL);
gnome_dialog_close_hides (GNOME_DIALOG (dialog), TRUE);
gnome_dialog_set_parent (GNOME_DIALOG (dialog),
GTK_WINDOW (wb->toplevel));
box = gtk_vbox_new (FALSE, 0);
hbox_x = gtk_hbox_new (FALSE, 0);
group_box = gtk_vbox_new (FALSE, 0);
gtk_box_pack_start_defaults (GTK_BOX (box), hbox_x);
gtk_box_pack_start_defaults (GTK_BOX (GNOME_DIALOG
(dialog)->vbox), box);
input_range = gtk_entry_new_with_max_length (20);
input_range_label = gtk_label_new ("Input Range:");
gtk_box_pack_start_defaults (GTK_BOX (hbox_x),
input_range_label);
gtk_box_pack_start_defaults (GTK_BOX (hbox_x),
input_range);
groupped_label = gtk_label_new ("Groupped By:");
gtk_box_pack_start_defaults (GTK_BOX (GNOME_DIALOG
(dialog)->vbox),
groupped_label);
group_ops = NULL;
for (i = 0; groupped_ops [i]; i++) {
r = gtk_radio_button_new_with_label (group_ops,
_(groupped_ops[i])
);
group_ops = GTK_RADIO_BUTTON (r)->group;
gtk_box_pack_start_defaults (GTK_BOX (group_box), r);