Commit c8a6f6d9 authored by Arturo Espinosa's avatar Arturo Espinosa

New version of CSV file importer, uses Alan's CSV import code.


New version of CSV file importer, uses Alan's CSV import code.
parent 9f5aac9d
......@@ -13,9 +13,6 @@ Number formatting code:
Excel file loading code:
Michael Meeks (michael@imaginator.com)
CSV file loading code:
Vincent Renardias <vincent@ldsol.com>
Financial functions code:
Vladimir Vuksan <vuksan@veus.hr>
......@@ -24,3 +21,12 @@ TXT plugin for simple text import/export:
Numerous functions and tools:
Jukka-Pekka Iivonen <iivonen@iki.fi>
LibCSV:
Alan Cox (alan@redhat.com).
New LibCSV:
Miguel
Original CSV code:
Vincent Renardias <vincent@ldsol.com>
......@@ -3,6 +3,7 @@ Change between versions 0.27 and 0.28
Miguel:
Print support (imported from the GNUMERIC_PRINT branch).
Pattern selector works again.
Imported Alan's libCSV and rewrote ff-csv so that it actually works.
Jukka:
* Started the implementation of Data Analysis tools
......
......@@ -17,4 +17,6 @@ else
GUILE_DIR =
endif
SUBDIRS = sample stat excel ff-csv text $(PYTHON) $(PERL) $(GUILE_DIR) xbase
#SUBDIRS = sample stat excel ff-csv text $(PYTHON) $(PERL) $(GUILE_DIR) xbase
SUBDIRS = sample stat excel ff-csv text xbase
\ No newline at end of file
1999-07-02 Miguel de Icaza <miguel@nuclecu.unam.mx>
* libcsv.c: Rewrote to use Alan's library.
1999-06-25 Morten Welinder <terra@diku.dk>
* csv-io.c (insert_csv_cell): Use g_free. Cleanup the mess.
......
......@@ -16,7 +16,7 @@ CFLAGS += -g -Wall \
plugin_LTLIBRARIES = libcsv.la
libcsv_la_SOURCES = csv-io.h csv-io.c
libcsv_la_SOURCES = csv-io.h csv-io.c libcsv.c libcsv.h
INCLUDES = \
-I$(GNUMERIC_SRC_DIR) \
......
/*
* csv-io.c: save/read Sheets using a CSV encoding.
* (some code taken from xml-io.c by Daniel Veillard <Daniel.Veillard@w3.org>)
*
* Vincent Renardias <vincent@ldsol.com>
* Miguel de Icaza <miguel@gnu.org>
*
* $Id$
*/
/*
* TODO:
* handle quoted CSV
*/
#include <config.h>
#include <stdio.h>
#include <gnome.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <fcntl.h>
#include <errno.h>
#include "csv-io.h"
#include "plugin.h"
#include "gnumeric.h"
#include "file.h"
#include "gnumeric-util.h"
#include "libcsv.h"
static struct csv_table load_and_probe_table;
static void
insert_csv_cell (Sheet* sheet, const char *string, int start, int end, int col, int row)
load_table_into_sheet (struct csv_table *table, Sheet *sheet)
{
char *p;
Cell *cell;
int len;
if(sheet == NULL)
return;
len = end - start + 1;
if (len < 0)
return;
p = g_new (char, len + 1);
memcpy (p, string + start, len);
p[len] = 0;
#if 0
fprintf(stderr,"'%s' at col:%d, row:%d.\n", p, col, row);
#endif
if ((cell = sheet_cell_get (sheet, row, col)) == NULL){
if ((cell = sheet_cell_new (sheet, row, col)) == 0){
g_free (p);
return;
}
}
cell_set_text_simple (cell, p);
g_free (p);
}
static int
csv_parse_file (const char *filename,Sheet *sheet)
{
int fd;
struct stat buf;
int flen; /* file length */
char *file; /* data pointer */
int idx, lindex;
int crow=0,ccol=0,mcol=0; /* current/max col/row */
gboolean file_mmaped, data;
struct {
int non_printables;
int lines;
int commas;
} statistics;
if ((fd = open (filename, O_RDONLY)) < 0){
char *msg;
int err = errno;
msg = g_strdup_printf (_("While opening %s\n%s"),
filename, g_strerror (err));
gnumeric_notice (NULL, GNOME_MESSAGE_BOX_ERROR, msg);
g_free (msg);
return 0;
}
if (fstat (fd, &buf) == -1){
gnumeric_notice (NULL, GNOME_MESSAGE_BOX_ERROR,
"Cannot stat the file");
close(fd);
return 0;
}
/* FIXME: ARBITRARY VALUE */
if (buf.st_size < 1 || buf.st_size > 1000000){
close(fd);
return 0;
} else {
flen = buf.st_size;
}
file = mmap (NULL, flen, PROT_READ, MAP_PRIVATE, fd, 0);
if (file == (char*)-1) {
/* mmap failed. */
file_mmaped = FALSE;
file = g_new (char, flen);
if (file) {
if (read (fd, file, flen) != flen) {
g_free (file);
file = 0;
}
}
} else {
file_mmaped = TRUE;
}
close (fd);
if (!file) {
gnumeric_notice (NULL, GNOME_MESSAGE_BOX_ERROR,
_("Failed to read csv file"));
return 0;
}
statistics.non_printables = 0;
statistics.lines = 0;
statistics.commas = 0;
int row;
idx = 0;
lindex = 0;
data = FALSE;
for (row = 0; row < table->height; row++){
Cell *cell;
int col;
while (idx < flen) {
switch (file [idx]) {
case '\r':
if (idx + 1 == flen || file [idx+1] != '\n')
statistics.non_printables++;
idx++;
break;
case '\n':
if (data){ /* Non empty line */
insert_csv_cell (sheet, file, lindex, idx-1, crow, ccol);
}
data = FALSE;
lindex = idx+1;
if (ccol > mcol){
mcol=ccol;
}
ccol = 0;
crow++;
idx++;
statistics.lines++;
break;
case ',':
if(data){ /* Non empty cell */
insert_csv_cell (sheet, file, lindex, idx-1, crow, ccol);
}
data = FALSE;
lindex = idx+1;
ccol++;
idx++;
statistics.commas++;
break;
default:
if (!isspace ((unsigned char)file[idx]) &&
!isprint ((unsigned char)file[idx]))
statistics.non_printables++;
idx++;
data = TRUE;
break;
for (col = 0; col < CSV_WIDTH (table, row); col++){
cell = sheet_cell_new (sheet, col, row);
cell_set_text_simple (cell, CSV_ITEM (table, row, col));
}
}
if (sheet) {
sheet->max_col_used=mcol;
sheet->max_row_used=crow;
}
if (file_mmaped)
munmap (file, flen);
else
g_free (file);
/* Heuristics ahead! */
if (statistics.non_printables > flen / 200 ||
statistics.commas < statistics.lines / 2) {
return 0;
}
return 1;
}
static Workbook *
csv_read_workbook (const char* filename)
{
Workbook *book;
Sheet *sheet;
Workbook *book;
Sheet *sheet;
char *name;
book = workbook_new ();
if (!book) return NULL;
if (!book)
return NULL;
sheet = sheet_new (book, _("NoName"));
name = g_strdup_printf (_("Imported %s"), g_basename (filename));
sheet = sheet_new (book, name);
g_free (name);
workbook_attach_sheet (book, sheet);
/*if (sheet != NULL){
book->sheet = sheet;
}*/
if ((csv_parse_file (filename, sheet)) == 0) {
workbook_destroy (book);
return NULL;
}
load_table_into_sheet (&load_and_probe_table, sheet);
/*
* Destroy and tag as invalid
*/
csv_destroy_table (&load_and_probe_table);
load_and_probe_table.row = NULL;
return book;
}
......@@ -224,43 +64,46 @@ csv_read_workbook (const char* filename)
static gboolean
csv_probe (const char *filename)
{
if(csv_parse_file (filename, NULL) == 1){
return TRUE;
} else {
FILE *f;
f = fopen (filename, "r");
if (f == NULL)
return FALSE;
if (csv_load_table (f, &load_and_probe_table) == -1){
load_and_probe_table.row = NULL;
return FALSE;
}
fclose (f);
/*
* Careful:
*
* We assume that the load routine is going to be invoked immediately
*/
return TRUE;
}
static void
csv_init (void)
static int
csv_can_unload (PluginData *pd)
{
const char *desc = _("CSV (comma separated values)");
file_format_register_open (1, desc, csv_probe, csv_read_workbook);
/* file_format_register_save (".csv", desc, gnumericWriteCSVWorkbook);*/
/* We can always unload */
return TRUE;
}
/*
 * csv_cleanup_plugin: cleanup hook installed in pd->cleanup_plugin by
 * init_plugin.  It calls file_format_unregister_open with the same
 * csv_probe / csv_read_workbook pair that init_plugin passes to
 * file_format_register_open.  The pd argument is not used here.
 */
static void
csv_cleanup_plugin (PluginData *pd)
{
file_format_unregister_open (csv_probe, csv_read_workbook);
/* file_format_unregister_save (csv_read_workbook); */
}
static int
csv_can_unload (PluginData *pd)
{
return TRUE;
}
int
init_plugin (PluginData *pd)
init_plugin (PluginData * pd)
{
csv_init ();
file_format_register_open (1, _("Comma Separated Value (CSV) import"), csv_probe, csv_read_workbook);
pd->can_unload = csv_can_unload;
pd->cleanup_plugin = csv_cleanup_plugin;
pd->title = g_strdup (_("CSV (comma separated value file import/export plugin)"));
pd->title = g_strdup (_("Comma Separated Value (CSV) module"));
return 0;
}
/*
* Loader for comma delimited database files
*
* (c) Copright 1998 Building Number Three Ltd
*
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
/*
* Take a table of good old comma separated junk, and load it into
* a table
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "libcsv.h"
/*
 * Single shared line buffer: read_line() fills it with fgets() and
 * csv_load_table() hands it to count_fields()/smash_fields(), which cut
 * it up in place.
 */
static char linebuf[16384]; /* Hack for now */
/* Points at a static message describing the most recent failure. */
char *csv_error;
/*
 * Line counter: csv_load_table() resets it to -1 and read_line()
 * increments it once per line successfully read.
 */
int csv_line;
/*
 * csv_strdup: duplicate the NUL-terminated string p on the heap.
 *
 * Returns the new copy, or NULL when the allocation fails; in that
 * case csv_error is pointed at an "out of memory" message.
 */
static char *csv_strdup(const char *p)
{
	char *copy;

	copy = strdup(p);
	if (!copy)
		csv_error = "out of memory";

	return copy;
}
/*
 * count_fields: count the comma separated fields on one line.
 *
 * Commas that appear while a double quote is open are not counted as
 * separators.  A pair of adjacent double quotes is skipped as a unit and
 * never changes the open/closed quote state.
 *
 * Returns the number of fields (separators + 1), or -1 for an empty
 * line.
 */
static int count_fields(char *ptr)
{
	int separators = 0;
	int in_quotes = 0;
	char *cur;

	/* Blank line flag */
	if (ptr[0] == '\0')
		return -1;

	for (cur = ptr; *cur != '\0'; cur++) {
		if (*cur != '"') {
			if (*cur == ',' && !in_quotes)
				separators++;
			continue;
		}

		if (cur[1] == '"') {
			/* Doubled quote: step over the first half here, the
			 * loop increment steps over the second. */
			cur++;
			continue;
		}

		in_quotes = !in_quotes;
	}

	return separators + 1;
}
/*
 * cut_quoted: extract one double-quoted field.
 *
 * On entry *p points at the opening quote.  The text is edited in
 * place: each doubled quote inside the field is collapsed to a single
 * quote, and the closing quote is overwritten with a NUL.
 *
 * On success returns a heap copy of the field contents and leaves *p
 * pointing at the character immediately after the closing quote.
 * If no closing quote is found, sets csv_error and returns NULL
 * (a NULL with *p already advanced means the copy itself failed).
 */
static char *cut_quoted(char **p)
{
	char *start = *p + 1;	/* first character after the open quote */
	char *cur;

	for (cur = start; *cur != '\0'; cur++) {
		if (*cur != '"')
			continue;

		if (cur[1] == '"') {
			/*
			 * Embedded quote: slide the tail (terminator included)
			 * one position left so only one quote remains; the
			 * loop increment then steps past the surviving quote.
			 */
			memmove(cur, cur + 1, strlen(cur));
			continue;
		}

		/* Closing quote: terminate the field and hand back a copy. */
		*cur = '\0';
		*p = cur + 1;
		return csv_strdup(start);
	}

	/* Error */
	csv_error = "missing quote";
	return NULL;
}
/*
 * cut_comma: extract one unquoted field.
 *
 * The field runs from *ptr up to the next comma or, when there is none,
 * to the end of the string.  A comma, if found, is overwritten with a
 * NUL and *ptr is moved just past it; otherwise *ptr is moved onto the
 * terminating NUL.
 *
 * Returns a heap copy of the field (NULL if the copy fails).
 */
static char *cut_comma(char **ptr)
{
	char *field = *ptr;
	char *sep = strchr(field, ',');

	if (sep != NULL) {
		*sep = '\0';
		*ptr = sep + 1;
	} else {
		*ptr = field + strlen(field);
	}

	return csv_strdup(field);
}
static int smash_fields(char *ptr, char **array, int len)
{
if(*ptr==0)
{
int i;
for(i=0;i<len;i++)
array[i]="";
return 0;
}
while(*ptr)
{
if(*ptr=='"')
*array=cut_quoted(&ptr);
else
*array=cut_comma(&ptr);
/* printf("[%s]\n", *array);*/
if(*array++ == NULL)
return -1;
}
return 0;
}
/*
 * read_line: fetch the next line of f into the shared linebuf.
 *
 * One trailing '\n' and then one trailing '\r' are stripped, and
 * csv_line is incremented for each successful read.  Input is read
 * with fgets(), so at most sizeof(linebuf) - 1 characters are taken
 * per call.
 *
 * Returns linebuf, or NULL when fgets() reports end of file or an
 * error.
 */
static char * read_line(FILE *f)
{
	size_t len;

	if (!fgets(linebuf, sizeof(linebuf), f))
		return NULL;

	csv_line++;

	len = strlen(linebuf);
	if (len > 0 && linebuf[len - 1] == '\n')
		linebuf[--len] = '\0';
	if (len > 0 && linebuf[len - 1] == '\r')
		linebuf[len - 1] = '\0';

	return linebuf;
}
int csv_load_table(FILE *f, struct csv_table *ptr)
{
int width;
int length;
int size;
csv_line = -1;
if(read_line(f)==NULL)
return -1;
size=256;
ptr->row=(struct csv_row *)malloc(sizeof(struct csv_row)*size);
if(ptr->row==NULL)
{
csv_error="out of memory";
return -1;
}
length=0;
do
{
width = count_fields(linebuf);
if(width < 1)
width = 1;
ptr->row[length].data=(char **)malloc(width*sizeof(char *));
ptr->row[length].width = width;
if(ptr->row[length].data==NULL)
{
csv_error="out of memory";
return -1;
}
memset(ptr->row[length].data, 0, width*sizeof(char *));
if(smash_fields(linebuf, ptr->row[length].data, width)==-1)
return -1;
length++;
/* printf("-------------\n");*/
fflush(stdout);
if(length==size)
{
size+=256;
ptr->row=(struct csv_row *)realloc(ptr->row, sizeof(struct csv_row)*size);
if(ptr->row==NULL)
{
csv_error="out of memory";
return -1;
}
}
if(read_line(f)==NULL)
break;
}
while(1);
ptr->height = length;
ptr->size = size;
return 0;
}
/*
 * csv_destroy_table: release everything owned by a loaded table.
 *
 * Frees each cell string, each row's cell array and the row array
 * itself, then resets the table to row == NULL, height == 0, size == 0
 * so that a second call, or a call on a table already tagged invalid
 * with row == NULL, is a harmless no-op.
 */
void csv_destroy_table(struct csv_table *table)
{
	int i, j;

	if (table == NULL || table->row == NULL)
		return;

	for (i = 0; i < table->height; i++) {
		struct csv_row *r = &table->row[i];

		if (r->data == NULL)
			continue;

		for (j = 0; j < r->width; j++)
			free(r->data[j]);

		/* The cell array is its own allocation and must go too. */
		free(r->data);
		r->data = NULL;
	}

	free(table->row);
	table->row = NULL;
	table->height = 0;
	table->size = 0;
}
#ifdef TEST
/*
 * Stand-alone smoke test, built only when TEST is defined: loads a CSV
 * table from standard input and prints the result of csv_load_table().
 * The process exit status is 0 when the load succeeded and 1 otherwise.
 */
int main(int argc, char **argv)
{
	struct csv_table t;
	int rc;

	(void)argc;
	(void)argv;

	rc = csv_load_table(stdin, &t);
	printf("Got %d\n", rc);

	return rc == 0 ? 0 : 1;
}
#endif
#ifndef LIBCSV_H
#define LIBCSV_H

#include <stdio.h>	/* FILE, used by csv_load_table() */

/* Points at a static message describing the most recent failure. */
extern char *csv_error;
/* Line counter maintained by the loader while it reads its input. */
extern int csv_line;

/* One line of the input: width cell strings stored in data[]. */
struct csv_row
{
	char **data;
	int width;
};

/*
 * A loaded table: height rows are in use out of size allocated slots
 * in row[].
 */
struct csv_table
{
	struct csv_row *row;
	int height;
	int size;
};

/*
 * Read all of f into *ptr.  Returns 0 on success and -1 on failure.
 */
extern int csv_load_table(FILE *f, struct csv_table *ptr);

/* Release the storage held by a table filled in by csv_load_table(). */
extern void csv_destroy_table(struct csv_table *ptr);

/* Cell string at row r, column c of table pointer t (no bounds check). */
#define CSV_ITEM(t,r,c) ((t)->row[(r)].data[(c)])
/* Number of cells in row r of table pointer t (no bounds check). */
#define CSV_WIDTH(t,r) ((t)->row[(r)].width)

#endif /* LIBCSV_H */
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment