Commit a4cd1aba authored by Michael Meeks

Preliminary Excel support; breaks gnumeric until Miguel can fix it.

parent 187cd0cf
/*
* ms-biff.c: MS Excel BIFF support for Gnumeric
*
* Author:
* Michael Meeks (michael@imaginator.com)
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <malloc.h>
#include <assert.h>
#include <ctype.h>
#include "ms-ole.h"
#include "ms-biff.h"
// Decodes the BOF record currently held in q.
//
// q:       query whose opcode / length / data describe the current record.
// returns: a malloc'ed BIFF_BOF_DATA (release with free_ms_biff_bof_data),
//          or NULL when the allocation fails.
//
// version is chosen from the high byte of q->opcode; for high byte 8 the word
// at q->data decides between eBiffV8 (0x0600) and eBiffV5 (0x0500).  type is
// chosen from the word at q->data+2.  When the record is not a BOF, or is too
// short to hold those two words, both fields are left as "unknown".
BIFF_BOF_DATA *new_ms_biff_bof_data (BIFF_QUERY *q)
{
    BIFF_BOF_DATA *ans = (BIFF_BOF_DATA *)malloc(sizeof(BIFF_BOF_DATA)) ;

    if (ans == NULL)
        return NULL ;

    // Defaults for anything that cannot be decoded.
    ans->version = eBiffVUnknown ;
    ans->type = eBiffTUnknown ;

    if ((q->opcode&0xff) != BIFF_BOF)
    {
        printf ("Not a BOF !\n") ;
        return ans ;
    }

    // The length comes from the file being read, so it is checked at run time
    // rather than with assert(): a short record must neither abort the program
    // nor be read past its end when NDEBUG is defined.
    if (q->length < 4)
    {
        printf ("BOF record too short (%d bytes)\n", q->length) ;
        return ans ;
    }

    // Determine version from the BOF opcode
    switch (q->opcode>>8)
    {
    case 0:
        ans->version = eBiffV2 ;
        break ;
    case 2:
        ans->version = eBiffV3 ;
        break ;
    case 4:
        ans->version = eBiffV4 ;
        break ;
    case 8: // More complicated: the version word in the record decides
        switch (GETWORD(q->data))
        {
        case 0x0600:
            ans->version = eBiffV8 ;
            break ;
        case 0x500:
            ans->version = eBiffV5 ; // OR ebiff7 : FIXME ? !
            break ;
        default:
            printf ("Unknown BIFF sub-number in BOF %x\n", q->opcode) ;
            ans->version = eBiffVUnknown ;
            break ;
        }
        break ;
    default:
        printf ("Unknown BIFF number in BOF %x\n", q->opcode) ;
        ans->version = eBiffVUnknown ;
        break ;
    }

    switch (GETWORD(q->data+2))
    {
    case 0x0005:
        ans->type = eBiffTWorkbook ;
        break ;
    case 0x0006:
        ans->type = eBiffTVBModule ;
        break ;
    case 0x0010:
        ans->type = eBiffTWorksheet ;
        break ;
    case 0x0020:
        ans->type = eBiffTChart ;
        break ;
    case 0x0040:
        ans->type = eBiffTMacrosheet ;
        break ;
    case 0x0100:
        ans->type = eBiffTWorkspace ;
        break ;
    default:
        ans->type = eBiffTUnknown ;
        printf ("Unknown BIFF type in BOF %x\n", GETWORD(q->data+2)) ;
        break ;
    }

    printf ("BOF %x, %d == %d, %d\n", q->opcode, q->length,
            ans->version, ans->type) ;
    return ans ;
}
// Releases a BIFF_BOF_DATA previously returned by new_ms_biff_bof_data.
// The structure holds no pointers of its own, so one free() is all it takes.
void
free_ms_biff_bof_data (BIFF_BOF_DATA *data)
{
    void *block = data ;

    free (block) ;
}
/*
* ms-biff.h: MS Excel BIFF support for Gnumeric
*
* Author:
* Michael Meeks (michael@imaginator.com)
*/
#ifndef GNUMERIC_BIFF_H
#define GNUMERIC_BIFF_H
#include "ms-ole.h"
// p must be a BYTE* !
// Builds a 64-bit value from two GETLONG reads: the one at p supplies the low
// 32 bits and the one at p+4 the high 32 bits.  Each half is masked to 32 bits
// before being combined, so a sign-extended low half cannot disturb the high
// half.  p is evaluated twice - do not pass an expression with side effects.
#define GETDLONG(p) ((long long int)(((unsigned long long)GETLONG(p) & 0xffffffffULL) | \
                                     (((unsigned long long)GETLONG((p)+4) & 0xffffffffULL) << 32)))
// Oh dear, silly really, brutal endianness hack: FIXME
// #define GETDOUBLE(p) ((double)GETDLONG(p))
// Reads a double by casting p to double* and dereferencing it: the bytes are
// taken exactly as stored, with no byte-order conversion.
// NOTE(review): callers pass addresses such as q->data + 6, which may not be
// suitably aligned for a double on every host - confirm, or copy the bytes out.
#define GETDOUBLE(p) (*((double *)(p)))
// Cursor over a stream of BIFF records; ms_next_biff updates it in place.
typedef struct _BIFF_QUERY
{
BYTE ms_op ; // NOTE(review): presumably the high byte of opcode - confirm in the query code
BYTE ls_op ; // record id the cell reader switches on (BIFF_BLANK, BIFF_RK, ...)
WORD opcode ; // full 16-bit opcode; its low byte is what gets compared with BIFF_BOF
WORD length ; // NB. can be extended by a continue opcode
BYTE *data ; // record payload; the parsers address fields as offsets from here
int data_malloced ; // is *data a copy ?
LONG streamPos ; // count of bytes into the stream
MS_OLE_STREAM_POS *pos ;
} BIFF_QUERY ;
//------------------------------------------------------------------------
// This set of functions is for parsing an entire file's raw BIFF records.
// It is recommended that you use the subset of the API handling 'files'
// to split the stream into files first, before using ms_next_biff.
// Opens OLE file 'workbook' or 'book' depending.
extern BIFF_QUERY *new_ms_biff_query_file (MS_OLE_FILE *) ;
// Creates a query from an existing stream position.
extern BIFF_QUERY *new_ms_biff_query_here (MS_OLE_STREAM_POS *p) ;
// Creates a query from an existing one; the source query is not modified.
extern BIFF_QUERY *copy_ms_biff_query (const BIFF_QUERY *p) ;
// Updates the BIFF_QUERY structure.
// NOTE(review): the meaning of the int result is not visible here - check the definition.
extern int ms_next_biff (BIFF_QUERY *) ;
// Frees a query obtained from one of the functions above.
extern void free_ms_biff_query (BIFF_QUERY *) ;
//------------------------------------------------------------------------
// This API firstly generates a list of available 'files' within an OLE2
// stream, and allows you to selectively read from them.

// BIFF format version; each enumerator's value is its version number and
// eBiffVUnknown is 0.  new_ms_biff_bof_data reports a 0x0500 BOF as eBiffV5,
// so eBiffV7 is never produced by that parser.
typedef enum _eBiff_version { eBiffV2=2, eBiffV3=3, eBiffV4=4, eBiffV5=5, eBiffV7=7,
eBiffV8=8, eBiffVUnknown=0} eBiff_version ;
// Kind of content a BOF record announces.  These values are internal: they
// are not the codes stored in the file, the parser maps file codes onto them.
typedef enum _eBiff_filetype { eBiffTWorkbook=0, eBiffTVBModule=1, eBiffTWorksheet=2,
eBiffTChart=3, eBiffTMacrosheet=4, eBiffTWorkspace=5,
eBiffTUnknown=6 } eBiff_filetype ;
// Cell / XF types
// Visibility state.
typedef enum _eBiff_hidden { eBiffHVisible=0, eBiffHHidden=1,
eBiffHVeryHidden=2 } eBiff_hidden ;
// Cell protection flag.
typedef enum _eBiff_locked { eBiffLLocked=1, eBiffLUnlocked=0 } eBiff_locked ;
// Whether an XF record describes a style or a cell.
typedef enum _eBiff_xftype { eBiffXStyle=0, eBiffXCell=1 } eBiff_xftype ;
// Format flavour flag: MS or Lotus.
typedef enum _eBiff_format { eBiffFMS=0, eBiffFLotus=1 } eBiff_format ;
// Horizontal alignment.
typedef enum _eBiff_alignment { eBiffAgeneral=0, eBiffAleft = 1,
eBiffAcentre=2, eBiffAright=3,
eBiffAfill=4, eBiffAjustify=5,
eBiffACentreAcrossSelection=6 } eBiff_alignment ;
// Vertical alignment.
typedef enum _eBiff_vert_align { eBiffVAtop=0, eBiffVAcentre=1,
eBiffVAbottom=2, eBiffVAjustify=3} eBiff_vert_align ;
// Text wrapping; note that "wrap" is the zero value here.
typedef enum _eBiff_wrap { eBiffWWrap=0, eBiffWNoWrap=1 } eBiff_wrap ;
// Reading direction.
typedef enum _eBiff_eastern { eBiffEContext=0, eBiffEleftToRight=1,
eBiffErightToLeft=2 } eBiff_eastern ;
// Side of a cell; used as the index into the per-side border arrays.
typedef enum _eBiff_direction { eBiffDirTop=0, eBiffDirBottom=1,
eBiffDirLeft=2, eBiffDirRight=3 } eBiff_direction ;
// Diagonal border orientation; there is no enumerator for the value 0.
typedef enum _eBiff_border_orientation { eBiffBODiagDown=1,
eBiffBODiagUp=2,
eBiffBODiagBoth=3 } eBiff_border_orientation ;
// Border line styles.  The XF parser stores raw bit-fields from the record
// straight into variables of this type, so these numbers must stay as they are.
typedef enum _eBiff_border_linestyle // Magic numbers !
{
eBiffBorderNone=0, eBiffBorderThin=1, eBiffBorderMedium=2,
eBiffBorderDashed=3, eBiffBorderDotted=4, eBiffBorderThick=5,
eBiffBorderDouble=6, eBiffBorderHair=7, eBiffBorderMediumDash=8,
eBiffBorderDashDot=9, eBiffBorderMediumDashDot=10,
eBiffBorderDashDotDot=11, eBiffBorderMediumDashDotDot=12,
eBiffBorderSlantedDashDot=13
} eBiff_border_linestyle ;
// Result of decoding one BOF record (see new_ms_biff_bof_data).
typedef struct _BIFF_BOF_DATA
{
eBiff_version version ; // BIFF version, eBiffVUnknown if not recognised
eBiff_filetype type ; // kind of content, eBiffTUnknown if not recognised
} BIFF_BOF_DATA ;
// Privatish BIFF_FILE functions
// Decodes the record currently held in pos into a malloc'ed BIFF_BOF_DATA.
extern BIFF_BOF_DATA *new_ms_biff_bof_data (BIFF_QUERY *pos) ;
// Releases a BIFF_BOF_DATA returned by new_ms_biff_bof_data.
extern void free_ms_biff_bof_data (BIFF_BOF_DATA *data) ;
//------------------------------------------------------------------------
// BIFF record identifiers.  They are one byte wide: the parsers compare them
// with the low byte of the opcode (opcode & 0xff, or the ls_op field).
#define BIFF_BLANK 0x01
#define BIFF_NUMBER 0x03
#define BIFF_LABEL 0x04
#define BIFF_FORMULA 0x06
#define BIFF_ROW 0x08
#define BIFF_BOF 0x09
#define BIFF_EOF 0x0a
#define BIFF_PRECISION 0x0e
#define BIFF_XF_OLD 0x43
#define BIFF_RK 0x7e
#define BIFF_BOUNDSHEET 0x85
#define BIFF_MULBLANK 0xbe
#define BIFF_RSTRING 0xd6
#define BIFF_XF 0xe0
//------------------------------------------------------------------------
#endif
/*
* ms-excel.c: MS Excel support for Gnumeric
*
* Author:
* Michael Meeks (michael@imaginator.com)
*/
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <assert.h>
#include <config.h>
#include <stdio.h>
#include <gnome.h>
#include "gnumeric.h"
#include "gnome-xml/tree.h"
#include "gnome-xml/parser.h"
#include "color.h"
#include "sheet-object.h"
#include "ms-ole.h"
#include "ms-biff.h"
#include "ms-excel.h"
// Debug helper: prints exactly n characters starting at ptr, enclosed in
// single quotes and followed by a newline.  Wrapped in do { } while (0) so the
// macro behaves as one statement, e.g. in an unbraced if / else.
#define STRNPRINTF(ptr,n) do { int xxxlp; printf ("'") ; for (xxxlp=0;xxxlp<(n);xxxlp++) printf ("%c", (ptr)[xxxlp]) ; printf ("'\n") ; } while (0)
// This needs proper unicode support !
//
// Copies length bytes starting at ptr into a newly malloc'ed, NUL-terminated
// string, one byte per character.
//
// ptr:     first byte of the text.
// length:  number of bytes to copy; a negative value is treated as 0.
// returns: the new string, which the caller must free(), or NULL when the
//          allocation fails.
static char *ms_get_biff_text (BYTE *ptr, int length)
{
    int lp ;
    char *ans ;

    // A negative length would make the size computation wrap around and the
    // terminator land outside the allocation.
    if (length < 0)
        length = 0 ;

    ans = (char *)malloc((size_t)length + 1) ;
    if (ans == NULL)
        return NULL ;

    for (lp=0;lp<length;lp++)
        ans[lp] = (char)ptr[lp] ;
    ans[length] = 0 ;
    return ans ;
}
// Pass this a BIFF_QUERY *
// Field accessors for cell records: each reads one 16-bit word at a fixed
// offset from the record's data pointer.  The argument is parenthesized so
// that any pointer-valued expression (e.g. q + 1) can be passed.
#define BIFF_GETROW(p) (GETWORD((p)->data + 0))
#define BIFF_GETCOL(p) (GETWORD((p)->data + 2))
#define BIFF_GETXFREC(p) (GETWORD((p)->data + 4))
#define BIFF_GETSTRLEN(p) (GETWORD((p)->data + 6))
// Decoded BOUNDSHEET record, built by new_biff_boundsheet_data.
typedef struct _BIFF_BOUNDSHEET_DATA
{
LONG streamStartPos ; // 32-bit value read from the start of the record
eBiff_filetype type ; // kind of sheet
eBiff_hidden hidden ; // visibility state
char *name ; // malloc'ed sheet name, released by free_biff_boundsheet_data
} BIFF_BOUNDSHEET_DATA ;
// Decodes the BOUNDSHEET record held in q.
//
// q:       query positioned on the record.
// ver:     BIFF version of the stream; anything other than 5, 7 or 8 is
//          handled like version 7, with a warning.
// returns: a malloc'ed BIFF_BOUNDSHEET_DATA (release with
//          free_biff_boundsheet_data), or NULL when memory runs out.
//
// Layout as read here: long at 0 = stream position, byte at 4 = sheet type,
// low two bits of byte at 5 = visibility, then the name (BIFF8: 16-bit length
// at 6, text at 8; earlier: 8-bit length at 6, text at 7).
// NOTE(review): the published BoundSheet8 layout appears to put visibility at
// offset 4 and sheet type at offset 5, and to use a one-byte name length
// followed by a flags byte - confirm against real files before relying on
// type / hidden for non-default sheets.
static BIFF_BOUNDSHEET_DATA *new_biff_boundsheet_data (BIFF_QUERY *q, eBiff_version ver)
{
    BIFF_BOUNDSHEET_DATA *ans = (BIFF_BOUNDSHEET_DATA *)malloc (sizeof(BIFF_BOUNDSHEET_DATA)) ;
    int name_len ;

    if (ans == NULL)
        return NULL ;

    if (ver != eBiffV5 && // Testing seems to indicate that Biff5 is compatible with Biff7 here.
        ver != eBiffV7 &&
        ver != eBiffV8)
    {
        printf ("Unknown BIFF Boundsheet spec. Assuming same as Biff7 FIXME\n") ;
        ver = eBiffV7 ;
    }

    ans->streamStartPos = GETLONG(q->data) ;

    switch (GETBYTE(q->data+4))
    {
    case 0:
        ans->type = eBiffTWorksheet ;
        break ;
    case 1:
        ans->type = eBiffTMacrosheet ;
        break ;
    case 2:
        ans->type = eBiffTChart ;
        break ;
    case 6:
        ans->type = eBiffTVBModule ;
        break ;
    default:
        printf ("Unknown sheet type : %d\n", GETBYTE(q->data+4)) ;
        ans->type = eBiffTUnknown ;
        break ;
    }

    switch ((GETBYTE(q->data+5)) & 0x3)
    {
    case 0:
        ans->hidden = eBiffHVisible ;
        break ;
    case 1:
        ans->hidden = eBiffHHidden ;
        break ;
    case 2:
        ans->hidden = eBiffHVeryHidden ;
        break ;
    default:
        // Report the byte that was actually tested (offset 5).
        printf ("Unknown sheet hiddenness %d\n", (GETBYTE(q->data+5)) & 0x3) ;
        ans->hidden = eBiffHVisible ;
        break ;
    }

    if (ver==eBiffV8)
    {
        name_len = GETWORD(q->data+6) ;
        ans->name = ms_get_biff_text (q->data+8, name_len) ;
    }
    else
    {
        name_len = GETBYTE(q->data+6) ;
        ans->name = ms_get_biff_text (q->data+7, name_len) ;
    }
    if (ans->name == NULL)
    {
        free (ans) ;
        return NULL ;
    }

    // Cast so the argument matches %lx whatever the width of LONG is.
    printf ("Blocksheet : '%s', %d:%d offset %lx\n", ans->name, ans->type, ans->hidden,
            (unsigned long)ans->streamStartPos) ;
    return ans ;
}
// Releases a BIFF_BOUNDSHEET_DATA together with the name string it owns.
static void
free_biff_boundsheet_data (BIFF_BOUNDSHEET_DATA *d)
{
    char *owned_name = d->name ;

    free (owned_name) ;
    free (d) ;
}
// Decoded contents of one XF record, filled in by new_biff_xf_data.
typedef struct _BIFF_XF_DATA
{
WORD font_idx ; // word at offset 0 of the record
WORD format_idx ; // word at offset 2 of the record
eBiff_hidden hidden ;
eBiff_locked locked ;
eBiff_xftype xftype ; // -- Very important field...
eBiff_format format ;
WORD parentstyle ; // upper 12 bits of the word at offset 4
eBiff_alignment alignment ;
eBiff_wrap wrap ;
eBiff_vert_align valign ;
BYTE rotation ; // BIFF8: raw byte from the record; earlier: 0, 90, 180 or 255
eBiff_eastern eastern ; // decoded from BIFF8 records
BYTE border_color[4] ; // Array [eBiff_direction]
eBiff_border_linestyle border_line[4] ; // Array [eBiff_direction]
eBiff_border_orientation border_orientation ; // decoded from BIFF8 records
eBiff_border_linestyle border_linestyle ; // decoded from BIFF8 records
BYTE fill_pattern_idx ;
BYTE foregnd_col ;
BYTE backgnd_col ;
} BIFF_XF_DATA ;
// See S59E1E.HTM !
//
// Decodes the XF record held in q.
//
// q:       query positioned on the XF record.
// ver:     BIFF version; eBiffV8 selects the BIFF8 layout, anything else the
//          older layout.
// returns: a malloc'ed BIFF_XF_DATA (release with free_biff_xf_data), or NULL
//          when the allocation fails.
//
// Multi-valued bit-fields are decoded with switch statements.  An expression
// of the form ((field == k) & value) cannot be used for this: the comparison
// yields 0 or 1, so the AND keeps only bit 0 of value and every even value
// (2, 90, 180, ...) collapses to 0.
static BIFF_XF_DATA *new_biff_xf_data (BIFF_QUERY *q, eBiff_version ver)
{
    BIFF_XF_DATA *xf = (BIFF_XF_DATA *)malloc (sizeof(BIFF_XF_DATA)) ;
    LONG data, subdata ;

    if (xf == NULL)
        return NULL ;

    // Fields that only the BIFF8 layout carries get a defined value up front.
    xf->eastern = eBiffEContext ;
    xf->border_orientation = 0 ; // no diagonal border
    xf->border_linestyle = 0 ;   // numerically "no line"

    xf->font_idx = GETWORD(q->data) ;
    xf->format_idx = GETWORD(q->data+2) ;

    data = GETWORD(q->data+4) ;
    xf->locked = (data&0x0001)?eBiffLLocked:eBiffLUnlocked ;
    xf->hidden = (data&0x0002)?eBiffHHidden:eBiffHVisible ;
    xf->xftype = (data&0x0004)?eBiffXStyle:eBiffXCell ;
    xf->format = (data&0x0008)?eBiffFLotus:eBiffFMS ;
    xf->parentstyle = (data>>4) ;

    data = GETWORD(q->data+6) ;
    xf->alignment = (data&0x0007) ;
    xf->wrap = (data&0x0008)?eBiffWWrap:eBiffWNoWrap ;
    subdata = (data&0x0070)>>4 ;
    switch (subdata)
    {
    case 1:
        xf->valign = eBiffVAcentre ;
        break ;
    case 2:
        xf->valign = eBiffVAbottom ;
        break ;
    case 3:
        xf->valign = eBiffVAjustify ;
        break ;
    default: // 0, and values this code has no name for
        xf->valign = eBiffVAtop ;
        break ;
    }
    // FIXME: ignored bit 0x0080
    if (ver == eBiffV8)
        xf->rotation = (data>>8) ;
    else
    {
        subdata = (data&0x0300)>>8 ;
        switch (subdata)
        {
        case 1:
            xf->rotation = 255 ; // vertical letters no rotation
            break ;
        case 2:
            xf->rotation = 90 ;  // 90deg anti-clock
            break ;
        case 3:
            xf->rotation = 180 ; // 90deg clock
            break ;
        default:
            xf->rotation = 0 ;
            break ;
        }
    }

    if (ver == eBiffV8)
    {
        // FIXME: Got bored and stop implementing everything, there is just too much !
        data = GETWORD(q->data+8) ;
        subdata = (data&0x00C0)>>6 ; // mask 0x00C0 covers bits 6-7
        switch (subdata)
        {
        case 1:
            xf->eastern = eBiffEleftToRight ;
            break ;
        case 2:
            xf->eastern = eBiffErightToLeft ;
            break ;
        default:
            xf->eastern = eBiffEContext ;
            break ;
        }
    }

    if (ver == eBiffV8) // Very different now !
    {
        data = GETWORD(q->data+10) ;
        subdata = data ;
        xf->border_line[eBiffDirLeft] = (subdata&0xf) ;
        subdata = subdata>>4 ;
        xf->border_line[eBiffDirRight] = (subdata&0xf) ;
        subdata = subdata>>4 ;
        xf->border_line[eBiffDirTop] = (subdata&0xf) ;
        subdata = subdata>>4 ;
        xf->border_line[eBiffDirBottom] = (subdata&0xf) ;

        data = GETWORD(q->data+12) ;
        subdata = data ;
        xf->border_color[eBiffDirLeft] = (subdata&0x7f) ;
        subdata = subdata >> 7 ;
        xf->border_color[eBiffDirRight] = (subdata&0x7f) ;
        subdata = (data&0xc000)>>14 ; // mask 0xc000 covers bits 14-15
        switch (subdata)
        {
        case 1:
            xf->border_orientation = eBiffBODiagDown ;
            break ;
        case 2:
            xf->border_orientation = eBiffBODiagUp ;
            break ;
        case 3:
            xf->border_orientation = eBiffBODiagBoth ;
            break ;
        default:
            xf->border_orientation = 0 ; // no diagonal border
            break ;
        }

        data = GETLONG(q->data+14) ;
        subdata = data ;
        xf->border_color[eBiffDirTop] = (subdata&0x7f) ;
        subdata = subdata >> 7 ;
        xf->border_color[eBiffDirBottom] = (subdata&0x7f) ;
        xf->border_linestyle = (data&0x01e00000)>>21 ;
        xf->fill_pattern_idx = (data&0xfc000000)>>26 ;

        data = GETWORD(q->data+18) ;
        xf->foregnd_col = (data&0x007f) ;
        xf->backgnd_col = (data&0x3f80)>>7 ;
    }
    else
    {
        data = GETWORD(q->data+8) ;
        xf->foregnd_col = (data&0x007f) ;
        xf->backgnd_col = (data&0x1f80)>>7 ;

        data = GETWORD(q->data+10) ;
        xf->fill_pattern_idx = data&0x03f ;
        // Luckily this maps nicely onto the new set.
        xf->border_line[eBiffDirBottom] = (data&0x1c0)>>6 ;
        xf->border_color[eBiffDirBottom] = (data&0xfe00)>>9 ;

        data = GETWORD(q->data+12) ;
        subdata = data ;
        xf->border_line[eBiffDirTop] = (subdata&0x07) ;
        subdata = subdata >> 3 ;
        xf->border_line[eBiffDirLeft] = (subdata&0x07) ;
        subdata = subdata >> 3 ;
        xf->border_line[eBiffDirRight] = (subdata&0x07) ;
        subdata = subdata >> 3 ;
        xf->border_color[eBiffDirTop] = subdata ;

        data = GETWORD(q->data+14) ;
        subdata = data ;
        xf->border_color[eBiffDirLeft] = (subdata&0x7f) ;
        subdata = subdata >> 7 ;
        xf->border_color[eBiffDirRight] = (subdata&0x7f) ;
    }
    return xf ;
}
// Releases a BIFF_XF_DATA returned by new_biff_xf_data; the structure owns
// no other allocations.
static void
free_biff_xf_data (BIFF_XF_DATA *d)
{
    void *block = d ;

    free (block) ;
}
// One sheet being imported: pairs the Gnumeric sheet that receives the cells
// with the workbook the sheet was created for.
typedef struct _MS_EXCEL_SHEET
{
Sheet *gnum_sheet ; // created with sheet_new, destroyed by free_ms_excel_sheet
struct _MS_EXCEL_WORKBOOK *wb ; // back pointer, not owned
} MS_EXCEL_SHEET ;
// Import-side state for one workbook.
typedef struct _MS_EXCEL_WORKBOOK
{
GList *boundsheet_data ; // elements are BIFF_BOUNDSHEET_DATA *, freed by free_ms_excel_workbook
GList *XF_records ; // elements are BIFF_XF_DATA *, freed by free_ms_excel_workbook
GList *excel_sheets ; // NOTE(review): presumably MS_EXCEL_SHEET *; free_ms_excel_workbook does not touch it
// Gnumeric parallel workbook
Workbook *gnum_wb ;
} MS_EXCEL_WORKBOOK ;
// Creates the import-side record for one sheet, together with a new Gnumeric
// sheet called name in wb's Gnumeric workbook.
//
// wb:      workbook the sheet belongs to; stored as a back pointer.
// name:    name handed to sheet_new.
// returns: a malloc'ed MS_EXCEL_SHEET (release with free_ms_excel_sheet), or
//          NULL when the allocation fails, in which case no Gnumeric sheet is
//          created either.
static MS_EXCEL_SHEET *new_ms_excel_sheet (MS_EXCEL_WORKBOOK *wb, char *name)
{
    MS_EXCEL_SHEET *ans = (MS_EXCEL_SHEET *)malloc(sizeof(MS_EXCEL_SHEET)) ;

    if (ans == NULL)
        return NULL ;

    ans->gnum_sheet = sheet_new (wb->gnum_wb, name) ;
    ans->wb = wb ;
    return ans ;
}
// Writes text into the cell at (col, row) of the sheet's Gnumeric sheet.
// An existing cell is reused; otherwise a new one is created.  If neither
// lookup nor creation yields a cell, a message is printed and nothing is set.
static void
ms_excel_sheet_insert (MS_EXCEL_SHEET *sheet, int col, int row, char *text)
{
    Cell *target = sheet_cell_get (sheet->gnum_sheet, col, row) ;

    if (!target)
        target = sheet_cell_new (sheet->gnum_sheet, col, row) ;

    if (target)
    {
        cell_set_text_simple (target, text) ;
        return ;
    }

    printf ("No cell error at [%d, %d]\n", col, row) ;
}
// Tears down an MS_EXCEL_SHEET: the Gnumeric sheet it wraps is destroyed
// first, then the wrapper itself is released.
static void
free_ms_excel_sheet (MS_EXCEL_SHEET *ptr)
{
    void *wrapper = ptr ;

    sheet_destroy (ptr->gnum_sheet) ;
    free (wrapper) ;
}
// Allocates an empty MS_EXCEL_WORKBOOK: no Gnumeric workbook attached and all
// three lists empty.
//
// returns: the new workbook, or NULL when the allocation fails.
static MS_EXCEL_WORKBOOK *new_ms_excel_workbook (void)
{
    MS_EXCEL_WORKBOOK *ans = (MS_EXCEL_WORKBOOK *)malloc(sizeof(MS_EXCEL_WORKBOOK)) ;

    if (ans == NULL)
        return NULL ;

    ans->gnum_wb = NULL ;
    ans->boundsheet_data = NULL ;
    ans->excel_sheets = NULL ;
    ans->XF_records = NULL ;
    return ans ;
}
// Attaches the Gnumeric sheet wrapped by ans to the Gnumeric workbook
// wrapped by wb.
static void
ms_excel_workbook_attach (MS_EXCEL_WORKBOOK *wb, MS_EXCEL_SHEET *ans)
{
    workbook_attach_sheet (
        wb->gnum_wb,
        ans->gnum_sheet) ;
}
// Releases the per-record data held by a workbook: every BIFF_BOUNDSHEET_DATA
// in boundsheet_data and every BIFF_XF_DATA in XF_records, followed by the
// list cells of each list.
// NOTE(review): neither excel_sheets nor wb itself is freed here - confirm
// that the caller is responsible for them.
static void
free_ms_excel_workbook (MS_EXCEL_WORKBOOK *wb)
{
    GList *node ;

    for (node = g_list_first (wb->boundsheet_data) ; node ; node = node->next)
    {
        BIFF_BOUNDSHEET_DATA *sheet_info = node->data ;

        free_biff_boundsheet_data (sheet_info) ;
    }
    g_list_free (wb->boundsheet_data) ;

    for (node = g_list_first (wb->XF_records) ; node ; node = node->next)
    {
        BIFF_XF_DATA *xf_info = node->data ;

        free_biff_xf_data (xf_info) ;
    }
    g_list_free (wb->XF_records) ;
}
static void ms_excel_read_cell (BIFF_QUERY *q, MS_EXCEL_SHEET *sheet, eBiff_version ver)
{
Cell *cell ;
// ----------------------------------------------------------------------------------
// NB. Microsoft Docs give offsets from start of biff record, subtract 4 their docs.
// ----------------------------------------------------------------------------------
switch (q->ls_op)
{
case BIFF_BLANK: // FIXME: a cell with just XF info, but no XF yet :-)
printf ("Cell [%d, %d] XF = %x\n", BIFF_GETCOL(q), BIFF_GETROW(q),
BIFF_GETXFREC(q)) ;
break ;
case BIFF_MULBLANK: // FIXME: S95DA7.HTM - Confusing !
printf ("Cells in row %d are blank starting at col %d until col %d\n",
BIFF_GETROW(q), GETWORD(q->data+2), GETWORD(q->data+8)) ;
// Presumably followed by the array of XF indexes ?
break ;
case BIFF_RSTRING: // Ignore formatting for now : FIXME
// printf ("Cell [%d, %d] = ", BIFF_GETCOL(q), BIFF_GETROW(q)) ;
// dump (q->data, q->length) ;
// STRNPRINTF(q->data + 8, BIFF_GETSTRLEN(q)) ;
ms_excel_sheet_insert (sheet, BIFF_GETCOL(q), BIFF_GETROW(q),
ms_get_biff_text(q->data + 8, BIFF_GETSTRLEN(q))) ;
break;
case BIFF_NUMBER: // FIXME: Font info needed
{
char buf[65] ;
double num = GETDOUBLE(q->data + 6) ;
// long long int l = 0x123456789abcdefLL ;
// printf ("Cell [%d, %d] = %f\n", BIFF_GETCOL(q), BIFF_GETROW(q), num) ;
sprintf (buf, "%f", num) ;
ms_excel_sheet_insert (sheet, BIFF_GETCOL(q), BIFF_GETROW(q), buf) ;
// dump (q->data, q->length) ;
break;
}
case BIFF_RK: // FIXME: S59DDA.HTM - test IEEE stuff on other endian platforms
{
LONG number ;
LONG tmp[2] ;
char buf[65] ;
double answer ;
enum eType { eIEEE = 0, eIEEEx10 = 1, eInt = 2, eIntx100 = 3 } type ;
number = GETLONG(q->data+6) ;
printf ("RK number : 0x%x, length 0x%x\n", q->opcode, q->length) ;
printf ("position [%d,%d] = %lx\n", BIFF_GETCOL(q), BIFF_GETROW(q), number) ;
// Ignore XF
type = (number & 0x3) ;
switch (type)
{
case eIEEE:
dump (q->data, q->length) ;
tmp[0] = number & 0xfffffffc ;
tmp[1] = 0 ;
answer = GETDOUBLE(((BYTE *)tmp)) ;
break ;
case eIEEEx10:
dump (q->data, q->length) ;
tmp[0] = number & 0xfffffffc ;
tmp[1] = 0 ;
answer = GETDOUBLE(((BYTE *)tmp)) ;
answer/=100.0 ;
break ;
case eInt:
answer = (double)(number>>2) ;
break ;
case eIntx100:
answer = ((double)(number>>2))/100.0 ;
break ;
default:
printf ("You don't exist go away\n") ;