Commit c7d4cf7c authored by Michael Meeks's avatar Michael Meeks

Rudimentary start to office drawing reading, hacked rather badly.

Updates to 'ole' to dump annotated drawing records.
Silly bugfix in OLE2 function.
parent a39167af
1999-04-13 Michael Meeks <michael@imaginator.com>
* ms-ole.c(ms_biff_query_copy): Fix nasty bug: duplicate
stream pos record too.
* escher-types.h: Created to hold the drawing layer types.
* ole.c (read_types): Expanded to cope with escher types.
(get_opcode_name): Duplicated into:
(get_biff_opcode_name, get_escher_opcode_name): Created.
(main): Updated biff section.
Added 'draw' = search for drawings and dump section.
* README: Updated to doc. 'draw'
1999-04-12 Michael Meeks <michael@imaginator.com>
* ms-obj.c/h: renamed ms_obj_read_graphic to
ms_obj_read_obj, the real graphic lurks elsewhere...
* ms-escher.c/h: Created to handle the 'Office drawing layer'
This is a hack and needs better OLE support code.
* ms-ole.c (ms_ole_stream_duplicate): Created, not too bad a hack
(ms_biff_query_data_to_stream): Nasty hack for ms-escher
1999-04-12 Michael Meeks <michael@imaginator.com>
* ms-obj.c: Created to deal with the horrors of embedded graphic
......
......@@ -12,6 +12,8 @@ libexcel_a_SOURCES = \
boot.h \
ms-excel-biff.h \
ms-biff.h \
ms-escher.c \
ms-escher.h \
ms-excel.c \
ms-excel.h \
ms-formula.c \
......
......@@ -20,4 +20,8 @@ Try for starters:
You _must_ be in the plugins/excel directory for it to find the biff_types.h
which it uses to annotate the biff records nicely; patches welcome.
1999-04-12 Michael Meeks <michael@imaginator.com>
It will also dump the contents of drawing streams inside the BIFF; to do so, run:
./ole somefile.xls draw workbook
1999-04-13 Michael Meeks <michael@imaginator.com>
/**
* escher-types.h: A long and dull list of types used
* in the MS drawing layer.
*
* Author:
* Michael Meeks (michael@imaginator.com)
**/
/**
* See S59FDC.HTM for the spec.
* MS use similar names with 'msofbt' prefix.
**/
/*
 * Record type codes of the drawing layer, one per line.
 *
 * NOTE: besides being compiled, this header is opened as a plain text
 * file by the 'ole' tool (ESCHER_TYPES_FILE in ole.c).  Its read_types()
 * turns every line that contains a hex literal into a name / opcode
 * pair, taking the word after '#define' as the name.  Keep exactly one
 * #define per line and keep hex literals out of comment-only lines.
 */
#define DggContainer 0xf000
#define Dgg 0xf006
#define CLSID 0xf016
#define OPT 0xf00b
#define ColorMRU 0xf11a
#define SplitMenuColors 0xf11e
#define BStoreContainer 0xf001
#define BSE 0xf007
#define Blip_START 0xf018 /* Blip types are between */
#define Blip_END 0xf117 /* these two values */
#define DgContainer 0xf002
#define Dg 0xf008
#define RegroupItems 0xf118
#define ColorScheme 0xf120 /* bug in docs */
#define SpgrContainer 0xf003
#define SpContainer 0xf004
#define Spgr 0xf009
#define Sp 0xf00a
/*
 * OPT is listed a second time here with the same value as above.  The
 * redefinition is token-for-token identical, so the compiler accepts it.
 */
#define OPT 0xf00b
#define Textbox 0xf00c
#define ClientTextbox 0xf00d
#define Anchor 0xf00e
#define ChildAnchor 0xf00f
#define ClientAnchor 0xf010
#define ClientData 0xf011
#define OleObject 0xf11f
#define DeletedPspl 0xf11d /* bug in docs */
#define SolverContainer 0xf005
#define ConnectorRule 0xf012 /* bug in docs */
#define AlignRule 0xf013
#define ArcRule 0xf014
#define ClientRule 0xf015
#define CalloutRule 0xf017
#define Selection 0xf119
......@@ -55,4 +55,6 @@ extern BIFF_QUERY *ms_biff_query_copy (const BIFF_QUERY *p) ;
**/
extern int ms_biff_query_next (BIFF_QUERY *) ;
extern void ms_biff_query_destroy (BIFF_QUERY *) ;
/* Returns a stream which contains the data in the BIFF record. */
extern MS_OLE_STREAM *ms_biff_query_data_to_stream (BIFF_QUERY *);
#endif
/**
* ms-escher.c: MS Office drawing layer support
*
* Author:
* Michael Meeks (michael@imaginator.com)
**/
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <assert.h>
#include <config.h>
#include <stdio.h>
#include <ctype.h>
#include <gnome.h>
#include "gnumeric.h"
#include "gnumeric-util.h"
#include "gnome-xml/tree.h"
#include "gnome-xml/parser.h"
#include "gnumeric-sheet.h"
#include "format.h"
#include "color.h"
#include "sheet-object.h"
#include "style.h"
#include "ms-ole.h"
#include "ms-biff.h"
#include "ms-formula.h"
#include "ms-excel.h"
#include "ms-excel-biff.h"
#include "ms-obj.h"
#include "ms-escher.h"
#include "escher-types.h"
/*
 * Cursor over a flat buffer of drawing records.  It is created by
 * esh_header_new() and moved from one record to the next by
 * esh_header_next(), which also fills in the decoded header fields.
 *
 * NOTE(review): the code steps 6 bytes per header and reads the length
 * as a single 16 bit word.  The published drawing format appears to use
 * an 8 byte header with a 32 bit length - confirm against S59FDA.HTM.
 */
typedef struct { /* See: S59FDA.HTM */
guint ver:4; /* low 4 bits of the first header word */
guint instance:12; /* remaining 12 bits of the first header word */
guint16 type; /* fbt */
gint32 length; /* Misleading really 16bits */
guint8 *data; /* start of the current record's header */
gint32 length_left; /* bytes left in the buffer, counted from data */
} ESH_HEADER;
/**
 * esh_header_new:
 * @data: start of the flat buffer holding the drawing records
 * @length: number of bytes available at @data
 *
 * Allocates a cursor placed just in front of the first record; call
 * esh_header_next() to decode it.  @data is borrowed, not copied.
 *
 * Returns: a new cursor, to be released with esh_header_destroy().
 **/
static ESH_HEADER *
esh_header_new (guint8 *data, gint32 length)
{
	ESH_HEADER *h = g_new (ESH_HEADER, 1);

	/* -6 cancels the 6 header bytes esh_header_next() skips, so the
	   first call leaves the data pointer where it is. */
	h->length      = -6;
	h->type        = 0;
	h->ver         = 0; /* g_new does not clear memory; don't leave this bitfield undefined */
	h->instance    = 0;
	h->data        = data;
	h->length_left = length;

	return h;
}
/**
 * esh_header_next:
 * @h: cursor created by esh_header_new()
 *
 * Skips the record the cursor currently describes (6 header bytes plus
 * its length) and decodes the header found at the new position.
 *
 * NOTE(review): the 6 byte header / 16 bit length layout used here
 * should be checked against S59FDA.HTM.
 *
 * Returns: 1 if a header was decoded, 0 if 5 or fewer bytes remain or
 * if @h or its data pointer is NULL.
 **/
static int
esh_header_next (ESH_HEADER *h)
{
	gint32  step;
	guint16 first_word;

	g_return_val_if_fail(h, 0);
	g_return_val_if_fail(h->data, 0);

	/* Move over the previous header and its payload */
	step = h->length + 6;
	h->data        += step;
	h->length_left -= step;

	/* Not enough room left for another complete header */
	if (h->length_left <= 5)
		return 0;

	first_word  = BIFF_GETWORD (h->data + 0);
	h->type     = BIFF_GETWORD (h->data + 2);
	h->length   = BIFF_GETWORD (h->data + 4);

	h->ver      = (first_word & 0x0f);
	h->instance = (first_word >> 4);

	return 1;
}
/**
 * esh_header_destroy:
 * @h: cursor to release, may be NULL
 *
 * Frees the cursor itself; the buffer it walked over is not touched.
 **/
static void
esh_header_destroy (ESH_HEADER *h)
{
	/* g_free accepts NULL, so no guard is needed */
	g_free (h);
}
/**
* General points:
* For docs. on pointer conversions see: S59FDC.HTM
* BLIP = Big Large Image / Picture see: S59FE3.HTM
**/
/**
 * disseminate_stream:
 * @data: flat buffer of drawing records
 * @length: number of bytes at @data
 *
 * Walks the buffer record by record and prints one line per header
 * to stdout.  The buffer is only read.
 **/
static void
disseminate_stream (guint8 *data, gint32 length)
{
	ESH_HEADER *cursor;

	for (cursor = esh_header_new (data, length); esh_header_next (cursor); )
		printf ("Header: type 0x%x, inst 0x%x ver 0x%x len 0x%x\n",
			cursor->type, cursor->instance, cursor->ver, cursor->length);

	esh_header_destroy (cursor);
}
/**
 * biff_to_flat_data:
 * @q: query positioned on the first record to flatten; left untouched
 * @data: out, receives a newly allocated buffer (NULL when empty)
 * @length: out, receives the number of bytes stored in *@data
 *
 * Builds a flat record by appending the payload of @q and of every
 * CONTINUE record that directly follows it.
 * Have to do until we move this into ms_ole.c
 *
 * The walk is done on a private copy of @q, which is destroyed before
 * returning.  The caller owns *@data and releases it with g_free().
 **/
static void
biff_to_flat_data (const BIFF_QUERY *q, guint8 **data, guint32 *length)
{
	BIFF_QUERY *nq;
	guint8     *buf   = NULL;
	guint32     total = 0;

	*data   = NULL;
	*length = 0;

	nq = ms_biff_query_copy (q);
	if (!nq)
		return;

	do {
		if (nq->length > 0) {
			buf = g_realloc (buf, total + nq->length);
			memcpy (buf + total, nq->data, nq->length);
			total += nq->length;
		}
		/* Stop at the end of the stream too: a failed 'next' may leave
		   the old opcode in place, which would otherwise spin forever
		   on a trailing CONTINUE record. */
	} while (ms_biff_query_next (nq) && nq->opcode == BIFF_CONTINUE);

	ms_biff_query_destroy (nq);

	*data   = buf;
	*length = total;
}
/**
 * ms_escher_hack_get_drawing:
 * @q: query positioned on a drawing record
 *
 * FIXME: See S59FDA.HTM / S59FDB.HTM
 * Essentially the MS_OLE_STREAM needs to be sub-classed by excel, and
 * forced to store its data inside BIFF records inside the excel stream.
 * Until then the record and its CONTINUE records are flattened into a
 * temporary buffer, which is hex dumped, walked header by header and
 * then released.  @q itself must not move; this is asserted on exit.
 **/
void
ms_escher_hack_get_drawing (const BIFF_QUERY *q)
{
	/* Convert the query to a sort of stream */
	guint8  *data;
	guint32  len;
	guint32  str_pos;

	g_return_if_fail (q);

	str_pos = q->streamPos;

	biff_to_flat_data (q, &data, &len);
	printf ("Drawing data\n");
	dump (data, len);
	disseminate_stream (data, len);

	/* The flat copy belongs to us, nobody else keeps a pointer to it */
	g_free (data);

	g_assert (q->streamPos==str_pos);
}
/**
* ms-escher.h: MS Office drawing layer support
*
* Author:
* Michael Meeks (michael@imaginator.com)
**/
#ifndef GNUMERIC_MS_ESCHER_H
#define GNUMERIC_MS_ESCHER_H

/*
 * Dumps the drawing records held in the BIFF record 'q' is positioned
 * on.  This header does not pull in the declaration of BIFF_QUERY
 * itself; include ms-biff.h before it.
 */
void ms_escher_hack_get_drawing (const BIFF_QUERY *q);

#endif
......@@ -31,6 +31,7 @@
#include "ms-excel.h"
#include "ms-excel-biff.h"
#include "ms-obj.h"
#include "ms-escher.h"
#define EXCEL_DEBUG 0
......@@ -1857,7 +1858,7 @@ ms_excel_read_sheet (MS_EXCEL_SHEET *sheet, BIFF_QUERY * q, MS_EXCEL_WORKBOOK *
return;
break;
case BIFF_OBJ: /* See: ms-obj.c and S59DAD.HTM */
ms_obj_read_graphic (sheet, q);
ms_obj_read_obj (sheet, q);
break;
case BIFF_SELECTION: /* S59DE2.HTM */
{
......@@ -1891,8 +1892,8 @@ ms_excel_read_sheet (MS_EXCEL_SHEET *sheet, BIFF_QUERY * q, MS_EXCEL_WORKBOOK *
sheet_cursor_set (sheet->gnum_sheet, act_col, act_row, act_col, act_row, act_col, act_row) ;
break ;
}
case BIFF_MS_O_DRAWING: /* FIXME: See: S59DA4.HTM */
printf ("FIXME: MS Drawing\n");
case BIFF_MS_O_DRAWING: /* FIXME: See: ms-escher.c and S59DA4.HTM */
ms_escher_hack_get_drawing (q);
break;
case BIFF_NOTE: /* See: S59DAB.HTM */
{
......@@ -2161,6 +2162,7 @@ ms_excelReadWorkbook (MS_OLE * file)
break;
case BIFF_MS_O_DRAWING_GROUP: /* FIXME: See: S59DA5.HTM */
printf ("FIXME: MS Drawing Group\n");
ms_escher_hack_get_drawing (q);
break;
case BIFF_EXTERNSHEET:
{
......
......@@ -31,6 +31,7 @@
#include "ms-excel.h"
#include "ms-excel-biff.h"
#include "ms-obj.h"
#include "ms-escher.h"
#define EXCEL_DEBUG 0
......@@ -1857,7 +1858,7 @@ ms_excel_read_sheet (MS_EXCEL_SHEET *sheet, BIFF_QUERY * q, MS_EXCEL_WORKBOOK *
return;
break;
case BIFF_OBJ: /* See: ms-obj.c and S59DAD.HTM */
ms_obj_read_graphic (sheet, q);
ms_obj_read_obj (sheet, q);
break;
case BIFF_SELECTION: /* S59DE2.HTM */
{
......@@ -1891,8 +1892,8 @@ ms_excel_read_sheet (MS_EXCEL_SHEET *sheet, BIFF_QUERY * q, MS_EXCEL_WORKBOOK *
sheet_cursor_set (sheet->gnum_sheet, act_col, act_row, act_col, act_row, act_col, act_row) ;
break ;
}
case BIFF_MS_O_DRAWING: /* FIXME: See: S59DA4.HTM */
printf ("FIXME: MS Drawing\n");
case BIFF_MS_O_DRAWING: /* FIXME: See: ms-escher.c and S59DA4.HTM */
ms_escher_hack_get_drawing (q);
break;
case BIFF_NOTE: /* See: S59DAB.HTM */
{
......@@ -2161,6 +2162,7 @@ ms_excelReadWorkbook (MS_OLE * file)
break;
case BIFF_MS_O_DRAWING_GROUP: /* FIXME: See: S59DA5.HTM */
printf ("FIXME: MS Drawing Group\n");
ms_escher_hack_get_drawing (q);
break;
case BIFF_EXTERNSHEET:
{
......
......@@ -80,7 +80,7 @@ object_type_names[] =
};
void
ms_obj_read_graphic (MS_EXCEL_SHEET *sheet, BIFF_QUERY *q)
ms_obj_read_obj (MS_EXCEL_SHEET *sheet, BIFF_QUERY *q)
{
guint8 *data;
gint32 data_len_left;
......
......@@ -8,6 +8,6 @@
#ifndef GNUMERIC_MS_OBJ_H
#define GNUMERIC_MS_OBJ_H
void ms_obj_read_graphic (MS_EXCEL_SHEET *sheet, BIFF_QUERY *q);
void ms_obj_read_obj (MS_EXCEL_SHEET *sheet, BIFF_QUERY *q);
#endif
......@@ -1228,6 +1228,16 @@ ms_ole_stream_open (MS_OLE_DIRECTORY *d, char mode)
return s;
}
/*
 * Returns a newly allocated, member-for-member copy of the stream
 * record 's'.  Nothing the record points at is duplicated.
 *
 * FIXME: This needs to be more cunning and have new write / read
 * functions that insert CONTINUE records etc.
 */
static MS_OLE_STREAM *
ms_ole_stream_duplicate (MS_OLE_STREAM *s)
{
	MS_OLE_STREAM *copy = g_new (MS_OLE_STREAM, 1);

	*copy = *s;

	return copy;
}
void
ms_ole_stream_close (MS_OLE_STREAM *s)
{
......@@ -1490,6 +1500,7 @@ ms_biff_query_copy (const BIFF_QUERY *p)
bf->data = (guint8 *)g_malloc (p->length);
memcpy (bf->data, p->data, p->length);
}
bf->pos=ms_ole_stream_duplicate (p->pos);
return bf;
}
......@@ -1545,6 +1556,21 @@ ms_biff_query_destroy (BIFF_QUERY *bq)
}
}
/*
 * FIXME: Too nasty !
 *
 * Currently this only hands back a duplicate of the query's stream
 * position record.  What is still missing:
 *  - stepping the copy back by the record length; advancing by
 *    -bq->length will never work;
 *  - hacking the size down to the biff length, which can't be done
 *    non-destructively ! sod !  It should cut the length down a lot,
 *    hope we can know where the end is somehow.
 */
MS_OLE_STREAM *
ms_biff_query_data_to_stream (BIFF_QUERY *bq)
{
	return ms_ole_stream_duplicate (bq->pos);
}
#if G_BYTE_ORDER != G_LITTLE_ENDIAN
double biff_getdouble(guint8 *p)
{
......
......@@ -14,24 +14,28 @@
#include "ms-ole.h"
#include "ms-biff.h"
#include "biff-types.h"
#define TYPES_FILE "biff-types.h"
#define BIFF_TYPES_FILE "biff-types.h"
#define ESCHER_TYPES_FILE "escher-types.h"
typedef struct {
guint16 opcode;
char *name;
} BIFF_TYPE;
} GENERIC_TYPE;
static GPtrArray *types=NULL;
static GPtrArray *biff_types = NULL;
static GPtrArray *escher_types = NULL;
typedef enum { eBiff=0, eEscher=1 } typeType;
static void
read_types ()
read_types (char *fname, GPtrArray **types, typeType t)
{
FILE *file = fopen(TYPES_FILE, "r");
FILE *file = fopen(fname, "r");
char buffer[1024];
types = g_ptr_array_new ();
*types = g_ptr_array_new ();
if (!file) {
printf ("Can't find vital file '%s'\n", TYPES_FILE);
printf ("Can't find vital file '%s'\n", fname);
return;
}
while (!feof(file)) {
......@@ -39,18 +43,21 @@ read_types ()
fgets(buffer,1023,file);
for (p=buffer;*p;p++)
if (*p=='0' && *(p+1)=='x') {
BIFF_TYPE *bt = g_new (BIFF_TYPE,1);
GENERIC_TYPE *bt = g_new (GENERIC_TYPE,1);
char *name, *pt;
bt->opcode=strtol(p+2,0,16);
pt = buffer;
while (*pt && *pt != '#') pt++; /* # */
while (*pt && !isspace(*pt)) pt++; /* define */
while (*pt && isspace(*pt)) pt++; /* ' ' */
while (*pt && *pt != '_') pt++; /* BIFF_ */
name = *pt?pt+1:pt;
if (t==eBiff) {
while (*pt && *pt != '_') pt++; /* BIFF_ */
name = *pt?pt+1:pt;
} else
name = pt;
while (*pt && !isspace(*pt)) pt++;
bt->name=g_strndup(name, (pt-name));
g_ptr_array_add (types, bt);
g_ptr_array_add (*types, bt);
break;
}
}
......@@ -58,13 +65,32 @@ read_types ()
}
static char*
get_opcode_name (guint16 opcode)
get_biff_opcode_name (guint16 opcode)
{
int lp;
if (!types)
read_types ();
for (lp=0;lp<types->len;lp++) {
BIFF_TYPE *bt = g_ptr_array_index (types, lp);
if (!biff_types)
read_types (BIFF_TYPES_FILE, &biff_types, eBiff);
for (lp=0;lp<biff_types->len;lp++) {
GENERIC_TYPE *bt = g_ptr_array_index (biff_types, lp);
if (bt->opcode>0xff) {
if (bt->opcode == opcode)
return bt->name;
} else {
if (bt->opcode == (opcode&0xff))
return bt->name;
}
}
return "Unknown";
}
static char*
get_escher_opcode_name (guint16 opcode)
{
int lp;
if (!escher_types)
read_types (ESCHER_TYPES_FILE, &escher_types, eEscher);
for (lp=0;lp<escher_types->len;lp++) {
GENERIC_TYPE *bt = g_ptr_array_index (escher_types, lp);
if (bt->opcode>0xff) {
if (bt->opcode == opcode)
return bt->name;
......@@ -115,11 +141,92 @@ syntax_error(char *err)
printf (" -i: Interactive, queries for fresh commands\n\n");
printf ("command can be one or all of:\n");
printf (" * ls: list files\n");
printf (" * biff <stream name>: dump biff records\n");
printf (" * draw <stream name>: dump drawing records\n");
printf (" * dump <stream name>: dump stream\n");
printf (" * quit,exit,bye: exit\n");
exit(1);
}
/* ---------------------------- Start cut from ms-escher.c ---------------------------- */
/*
 * Cursor over a flat buffer of drawing records.  It is created by
 * esh_header_new() and moved from one record to the next by
 * esh_header_next(), which also fills in the decoded header fields.
 *
 * NOTE(review): the code steps 6 bytes per header and reads the length
 * as a single 16 bit word.  The published drawing format appears to use
 * an 8 byte header with a 32 bit length - confirm against S59FDA.HTM.
 */
typedef struct { /* See: S59FDA.HTM */
guint ver:4; /* low 4 bits of the first header word */
guint instance:12; /* remaining 12 bits of the first header word */
guint16 type; /* fbt */
gint32 length; /* Misleading really 16bits */
guint8 *data; /* start of the current record's header */
gint32 length_left; /* bytes left in the buffer, counted from data */
} ESH_HEADER;
/**
 * esh_header_new:
 * @data: start of the flat buffer holding the drawing records
 * @length: number of bytes available at @data
 *
 * Allocates a cursor placed just in front of the first record; call
 * esh_header_next() to decode it.  @data is borrowed, not copied.
 *
 * Returns: a new cursor, to be released with esh_header_destroy().
 **/
static ESH_HEADER *
esh_header_new (guint8 *data, gint32 length)
{
	ESH_HEADER *h = g_new (ESH_HEADER, 1);

	/* -6 cancels the 6 header bytes esh_header_next() skips, so the
	   first call leaves the data pointer where it is. */
	h->length      = -6;
	h->type        = 0;
	h->ver         = 0; /* g_new does not clear memory; don't leave this bitfield undefined */
	h->instance    = 0;
	h->data        = data;
	h->length_left = length;

	return h;
}
/**
 * esh_header_next:
 * @h: cursor created by esh_header_new()
 *
 * Skips the record the cursor currently describes (6 header bytes plus
 * its length) and decodes the header found at the new position.
 *
 * NOTE(review): the 6 byte header / 16 bit length layout used here
 * should be checked against S59FDA.HTM.
 *
 * Returns: 1 if a header was decoded, 0 if 5 or fewer bytes remain or
 * if @h or its data pointer is NULL.
 **/
static int
esh_header_next (ESH_HEADER *h)
{
	gint32  step;
	guint16 first_word;

	g_return_val_if_fail(h, 0);
	g_return_val_if_fail(h->data, 0);

	/* Move over the previous header and its payload */
	step = h->length + 6;
	h->data        += step;
	h->length_left -= step;

	/* Not enough room left for another complete header */
	if (h->length_left <= 5)
		return 0;

	first_word  = BIFF_GETWORD (h->data + 0);
	h->type     = BIFF_GETWORD (h->data + 2);
	h->length   = BIFF_GETWORD (h->data + 4);

	h->ver      = (first_word & 0x0f);
	h->instance = (first_word >> 4);

	return 1;
}
/**
 * esh_header_destroy:
 * @h: cursor to release, may be NULL
 *
 * Frees the cursor itself; the buffer it walked over is not touched.
 **/
static void
esh_header_destroy (ESH_HEADER *h)
{
	/* g_free accepts NULL, so no guard is needed */
	g_free (h);
}
/**
 * biff_to_flat_data:
 * @q: query positioned on the first record to flatten; left untouched
 * @data: out, receives a newly allocated buffer (NULL when empty)
 * @length: out, receives the number of bytes stored in *@data
 *
 * Builds a flat record by appending the payload of @q and of every
 * CONTINUE record that directly follows it.
 * Have to do until we move this into ms_ole.c
 *
 * The walk is done on a private copy of @q, which is destroyed before
 * returning.  The caller owns *@data and releases it with g_free().
 **/
static void
biff_to_flat_data (const BIFF_QUERY *q, guint8 **data, guint32 *length)
{
	BIFF_QUERY *nq;
	guint8     *buf   = NULL;
	guint32     total = 0;

	*data   = NULL;
	*length = 0;

	nq = ms_biff_query_copy (q);
	if (!nq)
		return;

	do {
		if (nq->length > 0) {
			buf = g_realloc (buf, total + nq->length);
			memcpy (buf + total, nq->data, nq->length);
			total += nq->length;
		}
		/* Stop at the end of the stream too: a failed 'next' may leave
		   the old opcode in place, which would otherwise spin forever
		   on a trailing CONTINUE record. */
	} while (ms_biff_query_next (nq) && nq->opcode == BIFF_CONTINUE);

	ms_biff_query_destroy (nq);

	*data   = buf;
	*length = total;
}
/* ---------------------------- End cut ---------------------------- */
int main (int argc, char **argv)
{
MS_OLE *ole;
......@@ -201,7 +308,7 @@ int main (int argc, char **argv)
printf ("\n");
count=0;
printf ("Opcode 0x%3x : %15s, length %d",
q->opcode, get_opcode_name (q->opcode), q->length);
q->opcode, get_biff_opcode_name (q->opcode), q->length);
}
last_opcode=q->opcode;
last_length=q->length;
......@@ -212,6 +319,38 @@ int main (int argc, char **argv)
printf ("Need a stream name\n");
return 0;
}
} else if (g_strcasecmp(ptr, "draw")==0) { /* Assume its in a BIFF file */
MS_OLE_DIRECTORY *dir;
ptr = strtok (NULL, delim);
if ((dir = get_file_handle (ole, ptr)))
{
MS_OLE_STREAM *stream = ms_ole_stream_open (dir, 'r');
BIFF_QUERY *q = ms_biff_query_new (stream);
while (ms_biff_query_next(q)) {
if (q->ls_op == BIFF_MS_O_DRAWING ||
q->ls_op == BIFF_MS_O_DRAWING_GROUP ||
q->ls_op == BIFF_MS_O_DRAWING_SELECTION) {
guint8 *data;
guint32 len;
guint32 str_pos=q->streamPos;
ESH_HEADER *h ;
printf("Drawing: '%s'\n", get_biff_opcode_name(q->opcode));
biff_to_flat_data (q, &data, &len);
h = esh_header_new (data, len);
while (esh_header_next(h)) {
printf ("Header: type 0x%4x : '%s', inst 0x%x ver 0x%x len 0x%x\n",
h->type, get_escher_opcode_name (h->type), h->instance,
h->ver, h->length);
}
esh_header_destroy (h);
}
}
printf ("\n");
ms_ole_stream_close (stream);
} else {
printf ("Need a stream name\n");
return 0;
}
} else if (g_strcasecmp(ptr,"exit")==0 ||
g_strcasecmp(ptr,"quit")==0 ||
g_strcasecmp(ptr,"bye")==0)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment