ms-ole.c 41.7 KB
Newer Older
Michael Meeks's avatar
Michael Meeks committed
1
/**
2 3 4 5
 * ms-ole.c: MS Office OLE support for Gnumeric
 *
 * Author:
 *    Michael Meeks (michael@imaginator.com)
Michael Meeks's avatar
Michael Meeks committed
6
 **/
7 8 9 10 11 12 13 14 15
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <malloc.h>
#include <assert.h>
#include <ctype.h>
16
#include <glib.h>
17 18
#include "ms-ole.h"

19 20
/* Implementational detail - not for global header */

21
#define OLE_DEBUG 0
22

23
/* These take a _guint8_ pointer */
24 25 26 27 28 29
/*
 * Little-endian readers.  Every byte is widened to an unsigned type
 * before it is shifted, so a top byte >= 0x80 never overflows a signed
 * int and GET_GUINT32 always yields a non-negative guint32 value.
 */
#define GET_GUINT8(p)  (*((const guint8 *)(p)+0))
#define GET_GUINT16(p) ((guint16)(*((const guint8 *)(p)+0) | \
		    (*((const guint8 *)(p)+1)<<8)))
#define GET_GUINT32(p) (((guint32)*((const guint8 *)(p)+0))      | \
		    (((guint32)*((const guint8 *)(p)+1))<<8)  | \
		    (((guint32)*((const guint8 *)(p)+2))<<16) | \
		    (((guint32)*((const guint8 *)(p)+3))<<24))
30

31 32 33 34 35 36 37
/*
 * Little-endian writers: store the low 8, 16 or 32 bits of n one byte
 * at a time, least significant byte first.  Each macro is an expression.
 */
#define SET_GUINT8(p,n)  (((guint8 *)(p))[0] = (n))
#define SET_GUINT16(p,n) ((((guint8 *)(p))[0] = ((n) & 0xff)), \
                          (((guint8 *)(p))[1] = (((n) >> 8) & 0xff)))
#define SET_GUINT32(p,n) ((((guint8 *)(p))[0] = ((n) & 0xff)), \
                          (((guint8 *)(p))[1] = (((n) >> 8) & 0xff)), \
                          (((guint8 *)(p))[2] = (((n) >> 16) & 0xff)), \
                          (((guint8 *)(p))[3] = (((n) >> 24) & 0xff)))
38 39


Michael Meeks's avatar
Michael Meeks committed
40 41 42
#define SPECIAL_BLOCK  0xfffffffd
#define END_OF_CHAIN   0xfffffffe
#define UNUSED_BLOCK   0xffffffff
43

Michael Meeks's avatar
Michael Meeks committed
44 45
#define BB_BLOCK_SIZE     512
#define SB_BLOCK_SIZE      64
46

47 48
#if OLE_DEBUG > 0
/*
 * Debug build: route array reads through a checked helper.
 * Very grim, but quite necessary.
 */
static guint32
my_array_hack (GArray *a, guint s, guint32 idx)
{
	guint32 *cells;

	g_assert (a);
	/* idx is unsigned, so only the upper bound needs checking */
	g_assert (idx<a->len);
	g_assert (s==4);

	cells = (guint32 *)a->data;
	return cells[idx];
}

#       define ms_array_index(a,b,c) (b)my_array_hack ((a), sizeof(b), (c))
#else
/* Release build: plain GLib indexing, far far faster... */
#       define ms_array_index(a,b,c) g_array_index (a, b, c)
#endif

65
#define BB_THRESHOLD   0x1000
Michael Meeks's avatar
Michael Meeks committed
66

67 68 69
#define PPS_ROOT_BLOCK    0
#define PPS_BLOCK_SIZE 0x80
#define PPS_END_OF_CHAIN 0xffffffff
70

71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95
/*
 * In-memory copy of one PPS (directory) record.  pps_decode() fills it
 * from a PPS_BLOCK_SIZE byte record and pps_encode() writes it back.
 */
typedef struct _PPS PPS;

struct _PPS {
	char    *name;  /* g_malloc'd name, or NULL when pps_get_text rejected it */
	/* next/prev/dir are read from record offsets 0x48/0x44/0x4c;
	   pps is this entry's own index */
	PPS_IDX  next, prev, dir, pps;
	guint32  size;  /* record offset 0x78 */
	BLP      start; /* start block, record offset 0x74 */
	PPS_TYPE type;  /* record offset 0x42 */
};

/*
 * BBPTR(f,b): address of big block b.  Block 0 starts one block into
 * the file because the header occupies the first BB_BLOCK_SIZE bytes.
 * GET_SB_START_PTR(f,b): address of small block b, found through the
 * list of big blocks (f->sbf) that hold the small block file.
 * The block argument is parenthesised so that expressions such as
 * "a | b" or "c ? x : y" can be passed safely.
 */
#if OLE_MMAP
#       define BBPTR(f,b)  ((f)->mem + ((b)+1)*BB_BLOCK_SIZE)
#       define GET_SB_START_PTR(f,b) (BBPTR(f, g_array_index ((f)->sbf, BLP, (b)/(BB_BLOCK_SIZE/SB_BLOCK_SIZE))) \
				      + (((b)%(BB_BLOCK_SIZE/SB_BLOCK_SIZE))*SB_BLOCK_SIZE))
#else
#       define BBPTR(f,b)  (get_block_ptr ((f), (b)))
#endif


/*
 * get_block_ptr:
 * Backs BBPTR() when OLE_MMAP is not set.  This is an unimplemented
 * stub: it ignores both arguments and always returns NULL.
 */
static guint8 *
get_block_ptr (MS_OLE *f, BLP b)
{
	/* Reads it in if necessary */
	return NULL;
}
96

97 98 99 100
/* This is a list of big blocks which contain a flat description of all blocks in the file.
   Effectively inside these blocks is a FAT of chains of other BBs, so the theoretical max
   size = 128 BB Fat blocks, thus = 128*512*512/4 bytes ~= 8.4MBytes */
/* The number of Big Block Descriptor (fat) Blocks */
Michael Meeks's avatar
Michael Meeks committed
101
#define GET_NUM_BBD_BLOCKS(f)       (GET_GUINT32 ((f)->mem + 0x2c))
#define SET_NUM_BBD_BLOCKS(f,n)     (SET_GUINT32 ((f)->mem + 0x2c, (n)))

/* The block locations of the Big Block Descriptor Blocks: a table of
   32 bit entries starting at header offset 0x4c */
#define GET_BBD_LIST(f,i)           (GET_GUINT32 ((f)->mem + 0x4c + (i)*4))
#define SET_BBD_LIST(f,i,n)         (SET_GUINT32 ((f)->mem + 0x4c + (i)*4, (n)))

/* Follow a chain using the in-memory copies of the allocation tables */
#define NEXT_BB(f,n)                (g_array_index ((f)->bb, BLP, (n)))
#define NEXT_SB(f,n)                (g_array_index ((f)->sb, BLP, (n)))

/* Get the start block of the root directory ( PPS ) chain */
#define GET_ROOT_STARTBLOCK(f)      (GET_GUINT32 ((f)->mem + 0x30))
#define SET_ROOT_STARTBLOCK(f,i)    (SET_GUINT32 ((f)->mem + 0x30, (i)))

/* Get the start block of the SBD chain */
#define GET_SBD_STARTBLOCK(f)       (GET_GUINT32 ((f)->mem + 0x3c))
#define SET_SBD_STARTBLOCK(f,i)     (SET_GUINT32 ((f)->mem + 0x3c, (i)))
114

115

Michael Meeks's avatar
Michael Meeks committed
116 117 118
/* NB it is misleading to assume that Microsofts linked lists link correctly.
   It is not the case that pps_next(f, pps_prev(f, n)) = n ! For the final list
   item there are no valid links. Cretins. */
119
/* All of these take a pointer to the start of a PPS_BLOCK_SIZE record */
#define PPS_GET_NAME_LEN(p)   (GET_GUINT16((p) + 0x40))
#define PPS_SET_NAME_LEN(p,i) (SET_GUINT16((p) + 0x40, (i)))
/* The previous two-argument form named an undeclared variable and passed
   two arguments to PPS_GET_NAME_LEN, so it could never expand; it now
   takes the record pointer like its neighbours.  The result is g_malloc'd
   by pps_get_text and may be NULL. */
#define PPS_NAME(p)       (pps_get_text ((p), PPS_GET_NAME_LEN(p)))
#define PPS_GET_PREV(p)   ((PPS_IDX) GET_GUINT32((p) + 0x44))
#define PPS_GET_NEXT(p)   ((PPS_IDX) GET_GUINT32((p) + 0x48))
#define PPS_GET_DIR(p)    ((PPS_IDX) GET_GUINT32((p) + 0x4c))
#define PPS_SET_PREV(p,i) ((PPS_IDX) SET_GUINT32((p) + 0x44, (i)))
#define PPS_SET_NEXT(p,i) ((PPS_IDX) SET_GUINT32((p) + 0x48, (i)))
#define PPS_SET_DIR(p,i)  ((PPS_IDX) SET_GUINT32((p) + 0x4c, (i)))
/* These get other interesting stuff from the PPS record */
#define PPS_GET_STARTBLOCK(p)      ( GET_GUINT32((p) + 0x74))
#define PPS_GET_SIZE(p)            ( GET_GUINT32((p) + 0x78))
#define PPS_GET_TYPE(p) ((PPS_TYPE)( GET_GUINT8((p) + 0x42)))
#define PPS_SET_STARTBLOCK(p,i)    ( SET_GUINT32((p) + 0x74, (i)))
#define PPS_SET_SIZE(p,i)          ( SET_GUINT32((p) + 0x78, (i)))
#define PPS_SET_TYPE(p,i)          ( SET_GUINT8 ((p) + 0x42, (i)))
135

136
/* FIXME: This needs proper unicode support ! current support is a guess */
Michael Meeks's avatar
Michael Meeks committed
137
/* Length is in bytes == 1/2 the final text length */
138 139
/* NB. Different from biff_get_text, looks like a bug ! */
static char *
140
pps_get_text (guint8 *ptr, int length)
141
{
Arturo Espinosa's avatar
Arturo Espinosa committed
142 143
	int lp, skip;
	char *ans;
Michael Meeks's avatar
Michael Meeks committed
144
	guint16 c;
145
	guint8 *inb;
146
	
Michael Meeks's avatar
Michael Meeks committed
147 148
	length = (length+1)/2;

149
	if (length <= 0 ||
Michael Meeks's avatar
Michael Meeks committed
150 151 152 153
	    length > (PPS_BLOCK_SIZE/4)) {
#if OLE_DEBUG > 0
		printf ("Nulled name of length %d\n", length);
#endif
Arturo Espinosa's avatar
Arturo Espinosa committed
154
		return 0;
Michael Meeks's avatar
Michael Meeks committed
155
	}
156
	
157
	ans = (char *)g_malloc (sizeof(char) * length + 1);
158
	
Michael Meeks's avatar
Michael Meeks committed
159 160
	c = GET_GUINT16(ptr);
	if (c<0x30) /* Magic unicode number I made up */
Arturo Espinosa's avatar
Arturo Espinosa committed
161
		inb = ptr + 2;
162
	else
Arturo Espinosa's avatar
Arturo Espinosa committed
163
		inb = ptr;
Michael Meeks's avatar
Michael Meeks committed
164 165 166
	for (lp=0;lp<length;lp++) {
		c = GET_GUINT16(inb);
		ans[lp] = (char)c;
Arturo Espinosa's avatar
Arturo Espinosa committed
167
		inb+=2;
168
	}
Arturo Espinosa's avatar
Arturo Espinosa committed
169 170
	ans[lp] = 0;
	return ans;
171
}
172

173 174
/*
 * dump_header:
 * Debugging aid: prints the in-memory big block table and the decoded
 * directory entries of @f to stdout.  Tables that have not been read
 * yet (NULL) are reported instead of dereferenced.
 */
static void
dump_header (MS_OLE *f)
{
	guint lp;

	g_return_if_fail (f);

	printf ("--------------------------MS_OLE HEADER-------------------------\n");
	printf ("Num BBD Blocks : %d Root %d, SB blocks %d\n",
		f->bb  ? (int)f->bb->len  : -1,
		f->pps ? (int)f->pps->len : -1,
		f->sb  ? (int)f->sb->len  : -1);

	/* The summary line above tolerates a missing table, so must this loop */
	if (f->bb) {
		for (lp=0;lp<f->bb->len;lp++)
			printf ("Block %d -> block %d\n", (int)lp,
				(int)g_array_index (f->bb, BLP, lp));
	}

	if (f->pps) {
		printf ("Root blocks : %d\n", (int)f->pps->len);
		for (lp=0;lp<f->pps->len;lp++) {
			PPS *p = g_ptr_array_index (f->pps, lp);
			printf ("root_list[%d] = '%s' ( <-%d, V %d, %d->)\n", (int)lp,
				p->name?p->name:"Null",
				p->prev, p->dir, p->next);
		}
	} else
		printf ("No root yet\n");

	printf ("-------------------------------------------------------------\n");
}

203 204
/*
 * get_next_block:
 * Reads the allocation table entry for big block @blk straight from
 * the file: the header's BBD list selects the descriptor block, and the
 * remainder selects the 32 bit slot inside it.
 */
static BLP
get_next_block (MS_OLE *f, BLP blk)
{
	BLP     fat_blk = GET_BBD_LIST (f, blk/(BB_BLOCK_SIZE/4));
	guint8 *slot    = BBPTR(f,fat_blk) + 4*(blk%(BB_BLOCK_SIZE/4));

	return GET_GUINT32 (slot);
}

210 211
/*
 * read_bb:
 * Builds f->bb, the in-memory copy of the big block allocation table,
 * with one entry per data block (the header block is not a data block,
 * matching BBPTR and ms_ole_create / next_free_bb).
 *
 * Returns 1 on success, 0 if the header's descriptor information is
 * inconsistent with the file size; nothing is allocated on failure.
 */
static int
read_bb (MS_OLE *f)
{
	const guint32 per_bbd = BB_BLOCK_SIZE/4;        /* entries per descriptor block */
	const guint32 max_bbd = (BB_BLOCK_SIZE-0x4c)/4; /* BBD list slots in the header */
	guint32 numbbd, data_blocks, needed_bbd;
	BLP     lp;
	GArray *ans;

	g_return_val_if_fail (f, 0);
	g_return_val_if_fail (f->mem, 0);

	numbbd  = GET_NUM_BBD_BLOCKS  (f);

        /* Sanity checks */
	if (f->length < BB_BLOCK_SIZE ||
	    numbbd < ((f->length - BB_BLOCK_SIZE + ((BB_BLOCK_SIZE*BB_BLOCK_SIZE)/4) - 1) /
		      ((BB_BLOCK_SIZE*BB_BLOCK_SIZE)/4))) {
		printf ("Duff block descriptors\n");
		return 0;
	}

	/* Everything after the header, a trailing partial block included */
	data_blocks = (f->length+BB_BLOCK_SIZE-1)/BB_BLOCK_SIZE - 1;
	needed_bbd  = (data_blocks + per_bbd - 1) / per_bbd;

	/* Each descriptor block we are about to read through must have a
	   slot in the header and must itself lie inside the file. */
	for (lp=0;lp<needed_bbd;lp++) {
		if (lp >= max_bbd || GET_BBD_LIST (f, lp) >= data_blocks) {
			printf ("Duff block descriptors\n");
			return 0;
		}
	}

	/* Allocate only once nothing can fail any more */
	ans = g_array_new (FALSE, FALSE, sizeof(BLP));
	for (lp=0;lp<data_blocks;lp++) {
		BLP tmp = get_next_block (f, lp);
		g_array_append_val (ans, tmp);
	}

	g_assert (data_blocks == ans->len);

	/* NOTE(review): the descriptor blocks themselves are presumably
	   marked SPECIAL_BLOCK in the table; an earlier check for that was
	   disabled and is not enforced here. */

	f->bb = ans;
#if OLE_DEBUG > 1
	dump_header (f);
#endif
	return 1;
}

Michael Meeks's avatar
Michael Meeks committed

/*
 * extend_file:
 * Grows the file behind @f by @blocks big blocks and re-maps it.
 * f->length and f->mem are updated; the mapping may move.  On any
 * failure a message is printed and f->mem is left as 0.
 */
static void
extend_file (MS_OLE *f, guint blocks)
{
#ifndef OLE_MMAP
#       error Simply add more blocks at the end in memory
#else
	struct stat st;
	int file, unmapped;
	guint8 *newptr, zero = 0;
	guint32 oldlen;

	g_assert (f);
	file = f->file_descriptor;

#if OLE_DEBUG > 5
	printf ("Before extend\n");
	dump_allocation(f);
#endif

	/* Keep the call outside the assertion so it still happens when
	   assertions are compiled out. */
	unmapped = munmap (f->mem, f->length);
	g_assert (unmapped != -1);

	/* Extend that file by blocks: seek to the new last byte and write it */
	if ((fstat(file, &st)==-1) ||
	    (lseek (file, st.st_size + BB_BLOCK_SIZE*blocks - 1, SEEK_SET)==(off_t)-1) ||
	    (write (file, &zero, 1)!=1))
	{
		printf ("Serious error extending file\n");
		f->mem = 0;
		return;
	}

	oldlen = st.st_size;
	if (fstat(file, &st)==-1) {
		printf ("Serious error extending file\n");
		f->mem = 0;
		return;
	}
	f->length = st.st_size;
	g_assert (f->length == BB_BLOCK_SIZE*blocks + oldlen);
	if (f->length%BB_BLOCK_SIZE)
		printf ("Warning file %d non-integer number of blocks\n", f->length);

	/* The old address is only a hint; mmap reports failure with MAP_FAILED */
	newptr = mmap (f->mem, f->length, PROT_READ|PROT_WRITE, MAP_SHARED, file, 0);
	if (newptr == (guint8 *)MAP_FAILED) {
		printf ("Serious error re-mapping extended file\n");
		f->mem = 0;
		return;
	}
#if OLE_DEBUG > 0
	if (newptr != f->mem)
		printf ("Memory map moved from %p to %p\n",
			f->mem, newptr);
#endif
	f->mem = newptr;

#if OLE_DEBUG > 5
	printf ("After extend\n");
	dump_allocation(f);
#endif
#endif
}

/*
 * next_free_bb:
 * Returns the index of the first big block marked UNUSED_BLOCK in
 * f->bb.  When there is none the file is extended by two blocks, both
 * are appended as unused, and the first of them is returned.  The
 * returned block is not marked as used; the caller does that.
 */
static BLP
next_free_bb (MS_OLE *f)
{
	BLP blk;
	BLP unused = UNUSED_BLOCK;

	g_assert (f);

	for (blk = 0; blk < f->bb->len; blk++) {
		if (g_array_index (f->bb, BLP, blk) == UNUSED_BLOCK)
			return blk;
	}

	/* Nothing free: blk now equals the old table length */
	extend_file (f, 2);
	g_array_append_val (f->bb, unused);
	g_array_append_val (f->bb, unused);
#ifndef OLE_MMAP
#       error Need to extend bbptr as well.
#endif
	g_assert ((g_array_index (f->bb, BLP, blk) == UNUSED_BLOCK));
	return blk;
}

/*
 * write_bb:
 * Writes f->bb back to the file: allocates the descriptor (BBD) blocks,
 * records them in the header and fills them with the allocation table.
 * f->bb is freed and cleared on success.  Returns 1 on success, 0 on
 * failure.
 */
static int
write_bb (MS_OLE *f)
{
	const guint32 per_bbd = BB_BLOCK_SIZE/4;        /* entries per descriptor block */
	const guint32 max_bbd = (BB_BLOCK_SIZE-0x4c)/4; /* BBD list slots in the header */
	guint32 numbbd;
	BLP     lp, lpblk;

	g_return_val_if_fail (f, 0);
	g_return_val_if_fail (f->mem, 0);
	g_return_val_if_fail (f->bb,  0);

	/* One descriptor block describes per_bbd blocks, not per_bbd blocks
	   worth of bytes.  Dividing by per_bbd-1 leaves room for the entries
	   of the descriptor blocks themselves and for the blocks
	   next_free_bb may have to add while allocating them. */
	numbbd  = (f->bb->len + per_bbd - 1) / (per_bbd - 1);
	if (numbbd > max_bbd) {
		printf ("Too many blocks for the header's descriptor list\n");
		return 0;
	}
	SET_NUM_BBD_BLOCKS (f, numbbd);

	for (lp=0;lp<numbbd;lp++) {
		BLP blk = next_free_bb(f);
		/* extend_file clears f->mem when it fails */
		g_return_val_if_fail (f->mem, 0);
		SET_BBD_LIST (f, lp, blk);
		g_array_index (f->bb, BLP, blk) = SPECIAL_BLOCK;
	}
	g_assert (f->bb->len <= numbbd*per_bbd);

	lpblk = 0;
	while (lpblk<f->bb->len) { /* Described blocks */
		guint8 *mem = BBPTR(f, GET_BBD_LIST(f, lpblk/per_bbd));
		SET_GUINT32 (mem + (lpblk%per_bbd)*4,
			     g_array_index (f->bb, BLP, lpblk));
		lpblk++;
	}
	while (lpblk<numbbd*per_bbd) { /* Undescribed blocks */
		guint8 *mem = BBPTR(f, GET_BBD_LIST(f, lpblk/per_bbd));
		SET_GUINT32 (mem + (lpblk%per_bbd)*4,
			     UNUSED_BLOCK);
		lpblk++;
	}
	g_array_free (f->bb, TRUE);
	f->bb = 0;
	return 1;
}

/*
 * next_free_sb:
 * Returns the index of the first small block marked UNUSED_BLOCK in
 * f->sb, appending a new unused entry when there is none.  After an
 * append, a further big block is added to the small block file (f->sbf)
 * whenever the number of big blocks needed is >= the number present.
 */
static BLP
next_free_sb (MS_OLE *f)
{
	const guint32 sb_per_bb = BB_BLOCK_SIZE/SB_BLOCK_SIZE;
	BLP blk;
	BLP unused = UNUSED_BLOCK;

	g_assert (f);

	for (blk = 0; blk < f->sb->len; blk++) {
		if (g_array_index (f->sb, BLP, blk) == UNUSED_BLOCK)
			return blk;
	}

	/* Nothing free: blk now equals the old table length */
	g_array_append_val (f->sb, unused);
	g_assert ((g_array_index (f->sb, BLP, blk) == UNUSED_BLOCK));
	g_assert (blk < f->sb->len);

	/* NOTE(review): ">=" also fires when sbf already holds exactly
	   enough big blocks - confirm whether ">" was intended. */
	if ((f->sb->len + sb_per_bb - 1) / sb_per_bb >= f->sbf->len) {
		/* Create an extra big block on the small block stream */
		BLP new_sbf = next_free_bb(f);
		g_array_append_val (f->sbf, new_sbf);
		/* We don't need to chain it in as we have this info in f->sbf */
		g_array_index (f->bb, BLP, new_sbf) = END_OF_CHAIN;
	}

	g_assert ((f->sb->len + sb_per_bb - 1) / sb_per_bb <= f->sbf->len);

	return blk;
}

405 406
/*
 * pps_decode:
 * Allocates a PPS and fills it from the PPS_BLOCK_SIZE byte record at
 * @mem.  When the name cannot be decoded, the links and the start block
 * are set to PPS_END_OF_CHAIN instead of being read.  The pps index
 * field is left for the caller to set.
 */
static PPS *
pps_decode (guint8 *mem)
{
	PPS     *pps      = g_new (PPS, 1);
	gboolean has_name;

	pps->name = pps_get_text  (mem, PPS_GET_NAME_LEN(mem));
	pps->type = PPS_GET_TYPE  (mem);
	pps->size = PPS_GET_SIZE  (mem);

	has_name   = (pps->name != 0);
	/* Make safe: a nameless record gets no links */
	pps->next  = has_name ? PPS_GET_NEXT  (mem)      : PPS_END_OF_CHAIN;
	pps->prev  = has_name ? PPS_GET_PREV  (mem)      : PPS_END_OF_CHAIN;
	pps->dir   = has_name ? PPS_GET_DIR   (mem)      : PPS_END_OF_CHAIN;
	pps->start = has_name ? PPS_GET_STARTBLOCK (mem) : PPS_END_OF_CHAIN;

#if OLE_DEBUG > 1
	printf ("PPS decode : '%s'\n", pps->name?pps->name:"Null");
	dump (mem, PPS_BLOCK_SIZE);
#endif
	return pps;
}

Michael Meeks's avatar
Michael Meeks committed
430
static void
Michael Meeks's avatar
Michael Meeks committed
431
pps_encode (guint8 *mem, PPS *pps)
Michael Meeks's avatar
Michael Meeks committed
432
{
Michael Meeks's avatar
Michael Meeks committed
433 434
	int lp, max;

435 436
	g_return_if_fail (pps);
	
Michael Meeks's avatar
Michael Meeks committed
437 438 439
	/* Blank stuff I don't understand */
	for (lp=0;lp<PPS_BLOCK_SIZE;lp++)
		SET_GUINT8(mem+lp, 0);
Michael Meeks's avatar
Michael Meeks committed
440 441 442 443 444 445 446 447 448 449 450

	if (pps->name) {
		max = strlen (pps->name);
		if (max >= (PPS_BLOCK_SIZE/4))
			max = (PPS_BLOCK_SIZE/4);
		for (lp=0;lp<max;lp++)
			SET_GUINT16(mem + lp*2, pps->name[lp]);
	} else {
		printf ("No name %d\n", pps->pps);
		max = -1;
	}
451
	
Michael Meeks's avatar
Michael Meeks committed
452
	PPS_SET_NAME_LEN(mem, (max+1)*2);
453 454
	
	/* Magic numbers */
Michael Meeks's avatar
Michael Meeks committed
455
	SET_GUINT8   (mem + 0x43, 0x01); /* Or zero ? */
456 457 458 459 460 461 462 463 464
	SET_GUINT32  (mem + 0x50, 0x00020900);
	SET_GUINT32  (mem + 0x58, 0x000000c0);
	SET_GUINT32  (mem + 0x5c, 0x46000000);

	PPS_SET_TYPE (mem, pps->type);
	PPS_SET_SIZE (mem, pps->size);
	PPS_SET_NEXT (mem, pps->next);
	PPS_SET_PREV (mem, pps->prev);
	PPS_SET_DIR  (mem, pps->dir);
Michael Meeks's avatar
Michael Meeks committed
465
        PPS_SET_STARTBLOCK(mem, pps->start);
466
}
Michael Meeks's avatar
Michael Meeks committed
467

468 469 470 471 472
/*
 * read_pps:
 * Walks the root directory chain and decodes every PPS record into
 * f->pps.  Each big block on the chain is marked UNUSED_BLOCK in f->bb
 * once it has been decoded.
 *
 * Returns 1 on success.  On a bad chain or an empty directory it
 * returns 0, frees everything it decoded and leaves f->pps untouched.
 */
static int
read_pps (MS_OLE *f)
{
	BLP blk;
	GPtrArray *ans;
	guint i;

	g_return_val_if_fail (f, 0);
	g_return_val_if_fail (f->bb, 0);

	/* Allocate only after the argument checks so they cannot leak it */
	ans = g_ptr_array_new ();

	blk = GET_ROOT_STARTBLOCK (f);
#if OLE_DEBUG > 0
	printf ("Root start block %d\n", blk);
#endif
	while (blk != END_OF_CHAIN) {
		int lp;
		BLP last;

		/* The block number comes from the file: it must also index f->bb */
		if (blk == SPECIAL_BLOCK ||
		    blk == UNUSED_BLOCK ||
		    blk >= f->bb->len) {
			printf ("Duff block in root chain\n");
			goto fail;
		}

		for (lp=0;lp<BB_BLOCK_SIZE/PPS_BLOCK_SIZE;lp++) {
			PPS *p  = pps_decode(BBPTR(f,blk) + lp*PPS_BLOCK_SIZE);
			/* Index in the whole directory, not the slot within this block */
			p->pps  = ans->len;
			g_ptr_array_add (ans, p);
		}
		last = blk;
		blk = NEXT_BB(f, blk);
		/* A chain that loops back arrives at an UNUSED_BLOCK entry and
		   is rejected above, so the walk always terminates. */
		g_array_index (f->bb, BLP, last) = UNUSED_BLOCK;
	}

	if (ans->len < 1) {
		printf ("Root directory too small\n");
		goto fail;
	}

	f->pps = ans;
	return 1;

 fail:
	for (i=0;i<ans->len;i++) {
		PPS *p = g_ptr_array_index (ans, i);
		g_free (p->name);
		g_free (p);
	}
	g_ptr_array_free (ans, TRUE);
	return 0;
}

/*
 * write_pps:
 * Encodes every entry of f->pps into freshly allocated big blocks,
 * chains those blocks together in f->bb and stores the first one as the
 * root start block in the header.  Each entry (name and struct) is
 * freed once written; f->pps is freed and cleared.  Returns 1, or 0 if
 * @f has no directory or block table.
 */
static int
write_pps (MS_OLE *f)
{
	const guint per_block = BB_BLOCK_SIZE/PPS_BLOCK_SIZE;
	guint ppslp;
	BLP blk  = END_OF_CHAIN;
	BLP last = END_OF_CHAIN;

	g_return_val_if_fail (f, 0);
	g_return_val_if_fail (f->pps, 0);
	g_return_val_if_fail (f->bb, 0);

	for (ppslp=0;ppslp<f->pps->len;ppslp++) {
		PPS *cur;
		if (ppslp%per_block==0) {
			/* Current block is full (or this is the first entry) */
			last  = blk;
			blk   = next_free_bb (f);
			g_assert (g_array_index (f->bb, BLP, blk) == UNUSED_BLOCK);
			if (last != END_OF_CHAIN)
				g_array_index (f->bb, BLP, last) = blk;
		        else {
#if OLE_DEBUG > 0
				printf ("Set root block to %d\n", blk);
#endif
				SET_ROOT_STARTBLOCK (f, blk);
			}

			g_array_index (f->bb, BLP, blk) = END_OF_CHAIN;
		}
		cur = g_ptr_array_index (f->pps, ppslp);

		pps_encode (BBPTR(f,blk) + (ppslp%per_block)*PPS_BLOCK_SIZE,
			    cur);
		/* The array is discarded below, so release the entry itself
		   too, not just its name. */
		g_free (cur->name);
		g_free (cur);
	}
	g_ptr_array_free (f->pps, TRUE);
	f->pps = 0;
	return 1;
}

545 546
/*
 * read_sb:
 * Builds the two small block tables:
 *   f->sbf - the big blocks holding the small block file, found by
 *            following the chain from the root entry's start block;
 *   f->sb  - the small block allocation table, read from the chain
 *            starting at the header's SBD start block and trimmed
 *            after the last entry that is not UNUSED_BLOCK.
 *
 * Returns 1 on success.  On failure it returns 0 with both tables
 * freed and cleared.
 */
static int
read_sb (MS_OLE *f)
{
	BLP ptr;
	int lastidx, idx;
	guint32 visited;
	PPS *root;

	g_return_val_if_fail (f, 0);
	g_return_val_if_fail (f->bb, 0);
	g_return_val_if_fail (f->pps, 0);
	g_return_val_if_fail (f->pps->len > 0, 0);

	root = g_ptr_array_index (f->pps, 0);
	g_return_val_if_fail (root, 0);

	f->sbf = g_array_new (FALSE, FALSE, sizeof(BLP));
	f->sb  = g_array_new (FALSE, FALSE, sizeof(BLP));

	/* List of big blocks in SB file */
	ptr = root->start;
#if OLE_DEBUG > 0
	printf ("Starting Small block file at %d\n", root->start);
#endif
	while (ptr != END_OF_CHAIN) {
		/* Reject markers, out-of-range blocks, and chains longer than
		   the file has blocks (which can only be a loop). */
		if (ptr == UNUSED_BLOCK ||
		    ptr == SPECIAL_BLOCK ||
		    ptr >= f->bb->len ||
		    f->sbf->len >= f->bb->len) {
			printf ("Corrupt small block file: serious error, "
				"invalid block in chain\n");
			goto fail;
		}
		g_array_append_val (f->sbf, ptr);
		ptr = NEXT_BB (f, ptr);
	}

	/* Description of small blocks */
	lastidx = -1;
	idx     = 0;
	visited = 0;
	ptr = GET_SBD_STARTBLOCK (f);
	while (ptr != END_OF_CHAIN) {
		guint32 lp;
		if (ptr == UNUSED_BLOCK ||
		    ptr == SPECIAL_BLOCK ||
		    ptr >= f->bb->len ||
		    visited >= f->bb->len) {
			printf ("Corrupt file descriptor: serious error, "
				"invalid block in chain\n");
			goto fail;
		}
		for (lp=0;lp<BB_BLOCK_SIZE/4;lp++) {
			BLP p = GET_GUINT32 (BBPTR(f, ptr) + lp*4);
			g_array_append_val (f->sb, p);

			if (p != UNUSED_BLOCK)
				lastidx = idx;
			idx++;
		}
		visited++;
		ptr = NEXT_BB (f, ptr);
	}
	/* Always trim: lastidx is -1 when nothing is in use and 0 when only
	   the first small block is, and both cases must shrink the table or
	   the size check below rejects a valid file. */
	g_array_set_size (f->sb, lastidx+1);

	if (f->sbf->len * BB_BLOCK_SIZE < f->sb->len*SB_BLOCK_SIZE) {
		printf ("Not enough small block file for descriptors\n"
			"sbf->len == %d, sb->len == %d\n", f->sbf->len,
			f->sb->len);
		goto fail;
	}

	return 1;

 fail:
	g_array_free (f->sbf, TRUE);
	g_array_free (f->sb, TRUE);
	f->sbf = 0;
	f->sb  = 0;
	return 0;
}

/*
 * write_sb:
 * Writes the small block bookkeeping back to the file:
 *   - chains the big blocks of the small block file (f->sbf) in f->bb
 *     and stores the first one as the root entry's start block;
 *   - writes the small block allocation table (f->sb) into newly
 *     allocated big blocks and stores their start in the header.
 * f->sb and f->sbf are freed and cleared.  Returns 1, or 0 if a
 * required table is missing.
 */
static int
write_sb (MS_OLE *f)
{
	const guint32 per_bb = BB_BLOCK_SIZE/4; /* table entries per big block */
	guint32 lp, lastused;
	PPS *root;
	BLP sbd_start  = END_OF_CHAIN;
	BLP sbf_start  = END_OF_CHAIN;

	g_return_val_if_fail (f, 0);
	g_return_val_if_fail (f->pps, 0);
	g_return_val_if_fail (f->bb, 0);
	g_return_val_if_fail (f->sb, 0);
	g_return_val_if_fail (f->sbf, 0);

	root        = g_ptr_array_index (f->pps, PPS_ROOT_BLOCK);

	if (f->sbf->len * BB_BLOCK_SIZE < f->sb->len*SB_BLOCK_SIZE) {
		printf ("Not enough descriptor / blocks being written %d %d\n",
			f->sbf->len, f->sb->len);
	}
	if (f->sbf->len>0)
		sbf_start = g_array_index (f->sbf, BLP, 0);
	/* Chain up the sbf blocks.  "lp+1 < len" rather than "lp < len-1":
	   len is unsigned and may be 0. */
	for (lp=0;lp+1<f->sbf->len;lp++) {
		BLP blk, next ;
		blk  = g_array_index (f->sbf, BLP, lp);
		next = g_array_index (f->sbf, BLP, lp+1);
		/* Blocks added by next_free_sb arrive as END_OF_CHAIN, but
		   read_sb leaves the chain of a file read from disk intact,
		   so the old value is simply overwritten. */
		g_array_index (f->bb, BLP, blk) = next;
	}

	lastused = END_OF_CHAIN;
	for (lp=0;lp<f->sb->len;lp++) {
		if (g_array_index (f->sb, BLP, lp) != UNUSED_BLOCK)
			lastused = lp;
	}

	if (lastused != END_OF_CHAIN) { /* Bother writing stuff */
		guint8 *mem = 0;
		/* lastused is an index, so lastused+1 entries must fit */
		guint32 num_sbdf = lastused/per_bb + 1;
		BLP blk = END_OF_CHAIN, last;

#if OLE_DEBUG > 0
		printf ("Num SB descriptor blocks : %d\n", num_sbdf);
#endif
		for (lp=0;lp<num_sbdf*per_bb;lp++) {
			BLP set;
			if (lp%per_bb == 0) {
				/* Start a new descriptor block and link it in */
				last = blk;
				blk = next_free_bb(f);
				if (!lp)
					sbd_start = blk;
				if (last != END_OF_CHAIN)
					g_array_index (f->bb, BLP, last) = blk;
				g_array_index (f->bb, BLP, blk) = END_OF_CHAIN;
				mem = BBPTR (f, blk);
			}
			if (lp<f->sb->len)
				set = g_array_index (f->sb, BLP, lp);
			else
				set = UNUSED_BLOCK;
			SET_GUINT32 (mem + (lp%per_bb)*4, set);
		}
	} else {
#if OLE_DEBUG > 0
		printf ("Blank SB allocation\n");
#endif
		sbf_start = END_OF_CHAIN;
	}

	root->start = sbf_start;
	SET_SBD_STARTBLOCK (f, sbd_start);
	g_array_free (f->sb,  TRUE);
	g_array_free (f->sbf, TRUE);
	f->sb       = 0;
	f->sbf      = 0;
	return 1;
}

694
static int
695
ms_ole_setup (MS_OLE *f)
696
{
697 698
	if (read_bb  (f) &&
	    read_pps (f) &&
Michael Meeks's avatar
Michael Meeks committed
699 700 701 702 703
	    read_sb  (f)) {
#if OLE_DEBUG > 1
		printf ("Just read header of\n");
		dump_header (f);
#endif		
704
		return 1;
Michael Meeks's avatar
Michael Meeks committed
705
	}
706
	return 0;
707 708
}

709
static int
710
ms_ole_cleanup (MS_OLE *f)
711
{
712 713
	if (f->mode != 'w') /* Nothing to write */
		return 1;
Michael Meeks's avatar
Michael Meeks committed
714 715 716 717 718 719 720 721
#if OLE_DEBUG > 1
	printf ("About to write header of: \n");
	dump_header (f);
#endif
	if (write_sb  (f) &&
	    write_pps (f) &&
	    write_bb  (f))
		return 1;
722
	return 0;
723 724
}

725 726
static MS_OLE *
new_null_msole ()
727
{
728 729 730 731 732 733 734 735 736 737 738 739
	MS_OLE *f = g_new0 (MS_OLE, 1);

	f->mem    = (guint8 *)0xdeadbeef;
	f->length = 0;
	f->mode   = 'r';
	f->bb     = 0;
#ifndef OLE_MMAP
	f->bbptr  = 0;
#endif
	f->sb     = 0;
	f->sbf    = 0;
	f->pps    = 0;
Michael Meeks's avatar
Michael Meeks committed
740
	f->dirty  = 0;
741 742

	return f;
743 744
}

745 746
MS_OLE *
ms_ole_open (const char *name)
747
{
748 749 750 751 752 753 754 755 756 757 758 759 760
	struct stat st;
	int prot = PROT_READ | PROT_WRITE;
	int file;
	char mode;
	MS_OLE *f;

#if OLE_DEBUG > 0
	printf ("New OLE file '%s'\n", name);
#endif

	f = new_null_msole();
#if OLE_MMAP
	f->file_descriptor = file = open (name, O_RDWR);
Michael Meeks's avatar
Michael Meeks committed
761
	f->mode = 'w';
762 763
	if (file == -1) {
		f->file_descriptor = file = open (name, O_RDONLY);
Michael Meeks's avatar
Michael Meeks committed
764
		f->mode = 'r';
765 766 767
		prot &= ~PROT_WRITE;
	}
	if (file == -1 || fstat(file, &st))
768
	{
769 770 771
		printf ("No such file '%s'\n", name);
		g_free (f) ;
		return 0;
772
	}
773 774 775 776 777 778 779 780 781 782 783 784
	f->length = st.st_size;
	if (f->length<=0x4c)  /* Bad show */
	{
		printf ("File '%s' too short\n", name);
		close (file) ;
		g_free (f) ;
		return 0;
	}

	f->mem = mmap (0, f->length, prot, MAP_SHARED, file, 0);
#else
	f->mem = read (dfjlsdfj, first block only);
785
#endif
786

787 788 789
	if (GET_GUINT32(f->mem    ) != 0xe011cfd0 ||
	    GET_GUINT32(f->mem + 4) != 0xe11ab1a1)
	{
Michael Meeks's avatar
Michael Meeks committed
790
#if OLE_DEBUG > 0
791 792
		printf ("Failed OLE2 magic number %x %x\n",
			GET_GUINT32(f->mem), GET_GUINT32(f->mem+4));
Michael Meeks's avatar
Michael Meeks committed
793
#endif
794 795 796 797 798
		ms_ole_destroy (f);
		return 0;
	}
	if (f->length%BB_BLOCK_SIZE)
		printf ("Warning file '%s':%d non-integer number of blocks\n", name, f->length);
Michael Meeks's avatar
Michael Meeks committed
799 800 801 802

	if (!ms_ole_setup(f)) {
		printf ("'%s' : duff file !\n", name);
		ms_ole_destroy (f);
803 804 805 806 807 808 809 810
		return 0;
	}

#if OLE_DEBUG > 0
	printf ("New OLE file '%s'\n", name);
#endif
	/* If writing then when destroy commit it */
	return f;
811 812
}

Michael Meeks's avatar
Michael Meeks committed
813 814
MS_OLE *
ms_ole_create (const char *name)
815
{
Arturo Espinosa's avatar
Arturo Espinosa committed
816
	struct stat st;
817
	int file, zero=0;
Arturo Espinosa's avatar
Arturo Espinosa committed
818
	MS_OLE *f;
819 820
	int init_blocks = 1, lp;
	guint8 *mem;
821

Michael Meeks's avatar
Michael Meeks committed
822
	if ((file = open (name, O_RDWR|O_CREAT|O_TRUNC|O_NONBLOCK,
823 824
			  S_IRUSR|S_IWUSR|S_IRGRP|S_IWGRP)) == -1)
	{
Arturo Espinosa's avatar
Arturo Espinosa committed
825 826
		printf ("Can't create file '%s'\n", name);
		return 0;
827 828 829 830 831
	}

	if ((lseek (file, BB_BLOCK_SIZE*init_blocks - 1, SEEK_SET)==(off_t)-1) ||
	    (write (file, &zero, 1)==-1))
	{
Arturo Espinosa's avatar
Arturo Espinosa committed
832 833
		printf ("Serious error extending file to %d bytes\n", BB_BLOCK_SIZE*init_blocks);
		return 0;
834 835
	}

836 837
	f = new_null_msole ();

Michael Meeks's avatar
Michael Meeks committed
838 839
	f->file_descriptor  = file;
	f->mode             = 'w';
Arturo Espinosa's avatar
Arturo Espinosa committed
840 841
	fstat(file, &st);
	f->length = st.st_size;
842
	if (f->length%BB_BLOCK_SIZE)
Arturo Espinosa's avatar
Arturo Espinosa committed
843
		printf ("Warning file %d non-integer number of blocks\n", f->length);
844

845
#ifdef OLE_MMAP
Arturo Espinosa's avatar
Arturo Espinosa committed
846
	f->mem  = mmap (0, f->length, PROT_READ|PROT_WRITE, MAP_SHARED, file, 0);
847 848
	if (!f->mem)
	{
Arturo Espinosa's avatar
Arturo Espinosa committed
849 850 851 852
		printf ("Serious error mapping file to %d bytes\n", BB_BLOCK_SIZE*init_blocks);
		close (file);
		g_free (f);
		return 0;
853
	}
854 855 856
#else
#       error Not implemented yet
#endif
857 858
	/* The header block */
	for (lp=0;lp<BB_BLOCK_SIZE/4;lp++)
859
		SET_GUINT32(f->mem + lp*4, (lp<(0x52/4))?0:UNUSED_BLOCK);
860

Arturo Espinosa's avatar
Arturo Espinosa committed
861 862
	SET_GUINT32(f->mem, 0xe011cfd0); /* Magic number */
	SET_GUINT32(f->mem + 4, 0xe11ab1a1);
Michael Meeks's avatar
Michael Meeks committed
863 864

	/* More magic numbers */
Michael Meeks's avatar
Michael Meeks committed
865
	SET_GUINT32(f->mem + 0x18, 0x0003003e);
Michael Meeks's avatar
Michael Meeks committed
866 867 868
	SET_GUINT32(f->mem + 0x1c, 0x0009fffe);
	SET_GUINT32(f->mem + 0x20, 0x6); 
	SET_GUINT32(f->mem + 0x38, 0x00001000); 
869
/*	SET_GUINT32(f->mem + 0x40, 0x1);  */
Michael Meeks's avatar
Michael Meeks committed
870 871
	SET_GUINT32(f->mem + 0x44, 0xfffffffe); 

Michael Meeks's avatar
Michael Meeks committed
872 873 874
	SET_NUM_BBD_BLOCKS  (f, 0);
	SET_ROOT_STARTBLOCK (f, END_OF_CHAIN);
	SET_SBD_STARTBLOCK  (f, END_OF_CHAIN);
875

Michael Meeks's avatar
Michael Meeks committed
876 877 878 879 880 881 882 883 884 885 886 887 888 889 890
	{
		PPS *p;

		f->bb  = g_array_new (FALSE, FALSE, sizeof(BLP));
		f->sb  = g_array_new (FALSE, FALSE, sizeof(BLP));
		f->sbf = g_array_new (FALSE, FALSE, sizeof(BLP));
		f->pps = g_ptr_array_new ();
		p = g_new(PPS, 1);
		p->name  = g_strdup ("Root Entry");
		p->prev  = p->dir = p->next = PPS_END_OF_CHAIN;
		p->pps   = PPS_ROOT_BLOCK;
		p->start = END_OF_CHAIN;
		p->type  = MS_OLE_PPS_ROOT;
		p->size  = 0;
		g_ptr_array_add (f->pps, p);
891
	}
Arturo Espinosa's avatar
Arturo Espinosa committed
892
	return f;
893 894
}

895 896 897
/**
 * This closes the file and truncates any free blocks
 **/
898
void
Michael Meeks's avatar
Michael Meeks committed
899
ms_ole_destroy (MS_OLE *f)
900
{
901 902 903 904
#if OLE_DEBUG > 0
	printf ("FIXME: should truncate to remove unused blocks\n");
#endif
	if (f) {
Michael Meeks's avatar
Michael Meeks committed
905 906
		if (f->dirty)
			ms_ole_cleanup (f);
907

908
#ifdef OLE_MMAP
Arturo Espinosa's avatar
Arturo Espinosa committed
909 910
		munmap (f->mem, f->length);
		close (f->file_descriptor);
911 912 913
#else
#               error No destroy code yet	       
#endif
Arturo Espinosa's avatar
Arturo Espinosa committed
914
		g_free (f);
915

916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940
#if OLE_DEBUG > 0
		printf ("Closing OLE file\n");
#endif
	}
}

/*
 * dump:
 * Hex dump of @len bytes at @ptr to stdout, 16 bytes per row: the row
 * offset, the bytes in hex ("XX" past the end), then the same bytes as
 * characters ('.' for anything outside '"'..'~', '*' past the end).
 */
void
dump (guint8 *ptr, guint32 len)
{
	const guint32 rows = (len+15)/16;
	guint32 row, col;
	guint32 off;

	for (row = 0; row < rows; row++)
	{
		printf ("%8x  |  ", row*16);

		for (col = 0; col < 16; col++) {
			off = col + (row<<4);
			if (off < len)
				printf("%2x ", ptr[off]);
			else
				printf("XX ");
		}

		printf ("  |  ");

		for (col = 0; col < 16; col++) {
			int ch;

			off = col + (row<<4);
			if (off >= len)
				ch = '*';
			else if (ptr[off]>'!' && ptr[off]<127)
				ch = ptr[off];
			else
				ch = '.';
			printf ("%c", ch);
		}

		printf ("\n");
	}
}
943

944
static void
945 946
dump_stream (MS_OLE_STREAM *s)
{
Michael Meeks's avatar
Michael Meeks committed
947 948
	g_return_if_fail (s);

949
	if (s->size>=BB_THRESHOLD)
Arturo Espinosa's avatar
Arturo Espinosa committed
950
		printf ("Big block : ");
951
	else
Arturo Espinosa's avatar
Arturo Espinosa committed
952
		printf ("Small block : ");
953
	printf ("position %d\n", s->position);
954 955
}

956
static void
Michael Meeks's avatar
Michael Meeks committed
957
check_stream (MS_OLE_STREAM *s)
958
{
Michael Meeks's avatar
Michael Meeks committed
959 960 961 962
	BLP blk;
	guint32 idx;
	PPS *p;
	MS_OLE *f;
Michael Meeks's avatar
Michael Meeks committed
963

Michael Meeks's avatar
Michael Meeks committed
964 965 966 967 968 969 970 971 972 973 974 975 976 977 978 979 980
	g_return_if_fail (s);
	g_return_if_fail (s->file);

	f = s->file;
	p = g_ptr_array_index (f->pps, s->pps);

	g_return_if_fail (p);
	blk = p->start;
	idx = 0;
	if (s->strtype == MS_OLE_SMALL_BLOCK) {
		while (blk != END_OF_CHAIN) {
			guint8 *ptr;
			g_assert (g_array_index (s->blocks, BLP, idx) ==
				  blk);
#if OLE_DEBUG > 2
			ptr = GET_SB_START_PTR(f, blk);
			dump (ptr, SB_BLOCK_SIZE);
981
#endif
Michael Meeks's avatar
Michael Meeks committed
982 983 984 985 986 987 988 989 990 991 992
			blk = NEXT_SB(f, blk);
			idx++;
		}
	} else {
		while (blk != END_OF_CHAIN) {
			guint8 *ptr;
			g_assert (g_array_index (s->blocks, BLP, idx) ==
				  blk);
#if OLE_DEBUG > 2
			ptr = BBPTR(f, blk);
			dump (ptr, BB_BLOCK_SIZE);
Michael Meeks's avatar
Michael Meeks committed
993
#endif
Michael Meeks's avatar
Michael Meeks committed
994 995 996
			blk = NEXT_BB(f, blk);
			idx++;
		}
997 998 999
	}
}

1000 1001 1002 1003 1004 1005
/*
 * tell_pos:
 * Returns the stream's current position field.  @s is not checked
 * for NULL.
 */
static ms_ole_pos_t
tell_pos (MS_OLE_STREAM *s)
{
	return s->position;
}

1006 1007 1008 1009 1010 1011 1012 1013
/**
 * Free the allocation chains, and free up the blocks.
 * "It was for freedom that Christ has set us free."
 *   Galatians 5:11
 **/
static void
free_allocation (MS_OLE *f, guint32 startblock, gboolean is_big_block_stream)
{
Michael Meeks's avatar
Michael Meeks committed
1014 1015
	g_return_if_fail (f);

Michael Meeks's avatar
Michael Meeks committed
1016 1017 1018 1019 1020
#if OLE_DEBUG > 0
	printf ("Free allocation %d : (%d)\n", startblock,
		is_big_block_stream);
#endif
       
1021 1022
	if (is_big_block_stream)
	{
1023
		BLP p = startblock;
Arturo Espinosa's avatar
Arturo Espinosa committed
1024
		printf ("FIXME: this should also free up blocks\n");
1025 1026
		while (p != END_OF_CHAIN) {
			BLP next = NEXT_BB(f,p);
Michael Meeks's avatar
Michael Meeks committed
1027 1028 1029 1030 1031 1032 1033 1034 1035
			if (next == p) {
				printf ("Serious bug: cyclic ring in BB allocation\n");
				return;
			} else if (p == SPECIAL_BLOCK ||
				   p == UNUSED_BLOCK) {
				printf ("Serious bug: Special / Unused block "
					"in BB allocation\n");
				return;
			}
1036
			g_array_index (f->bb, BLP, p) = UNUSED_BLOCK;
Arturo Espinosa's avatar
Arturo Espinosa committed
1037
			p = next;
1038 1039 1040 1041
		}
	}
	else
	{
1042 1043 1044
		BLP p = startblock;
		while (p != END_OF_CHAIN) {
			BLP next = NEXT_SB(f,p);
Michael Meeks's avatar
Michael Meeks committed
1045 1046 1047 1048 1049 1050 1051 1052 1053
			if (next == p) {
				printf ("Serious bug: cyclic ring in SB allocation\n");
				return;
			} else if (p == SPECIAL_BLOCK ||
				   p == UNUSED_BLOCK) {
				printf ("Serious bug: Special / Unused block "
					"in SB allocation\n");
				return;
			}
1054
			g_array_index (f->sb, BLP, p) = UNUSED_BLOCK;
Arturo Espinosa's avatar
Arturo Espinosa committed
1055
			p = next;
1056
		}
Michael Meeks's avatar
Michael Meeks committed
1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086
		/* Seek forwards to find blank sbf blocks */
		{
			guint32 lp;
			BLP     lastused = END_OF_CHAIN;
			for (lp=0;lp<f->sb->len;lp++) {
				if (g_array_index (f->sb, BLP, lp) != UNUSED_BLOCK)
					lastused = lp;
			}
			if (lastused == END_OF_CHAIN) {
				for (lp=0;lp<f->sbf->len;lp++) {
					BLP sbfd = g_array_index (f->sbf, BLP, lp);
					g_array_index (f->bb, BLP, sbfd) = UNUSED_BLOCK;
				}
				g_array_set_size (f->sbf, 0);
				g_array_set_size (f->sb, 0);
			} else {
				guint32 sbf_needed = (lastused+(BB_BLOCK_SIZE/SB_BLOCK_SIZE)-1) /
					             (BB_BLOCK_SIZE/SB_BLOCK_SIZE);

				if (sbf_needed == f->sbf->len)
					return;
				
				for (lp=sbf_needed;lp<f->sbf->len;lp++) {
					BLP sbfd = g_array_index (f->sbf, BLP, lp);
					g_array_index (f->bb, BLP, sbfd) = UNUSED_BLOCK;
				}
				g_array_set_size (f->sbf, sbf_needed);
				g_array_set_size (f->sb, lastused+1);
			}
		}
1087