Commit 899f6452 authored by Michael Meeks's avatar Michael Meeks

Jon's new excel col/row sizing code.

parent 5d47bc15
1999-11-18 Michael Meeks <mmeeks@gnu.org>
1999-11-18 Jon K Hellan <hellan@acm.org>
* src/sheet.c (sheet_row_set_height_units): Use rows->default_style
not cols->default_style.
1999-11-18 Michael Meeks <mmeeks@gnu.org>
* src/dialogs/Makefile.am (glade_msgs): Add cell-sort.glade.h
1999-11-18 Miguel de Icaza <miguel@gnu.org>
......
Gnumeric 0.44
Jon:
* Vastly improved col / row sizing in excel import
* ditto for Excel object placement.
--------------------------------------------------------------------------
Gnumeric 0.43
* Minor build fix.
--------------------------------------------------------------------------
Gnumeric 0.42
Michael:
......
1999-11-18 Michael Meeks <mmeeks@gnu.org>
1999-11-18 Jon K Hellan <hellan@acm.org>
* src/sheet.c (sheet_row_set_height_units): Use rows->default_style
not cols->default_style.
1999-11-18 Michael Meeks <mmeeks@gnu.org>
* src/dialogs/Makefile.am (glade_msgs): Add cell-sort.glade.h
1999-11-18 Miguel de Icaza <miguel@gnu.org>
......
1999-11-18 Michael Meeks <mmeeks@gnu.org>
* ms-excel-read.c (lookup_base_char_width): kill double init of
sample & add check.
1999-11-18 Jon K Hellan <hellan@acm.org>
* ms-obj.c (ms_parse_object_anchor): Just extract anchor data,
don't try to convert to pixels. Extracted anchor data has type
anchor_point[4], i.e. 4 pairs of col/row number and fractions of
col/row.
(ms_obj_realize): Now receives uninterpreted anchor data. Thus,
conversion to position is delayed until the entire sheet has been
read. Use object_anchor_to_position to interpret. 'anchor'
variable renamed to 'position'.
(object_anchor_to_position): New function: Convert an anchor to
position in points. Based on data interpretation in previous
version of ms_parse_object_anchor. Interpret as 256th's rather
than 1024th's vertically. Base computation on actual cell
width/height.
* ms-obj.h (type anchor_point): define it.
(type MSObj): Change type of anchor from int[4] to
anchor_point[4].
Update signature of ms_parse_object_anchor.
* ms-excel-read.h (_ExcelSheet): Add base_char_width member.
(EXCEL_DEFAULT_CHAR_WIDTH): Define it.
* ms-excel-read.c (get_substitute_font):
New function: Try to find a gnome font which matches the Excel font.
(ms_excel_get_style_from_xf): Use get_substitute_font.
(ms_excel_sheet_new): Initialize base_char_width to 0, i.e. unknown.
(print_font_mapping_debug_info): Debug print in separate routine.
(get_base_char_width): New function: Return base char width for
column sizing. If not yet known, call lookup_base_char_width to
find it.
(lookup_base_char_width): New function. See get_base_char_width.
(get_row_height_units): New function. We could adjust excel
heights here, but there seems to be no need.
(get_units_net_of_margins): New function. Subtract margins from a
point size.
(ms_excel_read_cell):
case BIFF_ROW: Specify height in points, not pixels. Use
get_row_height_units and get_units_net_of_margins. Call
sheet_row_set_height_units rather than sheet_row_set_height.
case BIFF_COLINFO: Specify width in points, not pixels. Scale
column based on actual font. Use get_base_char_width and
get_units_net_of_margins. Call sheet_col_set_width_units rather
than sheet_col_set_width.
(ms_excel_read_sheet):
case BIFF_DEFAULTROWHEIGHT: Handle it. Like BIFF_ROW, but use
sheet_row_set_internal_height on default style.
case BIFF_DEFCOLWIDTH: Handle it. Like BIFF_COLINFO, but use
sheet_col_set_internal_width on default style.
* ms-escher.c (_MSEscherHeader):
Change type of anchor to anchor_point[4]
1999-11-18 Michael Meeks <mmeeks@gnu.org>
* ms-excel-read.c (ms_excel_read_guts): rename.
......
......@@ -45,8 +45,8 @@ typedef struct _MSEscherHeader
struct _MSEscherHeader * container;
/* TODO : decide where to put these because they don't belong here */
gboolean anchor_set;
int anchor[4];
gboolean anchor_set;
anchor_point anchor[4];
int blip_id;
} MSEscherHeader;
#define common_header_len 8
......
......@@ -992,6 +992,30 @@ ms_excel_get_xf (ExcelSheet *sheet, int const xfidx)
return xf;
}
/*
 * get_substitute_font:
 * @fontname: font name as read from the Excel file; may be NULL.
 *
 * Try to find a gnome font which matches the Excel font.  The lookup is a
 * case-insensitive comparison against a small built-in table.
 *
 * Returns the substitute name, or NULL when @fontname is NULL or has no
 * entry in the table.  The returned string lives in a static table and
 * must not be freed or modified by the caller.
 */
static gchar *
get_substitute_font (gchar *fontname)
{
	/* Strictly for testing. (Wanna bet how long it stays in?) */
	static char *temporary[][2] = {
		{ "Times New Roman", "Times"},
		{ "Arial",           "Helvetica"},
		{ "Courier New",     "Courier"},
		{ NULL }
	};
	char *(*p)[2];

	/* strcasecmp must never be handed a NULL pointer */
	if (fontname == NULL)
		return NULL;

	/* The table is terminated by an entry whose Excel name is NULL */
	for (p = temporary; (*p)[0]; p++)
		if (strcasecmp ((*p)[0], fontname) == 0)
			return (*p)[1];

	return NULL;
}
static void
style_optimize (ExcelSheet *sheet, int col, int row)
{
......@@ -1036,6 +1060,7 @@ ms_excel_get_style_from_xf (ExcelSheet *sheet, guint16 xfidx)
int pattern_index, back_index, font_index;
MStyle *mstyle;
int i;
char *subs_fontname;
g_return_val_if_fail (xf != NULL, NULL);
......@@ -1060,7 +1085,11 @@ ms_excel_get_style_from_xf (ExcelSheet *sheet, guint16 xfidx)
/* Font */
fd = ms_excel_get_font (sheet, xf->font_idx);
if (fd != NULL) {
mstyle_set_font_name (mstyle, fd->fontname);
subs_fontname = get_substitute_font (fd->fontname);
if (subs_fontname)
mstyle_set_font_name (mstyle, subs_fontname);
else
mstyle_set_font_name (mstyle, fd->fontname);
mstyle_set_font_size (mstyle, fd->height / 20.0);
mstyle_set_font_bold (mstyle, fd->boldness >= 0x2bc);
mstyle_set_font_italic (mstyle, fd->italic);
......@@ -1890,6 +1919,7 @@ ms_excel_sheet_new (ExcelWorkbook *wb, const char *name)
ans->style_optimize.start.row = 0;
ans->style_optimize.end.col = 0;
ans->style_optimize.end.row = 0;
ans->base_char_width = 0;
return ans;
}
......@@ -2269,6 +2299,132 @@ ms_excel_externname (BiffQuery *q, ExcelSheet *sheet)
biff_name_data_new (sheet->wb, name, 0, defn, defnlen, TRUE, FALSE);
}
#ifndef NO_DEBUG_EXCEL
/*
 * Debug helper: print the name and point size of the font referenced by
 * XF record 0 of @sheet and, when @ms is non-NULL, the font name and size
 * that style carries.  Prints nothing if the XF record or its font cannot
 * be looked up.
 */
static void
print_font_mapping_debug_info (ExcelSheet *sheet, MStyle const *ms)
{
	BiffXFData const *normal_xf;
	BiffFontData const *excel_font;

	normal_xf = ms_excel_get_xf (sheet, 0);
	if (normal_xf == NULL)
		return;

	excel_font = ms_excel_get_font (sheet, normal_xf->font_idx);
	if (!excel_font)
		return;

	/* Excel font heights are stored in 1/20 of a point */
	printf ("Font: %s %g",
		excel_font->fontname, excel_font->height/20.0);

	if (ms) {
		const char *mapped_name = mstyle_get_font_name (ms);

		printf (" mapped to %s %.6g",
			mapped_name, mstyle_get_font_size (ms));
	}

	printf ("\n");
}
#endif
/*
 * lookup_base_char_width:
 * @sheet: the Excel sheet being imported.
 *
 * Work out the base character width used to scale Excel column widths.
 * Measures a text sample in the font of XF record 0 and returns the
 * average character width multiplied by an empirical scaling factor.
 * Falls back to EXCEL_DEFAULT_CHAR_WIDTH when the workbook has no XF
 * records or no style can be obtained for record 0.
 *
 * NOTE(review): whether 'ms' and 'sf' are owned references that should be
 * released here is not visible from this function - confirm against the
 * MStyle / StyleFont APIs.
 */
static double
lookup_base_char_width (ExcelSheet *sheet)
{
	/*
	 * There is no such thing as a typical width, but we have to
	 * do something.
	 *
	 * Looks like the width of 'n' is very close to Excel's concept of
	 * width. For Times in 3 sizes, the average is .45% too low, for
	 * Helvetica in 3 sizes it is .02% too low.
	 *
	 * Widths based on text samples come out lower, but can be used if
	 * scaled appropriately. Using the sample below, we reduce the
	 * difference in precision between Times and Helvetica to 0.13%.
	 */
	MStyle const *ms = NULL;
	double res;
	gboolean def;

	/*
	 * The char width is based on the font in the "Normal" style.
	 * I'm only guessing that 0 is the right index, but I've been
	 * right so far.
	 */
	def = !sheet->wb->XF_cell_records ||
		sheet->wb->XF_cell_records->len == 0;
	if (!def) {
		ms = ms_excel_get_style_from_xf (sheet, 0);
		if (!ms)
			def = TRUE;
	}

	if (def)
		res = EXCEL_DEFAULT_CHAR_WIDTH;
	else {
		StyleFont *sf;
		double samplewidth, average;
		double scaling = 1.2304;
		static char *sample =
			"Widths based on text samples come out too low, but "
			"can be used if scaled appropriately. Experiments "
			"showed that a 2 line sample was very slightly more";
		/*
		 * Keep the length as an int: it is short, and it is printed
		 * with %d below (strlen returns size_t, which must not be
		 * passed to %d directly).
		 */
		int const sample_len = (int) strlen (sample);

		sf = mstyle_get_font (ms, 1.0);
		samplewidth = gnome_font_get_width_string
			(style_font_gnome_font (sf), sample);
		average = samplewidth / sample_len;
		res = average * scaling;
#ifndef NO_DEBUG_EXCEL
		if (ms_excel_read_debug > 2) {
			print_font_mapping_debug_info (sheet, ms);
			printf ("Character width based on %d character sample:"
				" %g - adjusted to %g\n",
				sample_len, average, res);
		}
#endif
	}

	return res;
}
/*
 * get_base_char_width:
 * @sheet: the Excel sheet being imported.
 *
 * Return the base character width used for column sizing.  The value is
 * cached in sheet->base_char_width; a cached value <= 0 means "not yet
 * known" and triggers a call to lookup_base_char_width.
 *
 * The width is derived from the font of the "Normal" style, which is
 * really common to every sheet in the workbook, but it is kept per sheet
 * because that was found to be more robust.
 */
static double
get_base_char_width (ExcelSheet *sheet)
{
	double width = sheet->base_char_width;

	if (width <= 0) {
		width = lookup_base_char_width (sheet);
		sheet->base_char_width = width;
	}

	return width;
}
/*
 * get_row_height_units:
 * @height: row height as stored by Excel, in 1/20 of a point.
 *
 * Convert an Excel row height to points.
 *
 * No further correction is applied.  We can not assume that
 * 1pt = 1pixel, but what we print is only about 0.5% shorter than the
 * theoretical height, while Excel's own output varies from close to
 * theoretical to a few % shorter.  Correcting for the 0.5% is pointless
 * until the whole story is known.
 */
static double
get_row_height_units (guint16 height)
{
	double const points_per_unit = 1. / 20.;

	return points_per_unit * height;
}
/*
 * get_units_net_of_margins:
 * @units: a col/row size including margins.
 * @cri:   the col/row info supplying margin_a_pt and margin_b_pt.
 *
 * Subtract both margins of @cri from @units.
 *
 * Returns the remaining size, clamped so that it is never negative.
 */
static double
get_units_net_of_margins (double units, const ColRowInfo * cri)
{
	double const net = units - (cri->margin_a_pt + cri->margin_b_pt);

	return (net < 0) ? 0 : net;
}
/**
* Parse the cell BIFF tag, and act on it as necessary.
* NB. Microsoft docs give offsets from the start of the BIFF record; subtract 4 from the offsets in their docs.
......@@ -2398,14 +2554,15 @@ ms_excel_read_cell (BiffQuery *q, ExcelSheet *sheet)
if (ms_excel_read_debug > 1)
printf ("Row %d height 0x%x;\n", row+1, height);
#endif
/* FIXME : the height is specified in 1/20 of a point.
* but we can not assume that 1pt = 1pixel.
* MS seems to assume that it is closer to 1point = .75 pixels
* verticaly.
*/
if ((height&0x8000) == 0)
sheet_row_set_height (sheet->gnum_sheet, row,
height/(20 * .75), TRUE);
if ((height&0x8000) == 0) {
double hu = get_row_height_units (height);
/* Subtract margins */
hu = get_units_net_of_margins
(hu,
sheet_row_get_info (sheet->gnum_sheet, row));
sheet_row_set_height_units (sheet->gnum_sheet,
row, hu, TRUE);
}
if (flags & 0x80) {
#ifndef NO_DEBUG_EXCEL
......@@ -2423,13 +2580,11 @@ ms_excel_read_cell (BiffQuery *q, ExcelSheet *sheet)
case BIFF_COLINFO:
{
int lp;
int char_width = 1;
BiffXFData const *xf = NULL;
BiffFontData const *fd = NULL;
double char_width = EXCEL_DEFAULT_CHAR_WIDTH;
double col_width;
guint16 const firstcol = MS_OLE_GET_GUINT16(q->data);
guint16 lastcol = MS_OLE_GET_GUINT16(q->data+2);
guint16 width = MS_OLE_GET_GUINT16(q->data+4);
guint16 const cols_xf = MS_OLE_GET_GUINT16(q->data+6);
guint16 const options = MS_OLE_GET_GUINT16(q->data+8);
gboolean const hidden = (options & 0x0001) ? TRUE : FALSE;
#if 0
......@@ -2446,18 +2601,7 @@ ms_excel_read_cell (BiffQuery *q, ExcelSheet *sheet)
firstcol, lastcol, width/256.0);
}
#endif
/*
* FIXME FIXME FIXME
* 1) As a default 12 seems seems to match the sheet I
* calibrated against.
* 2) the docs say charwidth not height. Assume that
* width = 1.2 * height ?
*/
if ((xf = ms_excel_get_xf (sheet, cols_xf)) != NULL &&
(fd = ms_excel_get_font (sheet, xf->font_idx)))
char_width = 1.2 *fd->height / 20.;
else
char_width = 12.;
char_width = get_base_char_width (sheet);
if (width>>8 == 0) {
if (hidden)
......@@ -2466,24 +2610,21 @@ ms_excel_read_cell (BiffQuery *q, ExcelSheet *sheet)
printf ("FIXME: 0 sized column ???\n");
/* FIXME : Make the magic default col width a define or function somewhere */
width = 62;
col_width = 62;
} else
/* NOTE : Do NOT use *= we need to do the width*char_width before the division */
width = (width * char_width) / 256.;
/* FIXME : the width is specified in points (1/72 of an inch)
* but we can not assume that 1pt = 1pixel.
* MS seems to assume that it is closer to 1 point = .7 pixels
* horizontally. (NOTE : this is different from vertically)
*/
width *= .70;
col_width = (width * char_width) / 256.;
/* Subtract margins */
col_width = get_units_net_of_margins
(col_width,
sheet_col_get_info (sheet->gnum_sheet, firstcol));
/* NOTE : seems like this is inclusive firstcol, inclusive lastcol */
if (lastcol >= SHEET_MAX_COLS)
lastcol = SHEET_MAX_COLS-1;
for (lp = firstcol; lp <= lastcol; ++lp)
sheet_col_set_width (sheet->gnum_sheet, lp,
width);
sheet_col_set_width_units
(sheet->gnum_sheet, lp, col_width);
break;
}
......@@ -2851,11 +2992,62 @@ ms_excel_read_sheet (ExcelSheet *sheet, BiffQuery *q, ExcelWorkbook *wb)
case BIFF_DELTA:
case BIFF_SAVERECALC:
case BIFF_PRINTHEADERS:
case BIFF_DEFAULTROWHEIGHT:
case BIFF_COUNTRY:
case BIFF_WSBOOL:
break;
case BIFF_DEFAULTROWHEIGHT:
{
guint16 const flags = MS_OLE_GET_GUINT16(q->data);
guint16 const height = MS_OLE_GET_GUINT16(q->data+2);
double height_units;
ColRowInfo *cri;
#ifndef NO_DEBUG_EXCEL
if (ms_excel_read_debug > 1) {
printf ("Default row height 0x%x;\n", height);
if (flags & 0x04)
printf (" + extra space above;\n");
if (flags & 0x08)
printf (" + extra space below;\n");
}
#endif
height_units = get_row_height_units (height);
cri = &sheet->gnum_sheet->rows.default_style;
/* Subtract margins */
height_units = get_units_net_of_margins
(height_units,
&sheet->gnum_sheet->rows.default_style);
/* Don't know why, but it's too late now to
just change the default */
sheet_row_set_internal_height
(sheet->gnum_sheet, cri, height_units);
break;
}
case BIFF_DEFCOLWIDTH:
{
guint16 const width = MS_OLE_GET_GUINT16(q->data);
double char_width = EXCEL_DEFAULT_CHAR_WIDTH;
double col_width;
ColRowInfo *cri;
#ifndef NO_DEBUG_EXCEL
if (ms_excel_read_debug > 1) {
printf ("Default column width %d "
"characters\n", width);
}
#endif
char_width = get_base_char_width (sheet);
col_width = width * char_width;
cri = &sheet->gnum_sheet->cols.default_style;
/* Subtract margins */
col_width = get_units_net_of_margins (col_width, cri);
/* Don't know why, but it's too late now to
just change the default */
sheet_col_set_internal_width
(sheet->gnum_sheet, cri, col_width);
break;
}
case BIFF_GUTS:
ms_excel_read_guts (q, sheet);
break;
......@@ -2997,9 +3189,6 @@ ms_excel_read_sheet (ExcelSheet *sheet, BiffQuery *q, ExcelWorkbook *wb)
g_warning ("Duff BIFF_SETUP");
break;
case BIFF_DEFCOLWIDTH:
break;
case BIFF_SCL:
if (q->length == 4) {
/* Zoom stored as an Egyptian fraction */
......
......@@ -20,6 +20,7 @@ typedef struct _ExcelSheet
GHashTable *shared_formulae;
GList *obj_queue;
Range style_optimize;
double base_char_width;
} ExcelSheet;
typedef struct _BiffBoundsheetData
......@@ -108,6 +109,8 @@ typedef struct _ExcelWorkbook
Workbook *gnum_wb;
} ExcelWorkbook;
#define EXCEL_DEFAULT_CHAR_WIDTH 12
extern ExcelSheet * ms_excel_workbook_get_sheet (ExcelWorkbook *wb, guint idx);
extern Sheet* biff_get_externsheet_name (ExcelWorkbook *wb, guint16 idx, gboolean get_first);
extern char* biff_get_text (guint8 const *ptr, guint32 length, guint32 *byte_length);
......
......@@ -36,40 +36,79 @@ extern int ms_excel_read_debug;
#define GR_CHECKBOX_FORMULA 0x14
#define GR_COMMON_OBJ_DATA 0x15
/*
 * object_anchor_to_position:
 * @points: output; receives the left, top, right and bottom positions.
 * @obj:    object whose anchor[] holds four (pos, nths) pairs.
 * @sheet:  sheet used to measure column and row distances.
 *
 * Convert the raw anchor of @obj into four positions.  Each result is the
 * unit distance from column/row 0 to the anchor's column/row, plus the
 * anchor's fraction of that column's/row's own size.
 */
static void
object_anchor_to_position (double points[4], MSObj*obj, Sheet const * sheet)
{
	/*
	 * NOTE: The docs say the fraction is expressed in 1/1024 of the
	 * cell dimension.  That can not be right vertically, at least for
	 * Excel 97; 256 looks correct there.  Possibly a version issue?
	 */
	int corner;

	for (corner = 0; corner < 4; corner++) {
		int const cell = obj->anchor[corner].pos;
		int const fraction = obj->anchor[corner].nths;
		double offset;

		if ((corner & 1) == 0) {
			/* even entries are columns */
			offset = sheet_col_get_unit_distance (sheet, cell,
							      cell + 1);
			offset *= fraction / 1024.;
			offset += sheet_col_get_unit_distance (sheet, 0,
							       cell);
		} else {
			/* odd entries are rows */
			offset = sheet_row_get_unit_distance (sheet, cell,
							      cell + 1);
			offset *= fraction / 256.;
			offset += sheet_row_get_unit_distance (sheet, 0,
							       cell);
		}

		points[corner] = offset;
	}

#ifndef NO_DEBUG_EXCEL
	if (ms_excel_read_debug > 0)
		printf ("Anchor position in points"
			" left = %g, top = %g, right = %g, bottom = %g;\n",
			points[0], points[1], points[2], points[3]);
#endif
}
/*
* Attempt to install an object in supplied work book.
*/
gboolean
ms_obj_realize (MSObj *obj, ExcelWorkbook *wb, ExcelSheet *sheet)
{
int *anchor = NULL;
double position[4];
g_return_val_if_fail (sheet != NULL, TRUE);
if (obj == NULL)
return TRUE;
anchor = obj->anchor;
object_anchor_to_position (position, obj, sheet->gnum_sheet);
switch (obj->gnumeric_type) {
case SHEET_OBJECT_BUTTON :
sheet_object_create_button (sheet->gnum_sheet,
anchor[0], anchor[1],
anchor[2], anchor[3]);
position[0], position[1],
position[2], position[3]);
break;
case SHEET_OBJECT_CHECKBOX :
sheet_object_create_checkbox (sheet->gnum_sheet,
anchor[0], anchor[1],
anchor[2], anchor[3]);
position[0], position[1],
position[2], position[3]);
break;
case SHEET_OBJECT_BOX :
sheet_object_realize (
sheet_object_create_filled (sheet->gnum_sheet,
SHEET_OBJECT_BOX,
anchor[0], anchor[1],
anchor[2], anchor[3],
position[0], position[1],
position[2], position[3],
"white", "black", 1));
break;
......@@ -95,8 +134,8 @@ ms_obj_realize (MSObj *obj, ExcelWorkbook *wb, ExcelSheet *sheet)
g_return_val_if_fail (blip->stream != NULL, FALSE);
g_return_val_if_fail (blip->reproid != NULL, FALSE);
so = sheet_object_container_new (sheet->gnum_sheet,
anchor[0], anchor[1],
anchor[2], anchor[3],
position[0], position[1],
position[2], position[3],
blip->reproid);
if (!sheet_object_bonobo_load (SHEET_OBJECT_BONOBO (so), blip->stream))
g_warning ("Failed to load '%s' from stream",
......@@ -153,58 +192,32 @@ ms_excel_sheet_destroy_objs (ExcelSheet *sheet)
}
gboolean
ms_parse_object_anchor (int anchor[4],
ms_parse_object_anchor (anchor_point anchor[4],
Sheet const * sheet, guint8 const * data)
{
/* Words 0, 4, 8, 12 : The row/col of the corners */
/* Words 2, 6, 10, 14 : distance from cell edge measured in 1/1024 of an inch */
int i;
float const zoom = sheet->last_zoom_factor_used;
/* Words 2, 6, 10, 14 : distance from cell edge */
/* FIXME : How to handle objects not in sheets ?? */
g_return_val_if_fail (sheet != NULL, TRUE);
int i;
for (i = 0; i < 4; ++i) {
guint16 const pos = MS_OLE_GET_GUINT16 (data + 4 * i);
/* FIXME : we are slightly off. Tweak the pixels/inch ratio
* to make this come out on my screen for pic.xls.
* See BIFF_COLINFO or BIFF_ROW for more info
*
* This constant should be made into a std routine somewhere.
*/
float margin = (MS_OLE_GET_GUINT16 (data + 4 * i + 2) / (1024. / 72.));
int tmp;
anchor[i].pos = MS_OLE_GET_GUINT16 (data + 4 * i);
anchor[i].nths = MS_OLE_GET_GUINT16 (data + 4 * i + 2);
if (i&1) { /* odds are rows */
tmp = sheet_row_get_unit_distance (sheet, 0, pos);
margin /= .75;
} else {
tmp = sheet_col_get_unit_distance (sheet, 0, pos);
margin *= .75;
}
#ifndef NO_DEBUG_EXCEL
if (ms_excel_read_debug > 1) {
printf ("%f units (%d pixels) from ",
margin, (int)(margin));
int pos = anchor[i].pos;
printf ("%d/%d cell %s from ",
anchor[i].nths, (i & 1) ? 256 : 1024,
(i & 1) ? "heights" : "widths");
if (i & 1)
printf ("row %d;\n", pos + 1);
else
printf ("col %s (%d);\n", col_name(pos), pos);
}
#endif
margin *= zoom;
margin += tmp;
anchor[i] = (int)margin;
}
#ifndef NO_DEBUG_EXCEL
if (ms_excel_read_debug > 0)
printf ("In pixels left = %d, top = %d, right = %d, bottom =d %d;\n",
anchor[0], anchor[1], anchor[2], anchor[3]);
#endif
return FALSE;
}
......
......@@ -22,10 +22,16 @@
#include "ms-excel-read.h"
/*
 * One uninterpreted coordinate of an object anchor: a column or row
 * index plus a fractional offset into that column or row.
 */
typedef struct
{
	int pos;  /* Column or row number */
	int nths; /* Fraction of that column/row: 1/1024ths for columns, 1/256ths for rows */
} anchor_point;
typedef struct
{
/* In pixels */
int anchor[4];
anchor_point anchor[4];
gboolean anchor_set;
int id;
......@@ -40,7 +46,7 @@ typedef struct
} v;
} MSObj;