/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * parse-util.c: Various utility routines to parse or produce
 *     string representations of common reference types.
 *
 * Copyright (C) 2000-2007 Jody Goldberg (jody@gnome.org)
 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
24 25
#include <gnumeric-config.h>
#include "gnumeric.h"
26
#include "parse-util.h"
27

28
#include "application.h"
29
#include "workbook.h"
30 31 32
#include "sheet.h"
#include "value.h"
#include "ranges.h"
Jody Goldberg's avatar
Jody Goldberg committed
33 34
#include "cell.h"
#include "expr.h"
35
#include "number-match.h"
36
#include "gnm-format.h"
37
#include "expr-name.h"
38
#include "func.h"
39 40
#include "mstyle.h"
#include "sheet-style.h"
Jody Goldberg's avatar
Jody Goldberg committed
41
/* For std_expr_name_handler: */
42
#include "expr-impl.h"
43
#include "gutils.h"
44
#include <goffice/goffice.h>
45

46
#include <errno.h>
47
#include <stdlib.h>
48
#include <glib.h>
Jody Goldberg's avatar
Jody Goldberg committed
49
#include <string.h>
50

51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
/* Boxed-type copy hook: returns a newly allocated bitwise copy of @li. */
static GnmLexerItem *
gnm_lexer_item_copy (GnmLexerItem *li)
{
	GnmLexerItem *dup = g_new (GnmLexerItem, 1);

	memcpy (dup, li, sizeof *dup);
	return dup;
}

/*
 * Returns the boxed GType for GnmLexerItem, registering it the first
 * time the function is called.  Copies are made with
 * gnm_lexer_item_copy and released with g_free.
 */
GType
gnm_lexer_item_get_type (void)
{
	static GType lexer_item_type = 0;

	if (lexer_item_type != 0)
		return lexer_item_type;

	lexer_item_type = g_boxed_type_register_static
		("GnmLexerItem",
		 (GBoxedCopyFunc)gnm_lexer_item_copy,
		 (GBoxedFreeFunc)g_free);
	return lexer_item_type;
}

72 73 74
/*
 * Appends the letter name of the 0-based column @col to @target
 * (0 -> "A", 25 -> "Z", 26 -> "AA", ...).  A negative @col is appended
 * as "[C<col>]" instead.
 */
static void
col_name_internal (GString *target, int col)
{
	/*
	 * steps[k] is the number of names that are exactly k+1 letters
	 * long; the INT_MAX sentinel guarantees the scan below stops.
	 */
	static int const steps[] = {
		26,
		26 * 26,
		26 * 26 * 26,
		26 * 26 * 26 * 26,
		26 * 26 * 26 * 26 * 26,
		26 * 26 * 26 * 26 * 26 * 26,
		INT_MAX
	};
	int extra = 0;	/* number of letters beyond the first */
	char *p;

	if (col < 0) {
		/* Invalid column.  */
		g_string_append_printf (target, "[C%d]", col);
		return;
	}

	/* Strip off the shorter names to find the length and the offset. */
	while (col >= steps[extra]) {
		col -= steps[extra];
		extra++;
	}

	/* Grow the string, then fill in the letters from the right. */
	g_string_set_size (target, target->len + (extra + 1));
	p = target->str + target->len;
	for (; extra >= 0; extra--) {
		p--;
		*p = 'A' + col % 26;
		col /= 26;
	}
}

104 105 106
char const *
col_name (int col)
{
107 108
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
109
		buffer = g_string_new (NULL);
110 111 112 113 114
	g_string_truncate (buffer, 0);

	col_name_internal (buffer, col);

	return buffer->str;
115 116 117 118 119
}

char const *
cols_name (int start_col, int end_col)
{
120 121
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
122
		buffer = g_string_new (NULL);
123
	g_string_truncate (buffer, 0);
124

125
	col_name_internal (buffer, start_col);
126
	if (start_col != end_col) {
127 128
		g_string_append_c (buffer, ':');
		col_name_internal (buffer, end_col);
129
	}
130 131

	return buffer->str;
132 133 134
}

char const *
135
col_parse (char const *str, GnmSheetSize const *ss,
136
	   int *res, unsigned char *relative)
137
{
138
	char const *ptr, *start = str;
139
	int col = -1;
140
	int max = ss->max_cols;
141

142 143
	if (!(*relative = (*start != '$')))
		start++;
144

145
	for (ptr = start; col < max ; ptr++)
146 147 148 149
		if (('a' <= *ptr && *ptr <= 'z'))
			col = 26 * (col + 1) + (*ptr - 'a');
		else if (('A' <= *ptr && *ptr <= 'Z'))
			col = 26 * (col + 1) + (*ptr - 'A');
150
		else if (ptr != start) {
151 152 153
			*res = col;
			return ptr;
		} else
154 155
			return NULL;
	return NULL;
156 157 158 159
}

/***************************************************************************/

160 161
/* Appends the 1-based decimal name of the 0-based row @row to @target. */
static void
row_name_internal (GString *target, int row)
{
	int const shown = row + 1;

	g_string_append_printf (target, "%d", shown);
}

166 167 168
char const *
row_name (int row)
{
169 170
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
171
		buffer = g_string_new (NULL);
172 173 174 175 176
	g_string_truncate (buffer, 0);

	row_name_internal (buffer, row);

	return buffer->str;
177 178 179 180 181
}

char const *
rows_name (int start_row, int end_row)
{
182 183
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
184
		buffer = g_string_new (NULL);
185
	g_string_truncate (buffer, 0);
186

187
	row_name_internal (buffer, start_row);
188
	if (start_row != end_row) {
189 190
		g_string_append_c (buffer, ':');
		row_name_internal (buffer, end_row);
191
	}
192 193

	return buffer->str;
194 195
}

196
char const *
197
row_parse (char const *str, GnmSheetSize const *ss,
198
	   int *res, unsigned char *relative)
199 200
{
	char const *end, *ptr = str;
201
	long int row;
202
	int max = ss->max_rows;
203 204 205 206

	if (!(*relative = (*ptr != '$')))
		ptr++;

207 208 209 210
	/* Initial '0' is not allowed.  */
	if (*ptr <= '0' || *ptr > '9')
		return NULL;

211 212 213 214 215
	/*
	 * Do not allow letters after the row number.  If we did, then
	 * the name "K3P" would lex as the reference K3 followed by the
	 * name "P".
	 */
216
	row = strtol (ptr, (char **)&end, 10);
217 218
	if (ptr != end &&
	    !g_unichar_isalnum (g_utf8_get_char (end)) && *end != '_' &&
219
	    0 < row && row <= max) {
220 221 222
		*res = row - 1;
		return end;
	} else
223
		return NULL;
224 225
}

226
/***************************************************************************/
227

228 229 230 231 232 233 234 235 236 237 238 239
/*
 * Appends one R1C1 component to @target.  @type is the letter to emit
 * ('R' or 'C' at the call sites in this file).  An absolute index is
 * written 1-based ("R5"); a relative one is written as a bracketed
 * offset ("R[-2]"), or as the bare letter when the offset is zero.
 */
static void
r1c1_add_index (GString *target, char type, int num, unsigned char relative)
{
	if (!relative) {
		g_string_append_printf (target, "%c%d", type, num + 1);
		return;
	}

	if (num == 0)
		g_string_append_c (target, type);
	else
		g_string_append_printf (target, "%c[%d]", type, num);
}

240 241 242
/*
 * Returns the uri of @wb expressed relative to the uri of @ref_wb.  If
 * no relative form is produced, or it starts with '/', a copy of the
 * uri of @wb is returned instead.  The caller frees the result with
 * g_free.
 */
static char *
wb_rel_uri (Workbook *wb, Workbook *ref_wb)
{
	char const *uri = go_doc_get_uri ((GODoc *)wb);
	char const *ref_uri = go_doc_get_uri ((GODoc *)ref_wb);
	char *rel = go_url_make_relative (uri, ref_uri);

	if (rel != NULL && rel[0] != '/')
		return rel;

	g_free (rel);
	return g_strdup (uri);
}

255 256
/**
 * cellref_as_string :
257 258 259
 * @out: #GnmConventionsOut
 * @cell_ref:
 * @no_sheetname:
260 261 262 263 264
 *
 * Returns a string that the caller needs to free containing the A1 format
 * representation of @ref as evaluated at @pp.  @no_sheetname can be used to
 * suppress the addition of the sheetname for non-local references.
 **/
265
void
266
cellref_as_string (GnmConventionsOut *out,
Jody Goldberg's avatar
Jody Goldberg committed
267
		   GnmCellRef const *cell_ref,
268
		   gboolean no_sheetname)
269
{
270
	GString *target = out->accum;
271
	Sheet const *sheet = cell_ref->sheet;
272

273 274
	/* If it is a non-local reference, add the path to the external sheet */
	if (sheet != NULL && !no_sheetname) {
275
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
276 277
			/* For the expression leak printer.  */
			g_string_append (target, "'?'");
278
		else if (NULL == out->pp->wb || sheet->workbook == out->pp->wb)
279 280
			g_string_append (target, sheet->name_quoted);
		else {
281
			char *rel_uri = wb_rel_uri (sheet->workbook, out->pp->wb);
282
			g_string_append_c (target, '[');
283
			g_string_append (target, rel_uri);
284 285
			g_string_append_c (target, ']');
			g_string_append (target, sheet->name_quoted);
286
			g_free (rel_uri);
287
		}
288
		g_string_append_unichar (target, out->convs->sheet_name_sep);
289 290
	}

291
	if (out->convs->r1c1_addresses) { /* R1C1 handler */
292 293 294
		r1c1_add_index (target, 'R', cell_ref->row, cell_ref->row_relative);
		r1c1_add_index (target, 'C', cell_ref->col, cell_ref->col_relative);
	} else {
295
		GnmCellPos pos;
296
		Sheet const *size_sheet = eval_sheet (sheet, out->pp->sheet);
297 298
		GnmSheetSize const *ss =
			gnm_sheet_get_size2 (size_sheet, out->pp->wb);
299

300
		gnm_cellpos_init_cellref_ss (&pos, cell_ref, &out->pp->eval, ss);
301

302
		if (!cell_ref->col_relative)
303
			g_string_append_c (target, '$');
304
		col_name_internal (target, pos.col);
305

306 307 308
		if (!cell_ref->row_relative)
			g_string_append_c (target, '$');
		row_name_internal (target, pos.row);
309
	}
310 311
}

312
/**
313
 * rangeref_as_string :
314 315
 * @out: #GnmConventionsOut
 * @ref: #GnmRangeRef
316 317
 *
 **/
318
void
319
rangeref_as_string (GnmConventionsOut *out, GnmRangeRef const *ref)
320
{
Jody Goldberg's avatar
Jody Goldberg committed
321
	GnmRange r;
322
	GString *target = out->accum;
323
	Sheet *start_sheet, *end_sheet;
324 325 326
	GnmSheetSize const *end_ss;

	gnm_rangeref_normalize_pp (ref, out->pp, &start_sheet, &end_sheet, &r);
327

328
	end_ss = gnm_sheet_get_size2 (end_sheet, out->pp->wb);
329

330
	if (ref->a.sheet) {
331 332
		if (NULL != out->pp->wb && ref->a.sheet->workbook != out->pp->wb) {
			char *rel_uri = wb_rel_uri (ref->a.sheet->workbook, out->pp->wb);
333
			g_string_append_c (target, '[');
334
			g_string_append (target, rel_uri);
335
			g_string_append_c (target, ']');
336
			g_free (rel_uri);
337
		}
338 339
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
			/* For the expression leak printer.  */
340 341 342 343 344 345 346 347
			g_string_append (target, "'?'");
		else if (ref->b.sheet == NULL || ref->a.sheet == ref->b.sheet)
			g_string_append (target, ref->a.sheet->name_quoted);
		else {
			g_string_append (target, ref->a.sheet->name_quoted);
			g_string_append_c (target, ':');
			g_string_append (target, ref->b.sheet->name_quoted);
		}
348
		g_string_append_unichar (target, out->convs->sheet_name_sep);
349 350
	}

351
	if (out->convs->r1c1_addresses) { /* R1C1 handler */
352
		/* be sure to use else if so that a1:iv65535 does not vanish */
353
		if (r.start.col == 0 && r.end.col == end_ss->max_cols - 1) {
354 355 356 357 358 359
			r1c1_add_index (target, 'R', ref->a.row, ref->a.row_relative);
			if (ref->a.row != ref->b.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'R', ref->b.row, ref->b.row_relative);
			}
360
		} else if (r.start.row == 0 && r.end.row == end_ss->max_rows - 1) {
361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
			r1c1_add_index (target, 'C', ref->a.col, ref->a.col_relative);
			if (ref->a.col != ref->b.col ||
			    ref->a.col_relative != ref->b.col_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'C', ref->b.col, ref->b.col_relative);
			}
		} else {
			r1c1_add_index (target, 'R', ref->a.row, ref->a.row_relative);
			r1c1_add_index (target, 'C', ref->a.col, ref->a.col_relative);
			if (r.start.col != r.end.col ||
			    ref->a.col_relative != ref->b.col_relative ||
			    r.start.row != r.end.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'R', ref->b.row, ref->b.row_relative);
				r1c1_add_index (target, 'C', ref->b.col, ref->b.col_relative);
			}
		}
379
	} else {
380
		/* be sure to use else if so that a1:iv65535 does not vanish */
381
		if (r.start.col == 0 && r.end.col == end_ss->max_cols - 1) {
382 383 384 385 386 387 388
			if (!ref->a.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.start.row);
			g_string_append_c (target, ':');
			if (!ref->b.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.end.row);
389
		} else if (r.start.row == 0 && r.end.row == end_ss->max_rows - 1) {
390 391 392
			if (!ref->a.col_relative)
				g_string_append_c (target, '$');
			col_name_internal (target, r.start.col);
393
			g_string_append_c (target, ':');
394
			if (!ref->b.col_relative)
395 396
				g_string_append_c (target, '$');
			col_name_internal (target, r.end.col);
397 398
		} else {
			if (!ref->a.col_relative)
399
				g_string_append_c (target, '$');
400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416
			col_name_internal (target, r.start.col);
			if (!ref->a.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.start.row);

			if (r.start.col != r.end.col ||
			    ref->a.col_relative != ref->b.col_relative ||
			    r.start.row != r.end.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				if (!ref->b.col_relative)
					g_string_append_c (target, '$');
				col_name_internal (target, r.end.col);
				if (!ref->b.row_relative)
					g_string_append_c (target, '$');
				row_name_internal (target, r.end.row);
			}
417 418 419 420
		}
	}
}

421 422
/**
 * gnm_1_0_rangeref_as_string :
423 424
 * @out: #GnmConventionsOut
 * @ref: #GnmRangeRef
425
 *
426 427 428
 * Simplified variant of rangeref_as_string that old versions of gnumeric can
 * read.  It drops support for full col/row references.  We can remap them on
 * import.
429 430
 *
 * This function also ignores R1C1 settings.
431 432
 **/
void
433
gnm_1_0_rangeref_as_string (GnmConventionsOut *out, GnmRangeRef const *ref)
434 435
{
	GnmRange r;
436
	GString *target = out->accum;
437
	Sheet *start_sheet, *end_sheet;
438

439
	gnm_rangeref_normalize_pp (ref, out->pp, &start_sheet, &end_sheet, &r);
440 441

	if (ref->a.sheet) {
442 443
		if (NULL != out->pp->wb && ref->a.sheet->workbook != out->pp->wb) {
			char *rel_uri = wb_rel_uri (ref->a.sheet->workbook, out->pp->wb);
444
			g_string_append_c (target, '[');
445
			g_string_append (target, rel_uri);
446
			g_string_append_c (target, ']');
447
			g_free (rel_uri);
448
		}
449
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
450 451 452 453 454 455 456 457 458
			/* For the expression leak printer. */
			g_string_append (target, "'?'");
		else if (ref->b.sheet == NULL || ref->a.sheet == ref->b.sheet)
			g_string_append (target, ref->a.sheet->name_quoted);
		else {
			g_string_append (target, ref->a.sheet->name_quoted);
			g_string_append_c (target, ':');
			g_string_append (target, ref->b.sheet->name_quoted);
		}
459
		g_string_append_unichar (target, out->convs->sheet_name_sep);
460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482
	}

	if (!ref->a.col_relative)
		g_string_append_c (target, '$');
	col_name_internal (target, r.start.col);
	if (!ref->a.row_relative)
		g_string_append_c (target, '$');
	row_name_internal (target, r.start.row);

	if (r.start.col != r.end.col ||
	    ref->a.col_relative != ref->b.col_relative ||
	    r.start.row != r.end.row ||
	    ref->a.row_relative != ref->b.row_relative) {
		g_string_append_c (target, ':');
		if (!ref->b.col_relative)
			g_string_append_c (target, '$');
		col_name_internal (target, r.end.col);
		if (!ref->b.row_relative)
			g_string_append_c (target, '$');
		row_name_internal (target, r.end.row);
	}
}

483
static char const *
484
cellref_a1_get (GnmCellRef *out, GnmSheetSize const *ss,
485
		char const *in, GnmCellPos const *pos)
486
{
487 488
	int col;
	int row;
489

490 491
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);
492

493
	in = col_parse (in, ss, &col, &out->col_relative);
494
	if (!in)
495
		return NULL;
496

497
	in = row_parse (in, ss, &row, &out->row_relative);
498
	if (!in)
499
		return NULL;
500 501 502

	/* Setup the cell reference information */
	if (out->row_relative)
Jody Goldberg's avatar
Jody Goldberg committed
503
		out->row = row - pos->row;
504 505 506 507
	else
		out->row = row;

	if (out->col_relative)
Jody Goldberg's avatar
Jody Goldberg committed
508
		out->col = col - pos->col;
509 510 511 512 513
	else
		out->col = col;

	out->sheet = NULL;

514
	return in;
515 516
}

517 518
/* skip first character (which was R or C) */
static char const *
519
r1c1_get_index (char const *str, GnmSheetSize const *ss,
520
		int *num, unsigned char *relative, gboolean is_col)
521
{
522
	char *end;
523 524 525
	long l;
	int max = is_col ? ss->max_cols : ss->max_rows;

526 527
	if (str[0] == '\0')
		return NULL;
528

529
	str++;
530 531
	*relative = (*str == '[');
	if (*relative)
532
		str++;
533 534 535 536 537 538
	else if (*str == '-' || *str == '+') { /* handle RC-10 as RC followed by -10 */
		*relative = TRUE;
		*num = 0;
		return str;
	}

539
	errno = 0;
540 541 542
	*num = l = strtol (str, &end, 10);
	if (errno == ERANGE || l <= G_MININT || l > G_MAXINT) {
		/* Note: this includes G_MININT to avoid negation overflow.  */
543
		return NULL;
544
	}
545 546 547 548 549 550 551 552
	if (str == end) {
		if (*relative)
			return NULL;
		*relative = TRUE;
		*num = 0;
	} else if (*relative) {
		if (*end != ']')
			return NULL;
553 554 555
		*num = (*num > 0
			? *num % max
			: -(-*num % max));
556 557
		return end + 1;
	} else {
558
		if (*num <= 0 || *num > max)
559 560
			return NULL;
		(*num)--;
561
	}
562
	return end;
563 564
}

565
static char const *
566
cellref_r1c1_get (GnmCellRef *out, GnmSheetSize const *ss,
567
		  char const *in, GnmCellPos const *pos)
568 569
{
	out->sheet = NULL;
570
	if (*in != 'R' && *in != 'r')
571
		return NULL;
572
	if (NULL == (in = r1c1_get_index (in, ss,
573 574
					  &out->row, &out->row_relative,
					  FALSE)))
575
		return NULL;
576
	if (*in != 'C' && *in != 'c')
577
		return NULL;
578
	if (NULL == (in = r1c1_get_index (in, ss,
579 580
					  &out->col, &out->col_relative,
					  TRUE)))
581 582 583 584
		return NULL;
	if (g_ascii_isalpha (*in))
		return NULL;
	return in;
585 586 587
}

/**
588
 * cellref_parse:
Jody Goldberg's avatar
Jody Goldberg committed
589
 * @out: destination GnmCellRef
590
 * @in: reference description text, no leading
591
 *      whitespace allowed.
Morten Welinder's avatar
Morten Welinder committed
592
 * @pos:
593
 *
594 595
 * Converts the char * representation of a Cell reference into
 * an internal representation.
596
 *
Morten Welinder's avatar
Morten Welinder committed
597
 * Return value: a pointer to the character following the cellref.
598
 **/
599
char const *
600
cellref_parse (GnmCellRef *out, GnmSheetSize const *ss,
601
	       char const *in, GnmCellPos const *pos)
602
{
603
	char const *res;
Morten Welinder's avatar
Morten Welinder committed
604

605 606 607
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);

608
	res = cellref_a1_get (out, ss, in, pos);
609 610
	if (res != NULL)
		return res;
611
	return cellref_r1c1_get (out, ss, in, pos);
612 613 614 615
}

/****************************************************************************/

616 617
/*
 * Formats the 0-based cell position (@col, @row) as absolute R1C1 text
 * when @r1c1 is set, or as an A1 name otherwise.  The result lives in a
 * static buffer shared by every caller of this function and is
 * overwritten by the next call.
 */
static char const *
cell_coord_name2 (int col, int row, gboolean r1c1)
{
	static GString *buffer = NULL;

	if (buffer == NULL)
		buffer = g_string_new (NULL);
	else
		g_string_truncate (buffer, 0);

	if (!r1c1) {
		col_name_internal (buffer, col);
		row_name_internal (buffer, row);
	} else {
		r1c1_add_index (buffer, 'R', row, FALSE);
		r1c1_add_index (buffer, 'C', col, FALSE);
	}

	return buffer->str;
}
635

636 637 638 639 640 641
char const *
cell_coord_name (int col, int row)
{
	return cell_coord_name2 (col, row, FALSE);
}

642
char const *
Jody Goldberg's avatar
Jody Goldberg committed
643
cellpos_as_string (GnmCellPos const *pos)
644 645 646 647 648 649
{
	g_return_val_if_fail (pos != NULL, "ERROR");

	return cell_coord_name (pos->col, pos->row);
}

650 651 652 653 654 655 656
/*
 * Returns the name of the evaluation position of @pp.  R1C1 notation
 * is used when @pp has a sheet whose conventions request it, A1
 * notation otherwise.  Returns "ERROR" after a warning if @pp is NULL.
 * The string is the static buffer of cell_coord_name2.
 */
char const *
parsepos_as_string (GnmParsePos const *pp)
{
	gboolean use_r1c1;

	g_return_val_if_fail (pp != NULL, "ERROR");

	use_r1c1 = pp->sheet && pp->sheet->convs->r1c1_addresses;
	return cell_coord_name2 (pp->eval.col, pp->eval.row, use_r1c1);
}

660
char const *
661
cell_name (GnmCell const *cell)
662 663 664
{
	g_return_val_if_fail (cell != NULL, "ERROR");

665 666
	return cell_coord_name2 (cell->pos.col,
				 cell->pos.row,
667
				 cell->base.sheet->convs->r1c1_addresses);
668 669 670
}

/**
671 672 673 674
 * cellpos_parse:
 * @cell_str:   a string representation of a cell name.
 * @ss:          #GnmSheetSize
 * @res:         result
675
 * @strict:      if this is TRUE, then parsing stops at possible errors,
Morten Welinder's avatar
Morten Welinder committed
676 677
 *               otherwise an attempt is made to return cell names with
 *               trailing garbage.
678
 *
679
 * Return value: pointer to following char on success, NULL on failure.
Morten Welinder's avatar
Morten Welinder committed
680
 * (In the strict case, that would be a pointer to the \0 or NULL.)
681
 */
682
char const *
683
cellpos_parse (char const *cell_str, GnmSheetSize const *ss,
684
	       GnmCellPos *res, gboolean strict)
685
{
686
	unsigned char dummy_relative;
687

688
	cell_str = col_parse (cell_str, ss, &res->col, &dummy_relative);
689
	if (!cell_str)
690
		return NULL;
691

692
	cell_str = row_parse (cell_str, ss, &res->row, &dummy_relative);
693
	if (!cell_str)
694
		return NULL;
695

Jody Goldberg's avatar
Jody Goldberg committed
696
	if (*cell_str != 0 && strict)
697
		return NULL;
698

699
	return cell_str;
700 701
}

702
/**
703
 * gnm_expr_char_start_p:
704 705 706 707 708 709 710 711 712 713 714
 *
 * Can the supplied string be an expression ?  It does not guarantee that it is,
 * however, it is possible.  If it is possible it strips off any header
 * characters that are not relevant.
 *
 * NOTE : things like -1,234 will match
 */
char const *
gnm_expr_char_start_p (char const * c)
{
	char c0;
715
	int N = 1;
716 717 718 719 720 721

	if (NULL == c)
		return NULL;

	c0 = *c;

722 723 724 725
	if (c0 == '=' || c0 == '@' || c0 == '+' || c0 == '-')
		while (c[N] == ' ')
			N++;

726
	if (c0 == '=' || c0 == '@' || (c0 == '+' && c[1] == 0))
727
		return c + N;
728 729 730 731 732 733 734 735 736 737 738 739 740 741

	if ((c0 == '-' || c0 == '+') && c0 != c[1]) {
		char *end;

		/*
		 * Ok, we have a string that
		 * 1. starts with a sign
		 * 2. does not start with the sign repeated (think --------)
		 * 3. is more than one character
		 *
		 * Now we check whether we have a number.  We don't want
		 * numbers to be treated as formulae.  FIXME: this really
		 * just checks for C-syntax numbers.
		 */
742
		(void) gnm_strto (c, &end);
743
		if (errno || *end != 0 || end == c)
744
			return (c0 == '+') ? c + N : c;
745 746 747 748 749
		/* Otherwise, it's a number.  */
	}
	return NULL;
}

750
/**
751
 * parse_text_value_or_expr:
752
 * @pos: If the string looks like an expression parse it at this location.
753
 * @text: The text to be parsed.
754
 * @val: Returns a GnmValue* if the text was a value, otherwise NULL.
755
 * @texpr: Returns a GnmExprTop* if the text was an expression, otherwise NULL.
756
 *
757 758
 * Utility routine to parse a string and convert it into an expression or value.
 *
Jody Goldberg's avatar
Jody Goldberg committed
759
 * If there is a parse failure for an expression an error GnmValue with the syntax
760 761
 * error is returned.
 */
762
void
Morten Welinder's avatar
Morten Welinder committed
763
parse_text_value_or_expr (GnmParsePos const *pos, char const *text,
764
			  GnmValue **val, GnmExprTop const **texpr)
765
{
766
	char const *expr_start;
767 768 769 770
	GODateConventions const *date_conv;
	GOFormat const *cur_fmt;
	GOFormat const *cell_fmt;
	GnmStyle const *cell_style;
771

772
	*texpr = NULL;
773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792
	*val = NULL;

	/* Determine context information.  */
	date_conv =
		pos->sheet
		? workbook_date_conv (pos->sheet->workbook)
		: (pos->wb
		   ? workbook_date_conv (pos->wb)
		   : NULL);
	cell_style = pos->sheet
		? sheet_style_get (pos->sheet, pos->eval.col, pos->eval.row)
		: NULL;
	cur_fmt = cell_fmt = cell_style ? gnm_style_get_format (cell_style) : NULL;
	if (cell_fmt && go_format_is_general (cell_fmt)) {
		GnmCell const *cell = pos->sheet
			? sheet_cell_get (pos->sheet, pos->eval.col, pos->eval.row)
			: NULL;
		if (cell && cell->value && VALUE_FMT (cell->value))
			cur_fmt = VALUE_FMT (cell->value);
	}
793

794
	/* Does it match any formats?  */
795
	*val = format_match (text, cur_fmt, date_conv);
796 797 798 799 800
	if (*val != NULL) {
		GOFormat const *val_fmt = VALUE_FMT (*val);
		/* Avoid value formats we don't need.  */
		if (val_fmt && go_format_eq (cell_fmt, val_fmt))
			value_set_fmt (*val, NULL);
801
		return;
802
	}
803

804
	/* If it does not match known formats, see if it is an expression */
805
	expr_start = gnm_expr_char_start_p (text);
806
	if (NULL != expr_start && *expr_start) {
807
		*texpr = gnm_expr_parse_str (expr_start, pos,
808
			GNM_EXPR_PARSE_DEFAULT, NULL, NULL);
809
		if (*texpr != NULL)
810
			return;
811 812 813 814
	}

	/* Fall back on string */
	*val = value_new_string (text);
815
}
816

Morten Welinder's avatar
Morten Welinder committed
817 818
/*
 * Resets @pe to the "no error" state: no GError and a zero character
 * range.  Returns @pe.
 */
GnmParseError *
parse_error_init (GnmParseError *pe)
{
	pe->begin_char = pe->end_char = 0;
	pe->err = NULL;

	return pe;
}

void
Morten Welinder's avatar
Morten Welinder committed
828
parse_error_free (GnmParseError *pe)
829
{
830 831 832
	if (pe->err != NULL) {
		g_error_free (pe->err);
		pe->err = NULL;
833 834
	}
}
835

836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858
/*
 * Boxed-type copy hook: returns a newly allocated copy of @pe with its
 * own copy of the GError, if there is one.
 */
static GnmParseError *
gnm_parse_error_copy (GnmParseError *pe)
{
	GnmParseError *dup = g_new (GnmParseError, 1);

	dup->err = NULL;
	if (pe->err)
		dup->err = g_error_copy (pe->err);
	dup->begin_char = pe->begin_char;
	dup->end_char = pe->end_char;

	return dup;
}

GType
gnm_parse_error_get_type (void)
{
	static GType t = 0;

	if (t == 0) {
		t = g_boxed_type_register_static ("GnmParseError",
			 (GBoxedCopyFunc)gnm_parse_error_copy,
			 (GBoxedFreeFunc)parse_error_free);
	}
	return t;
}

859
/***************************************************************************/
860

861
/*
 * Checks whether @start begins with a single- or double-quoted string
 * in which a backslash escapes the character after it.
 *
 * @num_escapes: set to -1 if @start does not begin with a quote,
 *	otherwise to the number of backslash escapes seen (this is set
 *	even when the closing quote is missing).
 *
 * Returns a pointer just past the closing quote, or @start if there is
 * no opening quote or no matching closing quote.
 */
static char const *
check_quoted (char const *start, int *num_escapes)
{
	char const *p;
	char quote;

	if (*start != '\'' && *start != '\"') {
		*num_escapes = -1;
		return start;
	}

	quote = *start;
	*num_escapes = 0;
	p = start + 1;
	while (*p && *p != quote) {
		if (*p == '\\' && p[1]) {
			/* Skip the backslash; the escaped char follows. */
			p++;
			(*num_escapes)++;
		}
		p = g_utf8_next_char (p);
	}

	return *p ? p + 1 : start;
}
879

880 881
static void
unquote (char *dst, char const *src, int n)
882
{
883
	while (n-- > 0)
884
		if (*src == '\\' && src[1]) {
885 886 887 888 889
			int l = g_utf8_skip [*(guchar *)(++src)];
			strncpy (dst, src, l);
			dst += l;
			src += l;
			n -= l;
890 891 892 893
		} else
			*dst++ = *src++;
	*dst = 0;
}
894

895 896
/**
 * wbref_parse :
897 898 899
 * @convs: #GnmConventions const
 * @start:
 * @wb:
900 901 902 903 904
 *
 * Returns : NULL if there is a valid workbook name but it is unknown.
 *           If the string is a valid workbook known name it returns a pointer
 *           the end of the name.
 *           Otherwise returns @start and does not modify @wb.
905
 **/
906
static char const *
907 908
wbref_parse (GnmConventions const *convs,
	     char const *start, Workbook **wb, Workbook *ref_wb)
909 910 911
{
	/* Is this an external reference ? */
	if (*start == '[') {
912 913
		Workbook *tmp_wb;

914
		int num_escapes;
915
		char const *end = check_quoted (start+1, &num_escapes);
916 917
		char *name;

918
		if (end == start+1) {
919 920 921
			end = strchr (start, ']');
			if (end == NULL)
				return start;
922
		}
923
		if (*end != ']')
924
			return start;
925

926 927 928 929
		if (num_escapes < 0)
			name = g_strndup (start + 1, end - start - 1);
		else {
			name = g_malloc (1 + end - start - 2);
930
			unquote (name, start+2, end-start-2);
931
		}
932

933
		tmp_wb = (*convs->input.external_wb) (convs, ref_wb, name);
934
		g_free (name);
935 936
		if (tmp_wb == NULL)
			return NULL;
937

938
		*wb = tmp_wb;
939 940 941 942 943 944
		return end + 1;
	}

	return start;
}

945 946
/**
 * sheetref_parse :
947 948 949 950 951
 * @convs:
 * @start:
 * @sheet:
 * @wb:
 * @allow_3d:
952 953
 *
 * Returns : NULL if there is a valid sheet name but it is unknown.
954
 *           If the string is a valid sheet name it returns a pointer
955 956 957
 *           the end of the name.
 *           Otherwise returns @start and does not modify @sheet.
 **/
958 959 960
static char const *
sheetref_parse (GnmConventions const *convs,
		char const *start, Sheet **sheet, Workbook const *wb,
961
		gboolean allow_3d)
962
{
963
	GString *sheet_name;
Jody Goldberg's avatar
Jody Goldberg committed
964
	char const *end;
965 966

	*sheet = NULL;
967 968
	if (*start == '\'' || *start == '"') {
		sheet_name = g_string_new (NULL);
Jody Goldberg's avatar
Jody Goldberg committed
969
		end = go_strunescape (sheet_name, start);
970 971 972 973 974
		if (end == NULL) {
			g_string_free (sheet_name, TRUE);
			return start;
		}
	} else {
975 976 977 978 979 980 981 982 983 984 985 986
		gboolean only_digits = TRUE;
		end = start;

		/*
		 * Valid: Normal!a1
		 * Valid: x.y!a1
		 * Invalid: .y!a1
		 *
		 * Some names starting with digits are actually valid, but
		 * unparse quoted. Things are quite tricky: most sheet names
		 * starting with a digit are ok, but not those starting with
		 * "[0-9]*\." or "[0-9]+[eE]".
987
		 *
988 989 990 991 992 993 994 995
		 * Valid: 42!a1
		 * Valid: 4x!a1
		 * Invalid: 1.!a1
		 * Invalid: 1e!a1
		 */

		while (1) {
			gunichar uc = g_utf8_get_char (end);
996
			if (g_unichar_isalpha (uc) || uc == '_') {
997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
				if (only_digits && end != start &&
				    (uc == 'e' || uc == 'E')) {
					end = start;
					break;
				}
				only_digits = FALSE;
				end = g_utf8_next_char (end);
			} else if (g_unichar_isdigit (uc)) {
				end = g_utf8_next_char (end);
			} else if (uc == '.') {
				/* Valid, except after only digits.  */
				if (only_digits) {
					end = start;
					break;
				}
				end++;
1013
			} else
1014 1015
				break;
		}
1016

1017 1018
		if (*end != '!' && (!allow_3d || *end != ':'))
			return start;
1019

1020 1021
		sheet_name = g_string_new_len (start, end - start);
	}
1022

1023 1024 1025
	*sheet = workbook_sheet_by_name (wb, sheet_name->str);
	if (*sheet == NULL)
		end = start;
1026

1027 1028
	g_string_free (sheet_name, TRUE);
	return end;
1029 1030
}

1031 1032 1033 1034
/*
 * Parses an R1C1-style range at @ptr into @res.  @res->a.sheet and
 * @res->b.sheet must already be set by the caller; they select, with
 * @pp, the sheet sizes used to validate the indices.
 *
 * Accepted forms:
 *   R#[:R#]		full rows
 *   C#[:C#]		full columns
 *   R#C#[:R#C#]	a cell or a rectangle
 *
 * If the part after ':' cannot be parsed, only the first reference is
 * consumed.
 *
 * Returns a pointer to the character after the consumed text, or NULL
 * if @ptr does not start with a valid R1C1 reference.
 */
static char const *
r1c1_rangeref_parse (GnmRangeRef *res, char const *ptr, GnmParsePos const *pp)
{
	char const *tmp;
	GnmSheetSize const *a_ss, *b_ss;
	Sheet const *a_sheet, *b_sheet;

	a_sheet = eval_sheet (res->a.sheet, pp->sheet);
	b_sheet = eval_sheet (res->b.sheet, a_sheet);

	a_ss = gnm_sheet_get_size2 (a_sheet, pp->wb);
	b_ss = gnm_sheet_get_size2 (b_sheet, pp->wb);

	if (*ptr == 'R' || *ptr == 'r') {
		ptr = r1c1_get_index (ptr, a_ss,
				      &res->a.row, &res->a.row_relative,
				      FALSE);
		if (!ptr)
			return NULL;
		if (*ptr != 'C' && *ptr != 'c') {
			if (g_ascii_isalpha (*ptr))
				return NULL;
			/* full row R# */
			res->a.col_relative = FALSE;
			res->a.col = 0;
			res->b = res->a;
			res->b.col = a_ss->max_cols - 1;
			if (ptr[0] != ':' || (ptr[1] != 'R' && ptr[1] != 'r'))
				return ptr;
			/* ptr+1: step over the ':' to reach the second R */
			tmp = r1c1_get_index (ptr+1, a_ss,
					      &res->b.row, &res->b.row_relative,
					      FALSE);
			if (!tmp)
				return ptr; /* fallback to just the initial R */
			return tmp;
		} else {
			ptr = r1c1_get_index (ptr, a_ss,
					      &res->a.col, &res->a.col_relative,
					      TRUE);
			if (!ptr)
				return NULL;
		}

		/* R#C#, optionally followed by :R#C# */
		res->b = res->a;
		if (ptr[0] != ':' || (ptr[1] != 'R' && ptr[1] != 'r') ||
		    NULL == (tmp = r1c1_get_index (ptr+1, b_ss,
						   &res->b.row, &res->b.row_relative, FALSE)) ||
		    (*tmp != 'C' && *tmp != 'c') ||
		    /* is_col must be TRUE so the column is checked against max_cols */
		    NULL == (tmp = r1c1_get_index (tmp, b_ss,
						   &res->b.col, &res->b.col_relative, TRUE)))
			return ptr;
		return tmp;
	} else if (*ptr == 'C' || *ptr == 'c') {
		if (NULL == (ptr = r1c1_get_index (ptr, a_ss,
						   &res->a.col, &res->a.col_relative, TRUE)))
			return NULL;
		if (g_ascii_isalpha (*ptr))
			return NULL;
		 /* full col C[#] */
		res->a.row_relative = FALSE;
		res->a.row = 0;
		res->b = res->a;
		res->b.row = b_ss->max_rows - 1;
		if (ptr[0] != ':' || (ptr[1] != 'C' && ptr[1] != 'c'))
			return ptr;
		/*
		 * r1c1_get_index skips exactly one leading character, so
		 * it must be handed the second 'C' (ptr+1), not the ':'.
		 */
		tmp = r1c1_get_index (ptr+1, b_ss,
				      &res->b.col, &res->b.col_relative,
				      TRUE);
		if (!tmp)
			return ptr; /* fallback to just the initial C */
		return tmp;
	}

	return NULL;
}

1107 1108
/**
 * rangeref_parse :
1109 1110 1111
 * @res: where to store the result
 * @start: the start of the string to parse
 * @pp: the location to parse relative to
1112
 * @convs: #GnmConventions
1113
 *
Morten Welinder's avatar
Morten Welinder committed
1114
 * Returns a pointer to the first invalid character.
1115 1116 1117
 * If the result != @start then @res is valid.
 **/
char const *
1118
rangeref_parse (GnmRangeRef *res, char const *start, GnmParsePos const *pp,
1119
		GnmConventions const *convs)
1120
{
1121
	char const *ptr = start, *start_sheet, *start_wb, *tmp1, *tmp2;
1122
	Workbook *wb;
1123
	Workbook *ref_wb;
1124
	Sheet *a_sheet, *b_sheet;
1125
	GnmSheetSize const *a_ss, *b_ss;
1126 1127 1128 1129 1130

	g_return_val_if_fail (start != NULL, start);
	g_return_val_if_fail (pp != NULL, start);

	wb = pp->wb;
1131
	ref_wb = wb ? wb : pp->sheet->workbook;
1132
	start_wb = start;
1133
	start_sheet = wbref_parse (convs, start, &wb, ref_wb);
1134 1135
	if (start_sheet == NULL)
		return start; /* TODO error unknown workbook */
1136
	ptr = sheetref_parse (convs, start_sheet, &res->a.sheet, wb, TRUE);
1137
	if (ptr == NULL)
1138
		return start; /* TODO error unknown sheet */
1139
	if (ptr != start_sheet) {
1140 1141
		const char *ref;

1142
		if (*ptr == ':') { /* 3d ref */
1143
			ptr = sheetref_parse (convs, ptr+1, &res->b.sheet, wb, FALSE);
1144 1145 1146 1147 1148
			if (ptr == NULL)
				return start; /* TODO error unknown sheet */
		} else
			res->b.sheet = NULL;

1149
		if (*ptr++ != '!')
Jody Goldberg's avatar