/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * parse-util.c: Various utility routines to parse or produce
 *     string representations of common reference types.
 *
 * Copyright (C) 2000-2007 Jody Goldberg (jody@gnome.org)
 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
#include <gnumeric-config.h>
#include "gnumeric.h"
#include "parse-util.h"

#include "application.h"
#include "workbook.h"
#include "sheet.h"
#include "value.h"
#include "ranges.h"
#include "cell.h"
#include "expr.h"
#include "number-match.h"
#include "gnm-format.h"
#include "expr-name.h"
#include "func.h"
/* For std_expr_name_handler: */
#include "expr-impl.h"
#include "gutils.h"
#include <goffice/goffice.h>

#include <errno.h>
#include <limits.h>
#include <stdlib.h>
#include <glib.h>
#include <string.h>

49 50 51
static void
col_name_internal (GString *target, int col)
{
Jody Goldberg's avatar
Jody Goldberg committed
52
	static int const steps[] = {
53 54 55 56 57 58 59 60 61 62 63
		26,
		26 * 26,
		26 * 26 * 26,
		26 * 26 * 26 * 26,
		26 * 26 * 26 * 26 * 26,
		26 * 26 * 26 * 26 * 26 * 26,
		INT_MAX
	};
	int i;
	char *dst;

64 65 66 67 68 69
	if (col < 0) {
		/* Invalid column.  */
		g_string_append_printf (target, "[C%d]", col);
		return;
	}

70 71 72 73 74 75 76 77 78
	for (i = 0; col >= steps[i]; i++)
		col -= steps[i];

	g_string_set_size (target, target->len + (i + 1));
	dst = target->str + target->len;
	while (i-- >= 0) {
		*--dst = 'A' + col % 26;
		col /= 26;
	}
79 80
}

81 82 83
char const *
col_name (int col)
{
84 85
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
86
		buffer = g_string_new (NULL);
87 88 89 90 91
	g_string_truncate (buffer, 0);

	col_name_internal (buffer, col);

	return buffer->str;
92 93 94 95 96
}

char const *
cols_name (int start_col, int end_col)
{
97 98
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
99
		buffer = g_string_new (NULL);
100
	g_string_truncate (buffer, 0);
101

102
	col_name_internal (buffer, start_col);
103
	if (start_col != end_col) {
104 105
		g_string_append_c (buffer, ':');
		col_name_internal (buffer, end_col);
106
	}
107 108

	return buffer->str;
109 110 111
}

char const *
112
col_parse (char const *str, GnmSheetSize const *ss,
113
	   int *res, unsigned char *relative)
114
{
115
	char const *ptr, *start = str;
116
	int col = -1;
117
	int max = ss->max_cols;
118

119 120
	if (!(*relative = (*start != '$')))
		start++;
121

122
	for (ptr = start; col < max ; ptr++)
123 124 125 126
		if (('a' <= *ptr && *ptr <= 'z'))
			col = 26 * (col + 1) + (*ptr - 'a');
		else if (('A' <= *ptr && *ptr <= 'Z'))
			col = 26 * (col + 1) + (*ptr - 'A');
127
		else if (ptr != start) {
128 129 130
			*res = col;
			return ptr;
		} else
131 132
			return NULL;
	return NULL;
133 134 135 136
}

/***************************************************************************/

137 138
static void
row_name_internal (GString *target, int row)
139
{
140
	g_string_append_printf (target, "%d", row + 1);
141 142
}

143 144 145
char const *
row_name (int row)
{
146 147
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
148
		buffer = g_string_new (NULL);
149 150 151 152 153
	g_string_truncate (buffer, 0);

	row_name_internal (buffer, row);

	return buffer->str;
154 155 156 157 158
}

char const *
rows_name (int start_row, int end_row)
{
159 160
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
161
		buffer = g_string_new (NULL);
162
	g_string_truncate (buffer, 0);
163

164
	row_name_internal (buffer, start_row);
165
	if (start_row != end_row) {
166 167
		g_string_append_c (buffer, ':');
		row_name_internal (buffer, end_row);
168
	}
169 170

	return buffer->str;
171 172
}

173
char const *
174
row_parse (char const *str, GnmSheetSize const *ss,
175
	   int *res, unsigned char *relative)
176 177
{
	char const *end, *ptr = str;
178
	long int row;
179
	int max = ss->max_rows;
180 181 182 183

	if (!(*relative = (*ptr != '$')))
		ptr++;

184 185 186 187
	/* Initial '0' is not allowed.  */
	if (*ptr <= '0' || *ptr > '9')
		return NULL;

188 189 190 191 192
	/*
	 * Do not allow letters after the row number.  If we did, then
	 * the name "K3P" would lex as the reference K3 followed by the
	 * name "P".
	 */
193
	row = strtol (ptr, (char **)&end, 10);
194 195
	if (ptr != end &&
	    !g_unichar_isalnum (g_utf8_get_char (end)) && *end != '_' &&
196
	    0 < row && row <= max) {
197 198 199
		*res = row - 1;
		return end;
	} else
200
		return NULL;
201 202
}

203
/***************************************************************************/
204

205 206 207 208 209 210 211 212 213 214 215 216
static void
r1c1_add_index (GString *target, char type, int num, unsigned char relative)
{
	if (relative) {
		if (num != 0)
			g_string_append_printf (target, "%c[%d]", type, num);
		else
			g_string_append_c (target, type);
	} else
		g_string_append_printf (target, "%c%d", type, num + 1);
}

217 218 219
static char *
wb_rel_uri (Workbook *wb, Workbook *ref_wb)
{
220 221
	char const *uri = go_doc_get_uri ((GODoc *)wb);
	char const *ref_uri = go_doc_get_uri ((GODoc *)ref_wb);
222 223 224 225 226 227 228 229 230 231
	char *rel_uri = go_url_make_relative (uri, ref_uri);

	if (rel_uri == NULL || rel_uri[0] == '/') {
		g_free (rel_uri);
		return g_strdup (uri);
	}

	return rel_uri;
}

232 233
/**
 * cellref_as_string :
234
 * @out : #GnmConventionsOut
235 236 237 238 239 240 241
 * @ref :
 * @no_sheetname :
 *
 * Returns a string that the caller needs to free containing the A1 format
 * representation of @ref as evaluated at @pp.  @no_sheetname can be used to
 * suppress the addition of the sheetname for non-local references.
 **/
242
void
243
cellref_as_string (GnmConventionsOut *out,
Jody Goldberg's avatar
Jody Goldberg committed
244
		   GnmCellRef const *cell_ref,
245
		   gboolean no_sheetname)
246
{
247
	GString *target = out->accum;
248
	Sheet const *sheet = cell_ref->sheet;
249

250 251
	/* If it is a non-local reference, add the path to the external sheet */
	if (sheet != NULL && !no_sheetname) {
252
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
253 254
			/* For the expression leak printer.  */
			g_string_append (target, "'?'");
255
		else if (NULL == out->pp->wb || sheet->workbook == out->pp->wb)
256 257
			g_string_append (target, sheet->name_quoted);
		else {
258
			char *rel_uri = wb_rel_uri (sheet->workbook, out->pp->wb);
259
			g_string_append_c (target, '[');
260
			g_string_append (target, rel_uri);
261 262
			g_string_append_c (target, ']');
			g_string_append (target, sheet->name_quoted);
263
			g_free (rel_uri);
264
		}
265
		g_string_append_unichar (target, out->convs->sheet_name_sep);
266 267
	}

268
	if (out->convs->r1c1_addresses) { /* R1C1 handler */
269 270 271
		r1c1_add_index (target, 'R', cell_ref->row, cell_ref->row_relative);
		r1c1_add_index (target, 'C', cell_ref->col, cell_ref->col_relative);
	} else {
272
		GnmCellPos pos;
273
		Sheet const *size_sheet = eval_sheet (sheet, out->pp->sheet);
274 275
		GnmSheetSize const *ss =
			gnm_sheet_get_size2 (size_sheet, out->pp->wb);
276

277
		gnm_cellpos_init_cellref_ss (&pos, cell_ref, &out->pp->eval, ss);
278

279
		if (!cell_ref->col_relative)
280
			g_string_append_c (target, '$');
281
		col_name_internal (target, pos.col);
282

283 284 285
		if (!cell_ref->row_relative)
			g_string_append_c (target, '$');
		row_name_internal (target, pos.row);
286
	}
287 288
}

289
/**
290
 * rangeref_as_string :
291 292
 * @out : #GnmConventionsOut
 * @ref : #GnmRangeRef
293 294
 *
 **/
295
void
296
rangeref_as_string (GnmConventionsOut *out, GnmRangeRef const *ref)
297
{
Jody Goldberg's avatar
Jody Goldberg committed
298
	GnmRange r;
299
	GString *target = out->accum;
300
	Sheet *start_sheet, *end_sheet;
301 302 303
	GnmSheetSize const *end_ss;

	gnm_rangeref_normalize_pp (ref, out->pp, &start_sheet, &end_sheet, &r);
304

305
	end_ss = gnm_sheet_get_size2 (end_sheet, out->pp->wb);
306

307
	if (ref->a.sheet) {
308 309
		if (NULL != out->pp->wb && ref->a.sheet->workbook != out->pp->wb) {
			char *rel_uri = wb_rel_uri (ref->a.sheet->workbook, out->pp->wb);
310
			g_string_append_c (target, '[');
311
			g_string_append (target, rel_uri);
312
			g_string_append_c (target, ']');
313
			g_free (rel_uri);
314
		}
315 316
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
			/* For the expression leak printer.  */
317 318 319 320 321 322 323 324
			g_string_append (target, "'?'");
		else if (ref->b.sheet == NULL || ref->a.sheet == ref->b.sheet)
			g_string_append (target, ref->a.sheet->name_quoted);
		else {
			g_string_append (target, ref->a.sheet->name_quoted);
			g_string_append_c (target, ':');
			g_string_append (target, ref->b.sheet->name_quoted);
		}
325
		g_string_append_unichar (target, out->convs->sheet_name_sep);
326 327
	}

328
	if (out->convs->r1c1_addresses) { /* R1C1 handler */
329
		/* be sure to use else if so that a1:iv65535 does not vanish */
330
		if (r.start.col == 0 && r.end.col == end_ss->max_cols - 1) {
331 332 333 334 335 336
			r1c1_add_index (target, 'R', ref->a.row, ref->a.row_relative);
			if (ref->a.row != ref->b.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'R', ref->b.row, ref->b.row_relative);
			}
337
		} else if (r.start.row == 0 && r.end.row == end_ss->max_rows - 1) {
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
			r1c1_add_index (target, 'C', ref->a.col, ref->a.col_relative);
			if (ref->a.col != ref->b.col ||
			    ref->a.col_relative != ref->b.col_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'C', ref->b.col, ref->b.col_relative);
			}
		} else {
			r1c1_add_index (target, 'R', ref->a.row, ref->a.row_relative);
			r1c1_add_index (target, 'C', ref->a.col, ref->a.col_relative);
			if (r.start.col != r.end.col ||
			    ref->a.col_relative != ref->b.col_relative ||
			    r.start.row != r.end.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'R', ref->b.row, ref->b.row_relative);
				r1c1_add_index (target, 'C', ref->b.col, ref->b.col_relative);
			}
		}
356
	} else {
357
		/* be sure to use else if so that a1:iv65535 does not vanish */
358
		if (r.start.col == 0 && r.end.col == end_ss->max_cols - 1) {
359 360 361 362 363 364 365
			if (!ref->a.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.start.row);
			g_string_append_c (target, ':');
			if (!ref->b.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.end.row);
366
		} else if (r.start.row == 0 && r.end.row == end_ss->max_rows - 1) {
367 368 369
			if (!ref->a.col_relative)
				g_string_append_c (target, '$');
			col_name_internal (target, r.start.col);
370
			g_string_append_c (target, ':');
371
			if (!ref->b.col_relative)
372 373
				g_string_append_c (target, '$');
			col_name_internal (target, r.end.col);
374 375
		} else {
			if (!ref->a.col_relative)
376
				g_string_append_c (target, '$');
377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393
			col_name_internal (target, r.start.col);
			if (!ref->a.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.start.row);

			if (r.start.col != r.end.col ||
			    ref->a.col_relative != ref->b.col_relative ||
			    r.start.row != r.end.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				if (!ref->b.col_relative)
					g_string_append_c (target, '$');
				col_name_internal (target, r.end.col);
				if (!ref->b.row_relative)
					g_string_append_c (target, '$');
				row_name_internal (target, r.end.row);
			}
394 395 396 397
		}
	}
}

398 399
/**
 * gnm_1_0_rangeref_as_string :
400 401
 * @out : #GnmConventionsOut
 * @ref : #GnmRangeRef
402
 *
403 404 405
 * Simplified variant of rangeref_as_string that old versions of gnumeric can
 * read.  It drops support for full col/row references.  We can remap them on
 * import.
406 407
 *
 * This function also ignores R1C1 settings.
408 409
 **/
void
410
gnm_1_0_rangeref_as_string (GnmConventionsOut *out, GnmRangeRef const *ref)
411 412
{
	GnmRange r;
413
	GString *target = out->accum;
414
	Sheet *start_sheet, *end_sheet;
415

416
	gnm_rangeref_normalize_pp (ref, out->pp, &start_sheet, &end_sheet, &r);
417 418

	if (ref->a.sheet) {
419 420
		if (NULL != out->pp->wb && ref->a.sheet->workbook != out->pp->wb) {
			char *rel_uri = wb_rel_uri (ref->a.sheet->workbook, out->pp->wb);
421
			g_string_append_c (target, '[');
422
			g_string_append (target, rel_uri);
423
			g_string_append_c (target, ']');
424
			g_free (rel_uri);
425
		}
426
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
427 428 429 430 431 432 433 434 435
			/* For the expression leak printer. */
			g_string_append (target, "'?'");
		else if (ref->b.sheet == NULL || ref->a.sheet == ref->b.sheet)
			g_string_append (target, ref->a.sheet->name_quoted);
		else {
			g_string_append (target, ref->a.sheet->name_quoted);
			g_string_append_c (target, ':');
			g_string_append (target, ref->b.sheet->name_quoted);
		}
436
		g_string_append_unichar (target, out->convs->sheet_name_sep);
437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459
	}

	if (!ref->a.col_relative)
		g_string_append_c (target, '$');
	col_name_internal (target, r.start.col);
	if (!ref->a.row_relative)
		g_string_append_c (target, '$');
	row_name_internal (target, r.start.row);

	if (r.start.col != r.end.col ||
	    ref->a.col_relative != ref->b.col_relative ||
	    r.start.row != r.end.row ||
	    ref->a.row_relative != ref->b.row_relative) {
		g_string_append_c (target, ':');
		if (!ref->b.col_relative)
			g_string_append_c (target, '$');
		col_name_internal (target, r.end.col);
		if (!ref->b.row_relative)
			g_string_append_c (target, '$');
		row_name_internal (target, r.end.row);
	}
}

460
static char const *
461
cellref_a1_get (GnmCellRef *out, GnmSheetSize const *ss,
462
		char const *in, GnmCellPos const *pos)
463
{
464 465
	int col;
	int row;
466

467 468
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);
469

470
	in = col_parse (in, ss, &col, &out->col_relative);
471
	if (!in)
472
		return NULL;
473

474
	in = row_parse (in, ss, &row, &out->row_relative);
475
	if (!in)
476
		return NULL;
477 478 479

	/* Setup the cell reference information */
	if (out->row_relative)
Jody Goldberg's avatar
Jody Goldberg committed
480
		out->row = row - pos->row;
481 482 483 484
	else
		out->row = row;

	if (out->col_relative)
Jody Goldberg's avatar
Jody Goldberg committed
485
		out->col = col - pos->col;
486 487 488 489 490
	else
		out->col = col;

	out->sheet = NULL;

491
	return in;
492 493
}

494 495
/* skip first character (which was R or C) */
static char const *
496
r1c1_get_index (char const *str, GnmSheetSize const *ss,
497
		int *num, unsigned char *relative, gboolean is_col)
498
{
499 500 501
	char *end;
	if (str[0] == '\0')
		return NULL;
502

503 504 505
	str++;
	if ((*relative = (*str == '[')))
		str++;
506 507 508 509 510 511
	else if (*str == '-' || *str == '+') { /* handle RC-10 as RC followed by -10 */
		*relative = TRUE;
		*num = 0;
		return str;
	}

512 513 514 515 516 517 518 519 520 521 522 523 524 525
	errno = 0;
	*num = strtol (str, &end, 10);
	if (errno == ERANGE)
		return NULL;
	if (str == end) {
		if (*relative)
			return NULL;
		*relative = TRUE;
		*num = 0;
	} else if (*relative) {
		if (*end != ']')
			return NULL;
		return end + 1;
	} else {
526 527
		int max = is_col ? ss->max_cols : ss->max_rows;
		if (*num <= 0 || *num > max)
528 529
			return NULL;
		(*num)--;
530
	}
531
	return end;
532 533
}

534
static char const *
535
cellref_r1c1_get (GnmCellRef *out, GnmSheetSize const *ss,
536
		  char const *in, GnmCellPos const *pos)
537 538
{
	out->sheet = NULL;
539
	if (*in != 'R' && *in != 'r')
540
		return NULL;
541
	if (NULL == (in = r1c1_get_index (in, ss,
542 543
					  &out->row, &out->row_relative,
					  FALSE)))
544
		return NULL;
545
	if (*in != 'C' && *in != 'c')
546
		return NULL;
547
	if (NULL == (in = r1c1_get_index (in, ss,
548 549
					  &out->col, &out->col_relative,
					  TRUE)))
550 551 552 553
		return NULL;
	if (g_ascii_isalpha (*in))
		return NULL;
	return in;
554 555 556
}

/**
557
 * cellref_parse:
Jody Goldberg's avatar
Jody Goldberg committed
558
 * @out: destination GnmCellRef
559
 * @in: reference description text, no leading
560
 *      whitespace allowed.
Morten Welinder's avatar
Morten Welinder committed
561
 * @pos:
562
 *
563 564
 * Converts the char * representation of a Cell reference into
 * an internal representation.
565
 *
Morten Welinder's avatar
Morten Welinder committed
566
 * Return value: a pointer to the character following the cellref.
567
 **/
568
char const *
569
cellref_parse (GnmCellRef *out, GnmSheetSize const *ss,
570
	       char const *in, GnmCellPos const *pos)
571
{
572
	char const *res;
Morten Welinder's avatar
Morten Welinder committed
573

574 575 576
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);

577
	res = cellref_a1_get (out, ss, in, pos);
578 579
	if (res != NULL)
		return res;
580
	return cellref_r1c1_get (out, ss, in, pos);
581 582 583 584
}

/****************************************************************************/

585 586
static char const *
cell_coord_name2 (int col, int row, gboolean r1c1)
587
{
588
	static GString *buffer = NULL;
589 590 591
	if (buffer)
		g_string_truncate (buffer, 0);
	else
Morten Welinder's avatar
Morten Welinder committed
592
		buffer = g_string_new (NULL);
593

594 595 596 597 598 599 600
	if (r1c1) {
		r1c1_add_index (buffer, 'R', row, FALSE);
		r1c1_add_index (buffer, 'C', col, FALSE);
	} else {
		col_name_internal (buffer, col);
		row_name_internal (buffer, row);
	}
601 602

	return buffer->str;
603
}
604

605 606 607 608 609 610
char const *
cell_coord_name (int col, int row)
{
	return cell_coord_name2 (col, row, FALSE);
}

611
char const *
Jody Goldberg's avatar
Jody Goldberg committed
612
cellpos_as_string (GnmCellPos const *pos)
613 614 615 616 617 618
{
	g_return_val_if_fail (pos != NULL, "ERROR");

	return cell_coord_name (pos->col, pos->row);
}

619 620 621 622 623 624 625
char const *
parsepos_as_string (GnmParsePos const *pp)
{
	g_return_val_if_fail (pp != NULL, "ERROR");

	return cell_coord_name2 (pp->eval.col,
				 pp->eval.row,
626
				 pp->sheet && pp->sheet->convs->r1c1_addresses);
627 628
}

629
char const *
630
cell_name (GnmCell const *cell)
631 632 633
{
	g_return_val_if_fail (cell != NULL, "ERROR");

634 635
	return cell_coord_name2 (cell->pos.col,
				 cell->pos.row,
636
				 cell->base.sheet->convs->r1c1_addresses);
637 638 639
}

/**
640
 * cellpos_parse
641
 * @cell_name:   a string representation of a cell name.
Jody Goldberg's avatar
Jody Goldberg committed
642
 * @pos:         result
643
 * @strict:      if this is TRUE, then parsing stops at possible errors,
Morten Welinder's avatar
Morten Welinder committed
644 645
 *               otherwise an attempt is made to return cell names with
 *               trailing garbage.
646
 *
647
 * Return value: pointer to following char on success, NULL on failure.
Morten Welinder's avatar
Morten Welinder committed
648
 * (In the strict case, that would be a pointer to the \0 or NULL.)
649
 */
650
char const *
651
cellpos_parse (char const *cell_str, GnmSheetSize const *ss,
652
	       GnmCellPos *res, gboolean strict)
653
{
654
	unsigned char dummy_relative;
655

656
	cell_str = col_parse (cell_str, ss, &res->col, &dummy_relative);
657
	if (!cell_str)
658
		return NULL;
659

660
	cell_str = row_parse (cell_str, ss, &res->row, &dummy_relative);
661
	if (!cell_str)
662
		return NULL;
663

Jody Goldberg's avatar
fix.  
Jody Goldberg committed
664
	if (*cell_str != 0 && strict)
665
		return NULL;
666

667
	return cell_str;
668 669
}

670 671 672 673 674 675 676 677 678 679 680 681 682
/**
 * gnm_expr_char_start_p :
 *
 * Can the supplied string be an expression ?  It does not guarantee that it is,
 * however, it is possible.  If it is possible it strips off any header
 * characters that are not relevant.
 *
 * NOTE : things like -1,234 will match
 */
char const *
gnm_expr_char_start_p (char const * c)
{
	char c0;
683
	int N = 1;
684 685 686 687 688 689

	if (NULL == c)
		return NULL;

	c0 = *c;

690 691 692 693
	if (c0 == '=' || c0 == '@' || c0 == '+' || c0 == '-')
		while (c[N] == ' ')
			N++;

694
	if (c0 == '=' || c0 == '@' || (c0 == '+' && c[1] == 0))
695
		return c + N;
696 697 698 699 700 701 702 703 704 705 706 707 708 709

	if ((c0 == '-' || c0 == '+') && c0 != c[1]) {
		char *end;

		/*
		 * Ok, we have a string that
		 * 1. starts with a sign
		 * 2. does not start with the sign repeated (think --------)
		 * 3. is more than one character
		 *
		 * Now we check whether we have a number.  We don't want
		 * numbers to be treated as formulae.  FIXME: this really
		 * just checks for C-syntax numbers.
		 */
710
		(void) gnm_strto (c, &end);
711
		if (errno || *end != 0 || end == c)
712
			return (c0 == '+') ? c + N : c;
713 714 715 716 717
		/* Otherwise, it's a number.  */
	}
	return NULL;
}

718 719 720 721 722 723
/**
 * parse_text_value_or_expr : Utility routine to parse a string and convert it
 *     into an expression or value.
 *
 * @pos : If the string looks like an expression parse it at this location.
 * @text: The text to be parsed.
724 725
 * @val : Returns a GnmValue* if the text was a value, otherwise NULL.
 * @texpr: Returns a GnmExprTop* if the text was an expression, otherwise NULL.
726 727
 * @cur_fmt : Optional, current number format.
 * @date_conv : Optional, date parse conventions
728
 *
Jody Goldberg's avatar
Jody Goldberg committed
729
 * If there is a parse failure for an expression an error GnmValue with the syntax
730 731
 * error is returned.
 */
732
void
Morten Welinder's avatar
Morten Welinder committed
733
parse_text_value_or_expr (GnmParsePos const *pos, char const *text,
734
			  GnmValue **val, GnmExprTop const **texpr,
Morten Welinder's avatar
Morten Welinder committed
735
			  GOFormat const *cur_fmt,
736
			  GODateConventions const *date_conv)
737
{
738
	char const *expr_start;
739

740
	*texpr = NULL;
741

742
	/* Does it match any formats?  */
743
	*val = format_match (text, cur_fmt, date_conv);
744 745
	if (*val != NULL)
		return;
746

747
	/* If it does not match known formats, see if it is an expression */
748
	expr_start = gnm_expr_char_start_p (text);
749
	if (NULL != expr_start && *expr_start) {
750
		*texpr = gnm_expr_parse_str (expr_start, pos,
751
			GNM_EXPR_PARSE_DEFAULT, NULL, NULL);
752
		if (*texpr != NULL)
753
			return;
754 755 756 757
	}

	/* Fall back on string */
	*val = value_new_string (text);
758
}
759

Morten Welinder's avatar
Morten Welinder committed
760 761
GnmParseError *
parse_error_init (GnmParseError *pe)
762
{
763 764 765
	pe->err		= NULL;
	pe->begin_char	= 0;
	pe->end_char	= 0;
Jody Goldberg's avatar
Jody Goldberg committed
766

767 768 769 770
	return pe;
}

void
Morten Welinder's avatar
Morten Welinder committed
771
parse_error_free (GnmParseError *pe)
772
{
773 774 775
	if (pe->err != NULL) {
		g_error_free (pe->err);
		pe->err = NULL;
776 777
	}
}
778

/***************************************************************************/

/*
 * check_quoted:
 * @start: text that may begin with a single or double quote.
 * @num_escapes: set to -1 if @start does not begin with a quote,
 *     otherwise to the number of backslash escapes that were skipped.
 *
 * Scans a quoted string in which a backslash escapes the next character.
 *
 * Returns: a pointer to the character after the closing quote, or @start
 * if the text is not quoted or the closing quote is missing.
 */
static char const *
check_quoted (char const *start, int *num_escapes)
{
	char const *str = start;
	if (*str == '\'' || *str == '\"') {
		char const quote = *str++;
		*num_escapes = 0;
		for (; *str && *str != quote; str = g_utf8_next_char (str))
			if (*str == '\\' && str[1]) {
				/* Skip the backslash; the loop skips the escaped char.  */
				str++;
				(*num_escapes)++;
			}
		if (*str)
			return str+1;
	} else
		*num_escapes = -1;
	return start;
}

800 801
static void
unquote (char *dst, char const *src, int n)
802
{
803
	while (n-- > 0)
804
		if (*src == '\\' && src[1]) {
805 806 807 808 809 810 811 812
			int n = g_utf8_skip [*(guchar *)(++src)];
			strncpy (dst, src, n);
			dst += n;
			src += n;
		} else
			*dst++ = *src++;
	*dst = 0;
}
813

814 815
/**
 * wbref_parse :
816
 * @convs : #GnmConventions const
817 818 819 820 821 822 823
 * @start :
 * @wb :
 *
 * Returns : NULL if there is a valid workbook name but it is unknown.
 *           If the string is a valid workbook known name it returns a pointer
 *           the end of the name.
 *           Otherwise returns @start and does not modify @wb.
824
 **/
825
static char const *
826 827
wbref_parse (GnmConventions const *convs,
	     char const *start, Workbook **wb, Workbook *ref_wb)
828 829 830
{
	/* Is this an external reference ? */
	if (*start == '[') {
831 832
		Workbook *tmp_wb;

833
		int num_escapes;
834
		char const *end = check_quoted (start+1, &num_escapes);
835 836
		char *name;

837
		if (end == start+1) {
838 839 840
			end = strchr (start, ']');
			if (end == NULL)
				return start;
841
		}
842
		if (*end != ']')
843
			return start;
844 845 846 847 848 849 850 851

		/* might be too big if quoted (remember leading [' */
		name = g_alloca (1 + end - start - 2);
		if (num_escapes < 0) {
			strncpy (name, start+1, end-start-1);
			name [end-start-1] = '\0';
		} else
			unquote (name, start+2, end-start-2);
852

853
		tmp_wb = (*convs->input.external_wb) (convs, ref_wb, name);
854 855
		if (tmp_wb == NULL)
			return NULL;
856

857
		*wb = tmp_wb;
858 859 860 861 862 863
		return end + 1;
	}

	return start;
}

864 865
/**
 * sheetref_parse :
866
 * @convs :
867 868 869 870 871 872
 * @start :
 * @sheet :
 * @wb    :
 * @allow_3d :
 *
 * Returns : NULL if there is a valid sheet name but it is unknown.
873
 *           If the string is a valid sheet name it returns a pointer
874 875 876
 *           the end of the name.
 *           Otherwise returns @start and does not modify @sheet.
 **/
877 878 879
static char const *
sheetref_parse (GnmConventions const *convs,
		char const *start, Sheet **sheet, Workbook const *wb,
880
		gboolean allow_3d)
881
{
882
	GString *sheet_name;
Jody Goldberg's avatar
Jody Goldberg committed
883
	char const *end;
884 885

	*sheet = NULL;
886 887
	if (*start == '\'' || *start == '"') {
		sheet_name = g_string_new (NULL);
Jody Goldberg's avatar
Jody Goldberg committed
888
		end = go_strunescape (sheet_name, start);
889 890 891 892 893
		if (end == NULL) {
			g_string_free (sheet_name, TRUE);
			return start;
		}
	} else {
894 895 896 897 898 899 900 901 902 903 904 905
		gboolean only_digits = TRUE;
		end = start;

		/*
		 * Valid: Normal!a1
		 * Valid: x.y!a1
		 * Invalid: .y!a1
		 *
		 * Some names starting with digits are actually valid, but
		 * unparse quoted. Things are quite tricky: most sheet names
		 * starting with a digit are ok, but not those starting with
		 * "[0-9]*\." or "[0-9]+[eE]".
906
		 *
907 908 909 910 911 912 913 914
		 * Valid: 42!a1
		 * Valid: 4x!a1
		 * Invalid: 1.!a1
		 * Invalid: 1e!a1
		 */

		while (1) {
			gunichar uc = g_utf8_get_char (end);
915
			if (g_unichar_isalpha (uc) || uc == '_') {
916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931
				if (only_digits && end != start &&
				    (uc == 'e' || uc == 'E')) {
					end = start;
					break;
				}
				only_digits = FALSE;
				end = g_utf8_next_char (end);
			} else if (g_unichar_isdigit (uc)) {
				end = g_utf8_next_char (end);
			} else if (uc == '.') {
				/* Valid, except after only digits.  */
				if (only_digits) {
					end = start;
					break;
				}
				end++;
932
			} else
933 934
				break;
		}
935

936 937
		if (*end != '!' && (!allow_3d || *end != ':'))
			return start;
938

939 940
		sheet_name = g_string_new_len (start, end - start);
	}
941

942 943 944
	*sheet = workbook_sheet_by_name (wb, sheet_name->str);
	if (*sheet == NULL)
		end = start;
945

946 947
	g_string_free (sheet_name, TRUE);
	return end;
948 949
}

950 951 952 953
static char const *
r1c1_rangeref_parse (GnmRangeRef *res, char const *ptr, GnmParsePos const *pp)
{
	char const *tmp;
954 955 956 957 958 959
	GnmSheetSize const *a_ss, *b_ss;
	Sheet const *a_sheet, *b_sheet;

	a_sheet = eval_sheet (res->a.sheet, pp->sheet);
	b_sheet = eval_sheet (res->b.sheet, a_sheet);

960 961
	a_ss = gnm_sheet_get_size2 (a_sheet, pp->wb);
	b_ss = gnm_sheet_get_size2 (b_sheet, pp->wb);
962 963

	if (*ptr == 'R' || *ptr == 'r') {
964
		ptr = r1c1_get_index (ptr, a_ss,
965 966 967
				      &res->a.row, &res->a.row_relative,
				      FALSE);
		if (!ptr)
968
			return NULL;
969 970 971 972 973 974 975
		if (*ptr != 'C' && *ptr != 'c') {
			if (g_ascii_isalpha (*ptr))
				return NULL;
			/* full row R# */
			res->a.col_relative = FALSE;
			res->a.col = 0;
			res->b = res->a;
976
			res->b.col = a_ss->max_cols - 1;
977 978
			if (ptr[0] != ':' || (ptr[1] != 'R' && ptr[1] != 'r'))
				return ptr;
979
			tmp = r1c1_get_index (ptr+1, a_ss,
980 981 982
					      &res->b.row, &res->b.row_relative,
					      FALSE);
			if (!tmp)
983 984
				return ptr; /* fallback to just the initial R */
			return tmp;
985
		} else {
986
			ptr = r1c1_get_index (ptr, a_ss,