/* vim: set sw=8: -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
/*
 * parse-util.c: Various utility routines to parse or produce
 *     string representations of common reference types.
 *
 * Copyright (C) 2000-2007 Jody Goldberg (jody@gnome.org)
 * Copyright (C) 2008-2009 Morten Welinder (terra@gnome.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
 * USA
 */
24 25
#include <gnumeric-config.h>
#include "gnumeric.h"
26
#include "parse-util.h"
27

28
#include "application.h"
29
#include "workbook.h"
30 31 32
#include "sheet.h"
#include "value.h"
#include "ranges.h"
Jody Goldberg's avatar
Jody Goldberg committed
33 34
#include "cell.h"
#include "expr.h"
35
#include "number-match.h"
36
#include "gnm-format.h"
37
#include "expr-name.h"
Jody Goldberg's avatar
Jody Goldberg committed
38
/* For std_expr_name_handler: */
39
#include "expr-impl.h"
40
#include "gutils.h"
41
#include <goffice/goffice.h>
42

43
#include <errno.h>
44
#include <stdlib.h>
45
#include <glib.h>
Jody Goldberg's avatar
Jody Goldberg committed
46
#include <string.h>
47

48 49 50
/*
 * Append the letter name of 0-based column @col ("A", "B", ..., "Z", "AA",
 * ...) to @target.  A negative column is rendered as "[C<col>]".
 */
static void
col_name_internal (GString *target, int col)
{
	/*
	 * limits[k] is the number of distinct names made of exactly k+1
	 * letters; INT_MAX is a sentinel that always stops the scan.
	 */
	static int const limits[] = {
		26,
		26 * 26,
		26 * 26 * 26,
		26 * 26 * 26 * 26,
		26 * 26 * 26 * 26 * 26,
		26 * 26 * 26 * 26 * 26 * 26,
		INT_MAX
	};
	int extra = 0;
	int n_letters;
	char *p;

	if (col < 0) {
		/* Invalid column.  */
		g_string_append_printf (target, "[C%d]", col);
		return;
	}

	/* Find the name length and the offset within names of that length.  */
	while (col >= limits[extra]) {
		col -= limits[extra];
		extra++;
	}
	n_letters = extra + 1;

	/* Grow the string, then fill the new tail from right to left.  */
	g_string_set_size (target, target->len + n_letters);
	p = target->str + target->len;
	for (; n_letters > 0; n_letters--) {
		p--;
		*p = 'A' + col % 26;
		col /= 26;
	}
}

80 81 82
char const *
col_name (int col)
{
83 84
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
85
		buffer = g_string_new (NULL);
86 87 88 89 90
	g_string_truncate (buffer, 0);

	col_name_internal (buffer, col);

	return buffer->str;
91 92 93 94 95
}

char const *
cols_name (int start_col, int end_col)
{
96 97
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
98
		buffer = g_string_new (NULL);
99
	g_string_truncate (buffer, 0);
100

101
	col_name_internal (buffer, start_col);
102
	if (start_col != end_col) {
103 104
		g_string_append_c (buffer, ':');
		col_name_internal (buffer, end_col);
105
	}
106 107

	return buffer->str;
108 109 110
}

char const *
111
col_parse (char const *str, GnmSheetSize const *ss,
112
	   int *res, unsigned char *relative)
113
{
114
	char const *ptr, *start = str;
115
	int col = -1;
116
	int max = ss->max_cols;
117

118 119
	if (!(*relative = (*start != '$')))
		start++;
120

121
	for (ptr = start; col < max ; ptr++)
122 123 124 125
		if (('a' <= *ptr && *ptr <= 'z'))
			col = 26 * (col + 1) + (*ptr - 'a');
		else if (('A' <= *ptr && *ptr <= 'Z'))
			col = 26 * (col + 1) + (*ptr - 'A');
126
		else if (ptr != start) {
127 128 129
			*res = col;
			return ptr;
		} else
130 131
			return NULL;
	return NULL;
132 133 134 135
}

/***************************************************************************/

136 137
/* Append the 1-based decimal name of 0-based row @row to @target.  */
static void
row_name_internal (GString *target, int row)
{
	int const shown = row + 1;

	g_string_append_printf (target, "%d", shown);
}

142 143 144
char const *
row_name (int row)
{
145 146
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
147
		buffer = g_string_new (NULL);
148 149 150 151 152
	g_string_truncate (buffer, 0);

	row_name_internal (buffer, row);

	return buffer->str;
153 154 155 156 157
}

char const *
rows_name (int start_row, int end_row)
{
158 159
	static GString *buffer = NULL;
	if (!buffer)
Morten Welinder's avatar
Morten Welinder committed
160
		buffer = g_string_new (NULL);
161
	g_string_truncate (buffer, 0);
162

163
	row_name_internal (buffer, start_row);
164
	if (start_row != end_row) {
165 166
		g_string_append_c (buffer, ':');
		row_name_internal (buffer, end_row);
167
	}
168 169

	return buffer->str;
170 171
}

172
char const *
173
row_parse (char const *str, GnmSheetSize const *ss,
174
	   int *res, unsigned char *relative)
175 176
{
	char const *end, *ptr = str;
177
	long int row;
178
	int max = ss->max_rows;
179 180 181 182

	if (!(*relative = (*ptr != '$')))
		ptr++;

183 184 185 186
	/* Initial '0' is not allowed.  */
	if (*ptr <= '0' || *ptr > '9')
		return NULL;

187 188 189 190 191
	/*
	 * Do not allow letters after the row number.  If we did, then
	 * the name "K3P" would lex as the reference K3 followed by the
	 * name "P".
	 */
192
	row = strtol (ptr, (char **)&end, 10);
193 194
	if (ptr != end &&
	    !g_unichar_isalnum (g_utf8_get_char (end)) && *end != '_' &&
195
	    0 < row && row <= max) {
196 197 198
		*res = row - 1;
		return end;
	} else
199
		return NULL;
200 201
}

202
/***************************************************************************/
203

204 205 206 207 208 209 210 211 212 213 214 215
/*
 * Append one R1C1 component to @target.
 *
 * @type is the letter to emit ('R' or 'C' at the call sites in this file).
 * An absolute index is printed 1-based ("R5"); a relative one is printed as
 * a bracketed offset ("R[-2]"), or as the bare letter when the offset is 0.
 */
static void
r1c1_add_index (GString *target, char type, int num, unsigned char relative)
{
	if (!relative) {
		g_string_append_printf (target, "%c%d", type, num + 1);
		return;
	}

	if (num == 0)
		g_string_append_c (target, type);
	else
		g_string_append_printf (target, "%c[%d]", type, num);
}

216 217 218
/*
 * Return the URI of @wb expressed relative to the URI of @ref_wb.  When no
 * relative form is produced, or the produced form starts with '/', a copy
 * of @wb's own URI is returned instead.
 *
 * The caller owns the result and must release it with g_free.
 */
static char *
wb_rel_uri (Workbook *wb, Workbook *ref_wb)
{
	char const *target_uri = go_doc_get_uri ((GODoc *)wb);
	char const *base_uri = go_doc_get_uri ((GODoc *)ref_wb);
	char *relative = go_url_make_relative (target_uri, base_uri);

	if (relative != NULL && relative[0] != '/')
		return relative;

	g_free (relative);
	return g_strdup (target_uri);
}

231 232
/**
 * cellref_as_string :
233
 * @out : #GnmConventionsOut
234 235 236 237 238 239 240
 * @ref :
 * @no_sheetname :
 *
 * Returns a string that the caller needs to free containing the A1 format
 * representation of @ref as evaluated at @pp.  @no_sheetname can be used to
 * suppress the addition of the sheetname for non-local references.
 **/
241
void
242
cellref_as_string (GnmConventionsOut *out,
Jody Goldberg's avatar
Jody Goldberg committed
243
		   GnmCellRef const *cell_ref,
244
		   gboolean no_sheetname)
245
{
246
	GString *target = out->accum;
247
	Sheet const *sheet = cell_ref->sheet;
248

249 250
	/* If it is a non-local reference, add the path to the external sheet */
	if (sheet != NULL && !no_sheetname) {
251
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
252 253
			/* For the expression leak printer.  */
			g_string_append (target, "'?'");
254
		else if (NULL == out->pp->wb || sheet->workbook == out->pp->wb)
255 256
			g_string_append (target, sheet->name_quoted);
		else {
257
			char *rel_uri = wb_rel_uri (sheet->workbook, out->pp->wb);
258
			g_string_append_c (target, '[');
259
			g_string_append (target, rel_uri);
260 261
			g_string_append_c (target, ']');
			g_string_append (target, sheet->name_quoted);
262
			g_free (rel_uri);
263
		}
264
		g_string_append_unichar (target, out->convs->sheet_name_sep);
265 266
	}

267
	if (out->convs->r1c1_addresses) { /* R1C1 handler */
268 269 270
		r1c1_add_index (target, 'R', cell_ref->row, cell_ref->row_relative);
		r1c1_add_index (target, 'C', cell_ref->col, cell_ref->col_relative);
	} else {
271
		GnmCellPos pos;
272
		Sheet const *size_sheet = eval_sheet (sheet, out->pp->sheet);
273 274
		GnmSheetSize const *ss =
			gnm_sheet_get_size2 (size_sheet, out->pp->wb);
275

276
		gnm_cellpos_init_cellref_ss (&pos, cell_ref, &out->pp->eval, ss);
277

278
		if (!cell_ref->col_relative)
279
			g_string_append_c (target, '$');
280
		col_name_internal (target, pos.col);
281

282 283 284
		if (!cell_ref->row_relative)
			g_string_append_c (target, '$');
		row_name_internal (target, pos.row);
285
	}
286 287
}

288
/**
289
 * rangeref_as_string :
290 291
 * @out : #GnmConventionsOut
 * @ref : #GnmRangeRef
292 293
 *
 **/
294
void
295
rangeref_as_string (GnmConventionsOut *out, GnmRangeRef const *ref)
296
{
Jody Goldberg's avatar
Jody Goldberg committed
297
	GnmRange r;
298
	GString *target = out->accum;
299
	Sheet *start_sheet, *end_sheet;
300 301 302
	GnmSheetSize const *end_ss;

	gnm_rangeref_normalize_pp (ref, out->pp, &start_sheet, &end_sheet, &r);
303

304
	end_ss = gnm_sheet_get_size2 (end_sheet, out->pp->wb);
305

306
	if (ref->a.sheet) {
307 308
		if (NULL != out->pp->wb && ref->a.sheet->workbook != out->pp->wb) {
			char *rel_uri = wb_rel_uri (ref->a.sheet->workbook, out->pp->wb);
309
			g_string_append_c (target, '[');
310
			g_string_append (target, rel_uri);
311
			g_string_append_c (target, ']');
312
			g_free (rel_uri);
313
		}
314 315
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
			/* For the expression leak printer.  */
316 317 318 319 320 321 322 323
			g_string_append (target, "'?'");
		else if (ref->b.sheet == NULL || ref->a.sheet == ref->b.sheet)
			g_string_append (target, ref->a.sheet->name_quoted);
		else {
			g_string_append (target, ref->a.sheet->name_quoted);
			g_string_append_c (target, ':');
			g_string_append (target, ref->b.sheet->name_quoted);
		}
324
		g_string_append_unichar (target, out->convs->sheet_name_sep);
325 326
	}

327
	if (out->convs->r1c1_addresses) { /* R1C1 handler */
328
		/* be sure to use else if so that a1:iv65535 does not vanish */
329
		if (r.start.col == 0 && r.end.col == end_ss->max_cols - 1) {
330 331 332 333 334 335
			r1c1_add_index (target, 'R', ref->a.row, ref->a.row_relative);
			if (ref->a.row != ref->b.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'R', ref->b.row, ref->b.row_relative);
			}
336
		} else if (r.start.row == 0 && r.end.row == end_ss->max_rows - 1) {
337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354
			r1c1_add_index (target, 'C', ref->a.col, ref->a.col_relative);
			if (ref->a.col != ref->b.col ||
			    ref->a.col_relative != ref->b.col_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'C', ref->b.col, ref->b.col_relative);
			}
		} else {
			r1c1_add_index (target, 'R', ref->a.row, ref->a.row_relative);
			r1c1_add_index (target, 'C', ref->a.col, ref->a.col_relative);
			if (r.start.col != r.end.col ||
			    ref->a.col_relative != ref->b.col_relative ||
			    r.start.row != r.end.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				r1c1_add_index (target, 'R', ref->b.row, ref->b.row_relative);
				r1c1_add_index (target, 'C', ref->b.col, ref->b.col_relative);
			}
		}
355
	} else {
356
		/* be sure to use else if so that a1:iv65535 does not vanish */
357
		if (r.start.col == 0 && r.end.col == end_ss->max_cols - 1) {
358 359 360 361 362 363 364
			if (!ref->a.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.start.row);
			g_string_append_c (target, ':');
			if (!ref->b.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.end.row);
365
		} else if (r.start.row == 0 && r.end.row == end_ss->max_rows - 1) {
366 367 368
			if (!ref->a.col_relative)
				g_string_append_c (target, '$');
			col_name_internal (target, r.start.col);
369
			g_string_append_c (target, ':');
370
			if (!ref->b.col_relative)
371 372
				g_string_append_c (target, '$');
			col_name_internal (target, r.end.col);
373 374
		} else {
			if (!ref->a.col_relative)
375
				g_string_append_c (target, '$');
376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392
			col_name_internal (target, r.start.col);
			if (!ref->a.row_relative)
				g_string_append_c (target, '$');
			row_name_internal (target, r.start.row);

			if (r.start.col != r.end.col ||
			    ref->a.col_relative != ref->b.col_relative ||
			    r.start.row != r.end.row ||
			    ref->a.row_relative != ref->b.row_relative) {
				g_string_append_c (target, ':');
				if (!ref->b.col_relative)
					g_string_append_c (target, '$');
				col_name_internal (target, r.end.col);
				if (!ref->b.row_relative)
					g_string_append_c (target, '$');
				row_name_internal (target, r.end.row);
			}
393 394 395 396
		}
	}
}

397 398
/**
 * gnm_1_0_rangeref_as_string :
399 400
 * @out : #GnmConventionsOut
 * @ref : #GnmRangeRef
401
 *
402 403 404
 * Simplified variant of rangeref_as_string that old versions of gnumeric can
 * read.  It drops support for full col/row references.  We can remap them on
 * import.
405 406
 *
 * This function also ignores R1C1 settings.
407 408
 **/
void
409
gnm_1_0_rangeref_as_string (GnmConventionsOut *out, GnmRangeRef const *ref)
410 411
{
	GnmRange r;
412
	GString *target = out->accum;
413
	Sheet *start_sheet, *end_sheet;
414

415
	gnm_rangeref_normalize_pp (ref, out->pp, &start_sheet, &end_sheet, &r);
416 417

	if (ref->a.sheet) {
418 419
		if (NULL != out->pp->wb && ref->a.sheet->workbook != out->pp->wb) {
			char *rel_uri = wb_rel_uri (ref->a.sheet->workbook, out->pp->wb);
420
			g_string_append_c (target, '[');
421
			g_string_append (target, rel_uri);
422
			g_string_append_c (target, ']');
423
			g_free (rel_uri);
424
		}
425
		if (out->pp->wb == NULL && out->pp->sheet == NULL)
426 427 428 429 430 431 432 433 434
			/* For the expression leak printer. */
			g_string_append (target, "'?'");
		else if (ref->b.sheet == NULL || ref->a.sheet == ref->b.sheet)
			g_string_append (target, ref->a.sheet->name_quoted);
		else {
			g_string_append (target, ref->a.sheet->name_quoted);
			g_string_append_c (target, ':');
			g_string_append (target, ref->b.sheet->name_quoted);
		}
435
		g_string_append_unichar (target, out->convs->sheet_name_sep);
436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458
	}

	if (!ref->a.col_relative)
		g_string_append_c (target, '$');
	col_name_internal (target, r.start.col);
	if (!ref->a.row_relative)
		g_string_append_c (target, '$');
	row_name_internal (target, r.start.row);

	if (r.start.col != r.end.col ||
	    ref->a.col_relative != ref->b.col_relative ||
	    r.start.row != r.end.row ||
	    ref->a.row_relative != ref->b.row_relative) {
		g_string_append_c (target, ':');
		if (!ref->b.col_relative)
			g_string_append_c (target, '$');
		col_name_internal (target, r.end.col);
		if (!ref->b.row_relative)
			g_string_append_c (target, '$');
		row_name_internal (target, r.end.row);
	}
}

459
static char const *
460
cellref_a1_get (GnmCellRef *out, GnmSheetSize const *ss,
461
		char const *in, GnmCellPos const *pos)
462
{
463 464
	int col;
	int row;
465

466 467
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);
468

469
	in = col_parse (in, ss, &col, &out->col_relative);
470
	if (!in)
471
		return NULL;
472

473
	in = row_parse (in, ss, &row, &out->row_relative);
474
	if (!in)
475
		return NULL;
476 477 478

	/* Setup the cell reference information */
	if (out->row_relative)
Jody Goldberg's avatar
Jody Goldberg committed
479
		out->row = row - pos->row;
480 481 482 483
	else
		out->row = row;

	if (out->col_relative)
Jody Goldberg's avatar
Jody Goldberg committed
484
		out->col = col - pos->col;
485 486 487 488 489
	else
		out->col = col;

	out->sheet = NULL;

490
	return in;
491 492
}

493 494
/* skip first character (which was R or C) */
static char const *
495
r1c1_get_index (char const *str, GnmSheetSize const *ss,
496
		int *num, unsigned char *relative, gboolean is_col)
497
{
498 499 500
	char *end;
	if (str[0] == '\0')
		return NULL;
501

502 503 504
	str++;
	if ((*relative = (*str == '[')))
		str++;
505 506 507 508 509 510
	else if (*str == '-' || *str == '+') { /* handle RC-10 as RC followed by -10 */
		*relative = TRUE;
		*num = 0;
		return str;
	}

511 512 513 514 515 516 517 518 519 520 521 522 523 524
	errno = 0;
	*num = strtol (str, &end, 10);
	if (errno == ERANGE)
		return NULL;
	if (str == end) {
		if (*relative)
			return NULL;
		*relative = TRUE;
		*num = 0;
	} else if (*relative) {
		if (*end != ']')
			return NULL;
		return end + 1;
	} else {
525 526
		int max = is_col ? ss->max_cols : ss->max_rows;
		if (*num <= 0 || *num > max)
527 528
			return NULL;
		(*num)--;
529
	}
530
	return end;
531 532
}

533
static char const *
534
cellref_r1c1_get (GnmCellRef *out, GnmSheetSize const *ss,
535
		  char const *in, GnmCellPos const *pos)
536 537
{
	out->sheet = NULL;
538
	if (*in != 'R' && *in != 'r')
539
		return NULL;
540
	if (NULL == (in = r1c1_get_index (in, ss,
541 542
					  &out->row, &out->row_relative,
					  FALSE)))
543
		return NULL;
544
	if (*in != 'C' && *in != 'c')
545
		return NULL;
546
	if (NULL == (in = r1c1_get_index (in, ss,
547 548
					  &out->col, &out->col_relative,
					  TRUE)))
549 550 551 552
		return NULL;
	if (g_ascii_isalpha (*in))
		return NULL;
	return in;
553 554 555
}

/**
556
 * cellref_parse:
Jody Goldberg's avatar
Jody Goldberg committed
557
 * @out: destination GnmCellRef
558
 * @in: reference description text, no leading
559
 *      whitespace allowed.
Morten Welinder's avatar
Morten Welinder committed
560
 * @pos:
561
 *
562 563
 * Converts the char * representation of a Cell reference into
 * an internal representation.
564
 *
Morten Welinder's avatar
Morten Welinder committed
565
 * Return value: a pointer to the character following the cellref.
566
 **/
567
char const *
568
cellref_parse (GnmCellRef *out, GnmSheetSize const *ss,
569
	       char const *in, GnmCellPos const *pos)
570
{
571
	char const *res;
Morten Welinder's avatar
Morten Welinder committed
572

573 574 575
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);

576
	res = cellref_a1_get (out, ss, in, pos);
577 578
	if (res != NULL)
		return res;
579
	return cellref_r1c1_get (out, ss, in, pos);
580 581 582 583
}

/****************************************************************************/

584 585
/*
 * Return the name of the cell at 0-based (@col, @row): absolute R1C1 form
 * ("R3C2") when @r1c1 is set, A1 form ("B3") otherwise.  The result lives
 * in a static buffer that is overwritten by the next call.
 */
static char const *
cell_coord_name2 (int col, int row, gboolean r1c1)
{
	static GString *scratch = NULL;

	if (scratch == NULL)
		scratch = g_string_new (NULL);
	g_string_truncate (scratch, 0);

	if (!r1c1) {
		col_name_internal (scratch, col);
		row_name_internal (scratch, row);
	} else {
		r1c1_add_index (scratch, 'R', row, FALSE);
		r1c1_add_index (scratch, 'C', col, FALSE);
	}

	return scratch->str;
}
603

604 605 606 607 608 609
char const *
cell_coord_name (int col, int row)
{
	return cell_coord_name2 (col, row, FALSE);
}

610
char const *
Jody Goldberg's avatar
Jody Goldberg committed
611
cellpos_as_string (GnmCellPos const *pos)
612 613 614 615 616 617
{
	g_return_val_if_fail (pos != NULL, "ERROR");

	return cell_coord_name (pos->col, pos->row);
}

618 619 620 621 622 623 624
/*
 * Return the name of the evaluation cell of @pp.  R1C1 notation is used
 * when @pp has a sheet whose conventions request it, A1 notation
 * otherwise.  Returns the literal "ERROR" when @pp is NULL.
 */
char const *
parsepos_as_string (GnmParsePos const *pp)
{
	gboolean use_r1c1 = FALSE;

	g_return_val_if_fail (pp != NULL, "ERROR");

	if (pp->sheet != NULL && pp->sheet->convs->r1c1_addresses)
		use_r1c1 = TRUE;

	return cell_coord_name2 (pp->eval.col, pp->eval.row, use_r1c1);
}

628
char const *
629
cell_name (GnmCell const *cell)
630 631 632
{
	g_return_val_if_fail (cell != NULL, "ERROR");

633 634
	return cell_coord_name2 (cell->pos.col,
				 cell->pos.row,
635
				 cell->base.sheet->convs->r1c1_addresses);
636 637 638
}

/**
639
 * cellpos_parse
640
 * @cell_name:   a string representation of a cell name.
Jody Goldberg's avatar
Jody Goldberg committed
641
 * @pos:         result
642
 * @strict:      if this is TRUE, then parsing stops at possible errors,
Morten Welinder's avatar
Morten Welinder committed
643 644
 *               otherwise an attempt is made to return cell names with
 *               trailing garbage.
645
 *
646
 * Return value: pointer to following char on success, NULL on failure.
Morten Welinder's avatar
Morten Welinder committed
647
 * (In the strict case, that would be a pointer to the \0 or NULL.)
648
 */
649
char const *
650
cellpos_parse (char const *cell_str, GnmSheetSize const *ss,
651
	       GnmCellPos *res, gboolean strict)
652
{
653
	unsigned char dummy_relative;
654

655
	cell_str = col_parse (cell_str, ss, &res->col, &dummy_relative);
656
	if (!cell_str)
657
		return NULL;
658

659
	cell_str = row_parse (cell_str, ss, &res->row, &dummy_relative);
660
	if (!cell_str)
661
		return NULL;
662

Jody Goldberg's avatar
fix.  
Jody Goldberg committed
663
	if (*cell_str != 0 && strict)
664
		return NULL;
665

666
	return cell_str;
667 668
}

669 670 671 672 673 674 675 676 677 678 679 680 681
/**
 * gnm_expr_char_start_p :
 *
 * Can the supplied string be an expression ?  It does not guarantee that it is,
 * however, it is possible.  If it is possible it strips off any header
 * characters that are not relevant.
 *
 * NOTE : things like -1,234 will match
 */
char const *
gnm_expr_char_start_p (char const * c)
{
	char c0;
682
	int N = 1;
683 684 685 686 687 688

	if (NULL == c)
		return NULL;

	c0 = *c;

689 690 691 692
	if (c0 == '=' || c0 == '@' || c0 == '+' || c0 == '-')
		while (c[N] == ' ')
			N++;

693
	if (c0 == '=' || c0 == '@' || (c0 == '+' && c[1] == 0))
694
		return c + N;
695 696 697 698 699 700 701 702 703 704 705 706 707 708

	if ((c0 == '-' || c0 == '+') && c0 != c[1]) {
		char *end;

		/*
		 * Ok, we have a string that
		 * 1. starts with a sign
		 * 2. does not start with the sign repeated (think --------)
		 * 3. is more than one character
		 *
		 * Now we check whether we have a number.  We don't want
		 * numbers to be treated as formulae.  FIXME: this really
		 * just checks for C-syntax numbers.
		 */
709
		(void) gnm_strto (c, &end);
710
		if (errno || *end != 0 || end == c)
711
			return (c0 == '+') ? c + N : c;
712 713 714 715 716
		/* Otherwise, it's a number.  */
	}
	return NULL;
}

717 718 719 720 721 722
/**
 * parse_text_value_or_expr : Utility routine to parse a string and convert it
 *     into an expression or value.
 *
 * @pos : If the string looks like an expression parse it at this location.
 * @text: The text to be parsed.
723 724
 * @val : Returns a GnmValue* if the text was a value, otherwise NULL.
 * @texpr: Returns a GnmExprTop* if the text was an expression, otherwise NULL.
725 726
 * @cur_fmt : Optional, current number format.
 * @date_conv : Optional, date parse conventions
727
 *
Jody Goldberg's avatar
Jody Goldberg committed
728
 * If there is a parse failure for an expression an error GnmValue with the syntax
729 730
 * error is returned.
 */
731
void
Morten Welinder's avatar
Morten Welinder committed
732
parse_text_value_or_expr (GnmParsePos const *pos, char const *text,
733
			  GnmValue **val, GnmExprTop const **texpr,
Morten Welinder's avatar
Morten Welinder committed
734
			  GOFormat const *cur_fmt,
735
			  GODateConventions const *date_conv)
736
{
737
	char const *expr_start;
738

739
	*texpr = NULL;
740

741
	/* Does it match any formats?  */
742
	*val = format_match (text, cur_fmt, date_conv);
743 744
	if (*val != NULL)
		return;
745

746
	/* If it does not match known formats, see if it is an expression */
747
	expr_start = gnm_expr_char_start_p (text);
748
	if (NULL != expr_start && *expr_start) {
749
		*texpr = gnm_expr_parse_str (expr_start, pos,
750
			GNM_EXPR_PARSE_DEFAULT, NULL, NULL);
751
		if (*texpr != NULL)
752
			return;
753 754 755 756
	}

	/* Fall back on string */
	*val = value_new_string (text);
757
}
758

Morten Welinder's avatar
Morten Welinder committed
759 760
/*
 * Reset @pe to the "no error" state: no GError and an empty character
 * range.  Returns @pe so the call can be used inline.
 */
GnmParseError *
parse_error_init (GnmParseError *pe)
{
	pe->begin_char = 0;
	pe->end_char = 0;
	pe->err = NULL;

	return pe;
}

void
Morten Welinder's avatar
Morten Welinder committed
770
parse_error_free (GnmParseError *pe)
771
{
772 773 774
	if (pe->err != NULL) {
		g_error_free (pe->err);
		pe->err = NULL;
775 776
	}
}
777

778
/***************************************************************************/
779

780
/*
 * Scan a quoted string.
 *
 * If @start begins with ' or ", look for the matching closing quote,
 * treating a backslash followed by any character as an escaped character.
 *
 * @num_escapes receives the number of escapes seen, or -1 when @start does
 * not begin with a quote at all.  Note that it is >= 0 for an opening quote
 * that is never closed.
 *
 * Returns a pointer just past the closing quote, or @start when the text is
 * unquoted or the closing quote is missing.
 *
 * The scan is byte-wise.  The quote, backslash and NUL are all ASCII and can
 * never occur inside a multi-byte UTF-8 sequence, so this finds the same
 * quote as a character-wise scan on valid UTF-8, while never stepping over
 * the terminating NUL when the input is malformed.
 */
static char const *
check_quoted (char const *start, int *num_escapes)
{
	char const *str = start;
	char quote;

	if (*str != '\'' && *str != '\"') {
		*num_escapes = -1;
		return start;
	}

	quote = *str++;
	*num_escapes = 0;
	while (*str != '\0' && *str != quote) {
		if (*str == '\\' && str[1] != '\0') {
			/* Skip the backslash; the escaped byte is skipped below.  */
			str++;
			(*num_escapes)++;
		}
		str++;
	}

	return (*str != '\0') ? str + 1 : start;
}
798

799 800
/*
 * Copy at most @n bytes of @src to @dst, dropping each backslash that
 * escapes a following character, and NUL-terminate the result.
 *
 * @n counts source bytes, backslashes included.  Copying stops early if a
 * NUL is found in @src.  @dst must have room for @n + 1 bytes.
 *
 * Only the byte right after a backslash is treated specially; if it starts a
 * multi-byte UTF-8 character the continuation bytes are copied by the
 * ordinary path, as they can never be a backslash themselves.
 */
static void
unquote (char *dst, char const *src, int n)
{
	while (n > 0 && *src != '\0') {
		if (*src == '\\' && n > 1 && src[1] != '\0') {
			/* Drop the backslash and charge it to the budget.  */
			src++;
			n--;
		}
		*dst++ = *src++;
		n--;
	}
	*dst = '\0';
}
812

813 814
/**
 * wbref_parse :
815
 * @convs : #GnmConventions const
816 817 818 819 820 821 822
 * @start :
 * @wb :
 *
 * Returns : NULL if there is a valid workbook name but it is unknown.
 *           If the string is a valid workbook known name it returns a pointer
 *           the end of the name.
 *           Otherwise returns @start and does not modify @wb.
823
 **/
824
static char const *
825 826
wbref_parse (GnmConventions const *convs,
	     char const *start, Workbook **wb, Workbook *ref_wb)
827 828 829
{
	/* Is this an external reference ? */
	if (*start == '[') {
830 831
		Workbook *tmp_wb;

832
		int num_escapes;
833
		char const *end = check_quoted (start+1, &num_escapes);
834 835
		char *name;

836
		if (end == start+1) {
837 838 839
			end = strchr (start, ']');
			if (end == NULL)
				return start;
840
		}
841
		if (*end != ']')
842
			return start;
843 844 845 846 847 848 849 850

		/* might be too big if quoted (remember leading [' */
		name = g_alloca (1 + end - start - 2);
		if (num_escapes < 0) {
			strncpy (name, start+1, end-start-1);
			name [end-start-1] = '\0';
		} else
			unquote (name, start+2, end-start-2);
851

852
		tmp_wb = (*convs->input.external_wb) (convs, ref_wb, name);
853 854
		if (tmp_wb == NULL)
			return NULL;
855

856
		*wb = tmp_wb;
857 858 859 860 861 862
		return end + 1;
	}

	return start;
}

863 864
/**
 * sheetref_parse :
865
 * @convs :
866 867 868 869 870 871
 * @start :
 * @sheet :
 * @wb    :
 * @allow_3d :
 *
 * Returns : NULL if there is a valid sheet name but it is unknown.
872
 *           If the string is a valid sheet name it returns a pointer
873 874 875
 *           the end of the name.
 *           Otherwise returns @start and does not modify @sheet.
 **/
876 877 878
static char const *
sheetref_parse (GnmConventions const *convs,
		char const *start, Sheet **sheet, Workbook const *wb,
879
		gboolean allow_3d)
880
{
881
	GString *sheet_name;
Jody Goldberg's avatar
Jody Goldberg committed
882
	char const *end;
883 884

	*sheet = NULL;
885 886
	if (*start == '\'' || *start == '"') {
		sheet_name = g_string_new (NULL);
Jody Goldberg's avatar
Jody Goldberg committed
887
		end = go_strunescape (sheet_name, start);
888 889 890 891 892
		if (end == NULL) {
			g_string_free (sheet_name, TRUE);
			return start;
		}
	} else {
893 894 895 896 897 898 899 900 901 902 903 904
		gboolean only_digits = TRUE;
		end = start;

		/*
		 * Valid: Normal!a1
		 * Valid: x.y!a1
		 * Invalid: .y!a1
		 *
		 * Some names starting with digits are actually valid, but
		 * unparse quoted. Things are quite tricky: most sheet names
		 * starting with a digit are ok, but not those starting with
		 * "[0-9]*\." or "[0-9]+[eE]".
905
		 *
906 907 908 909 910 911 912 913
		 * Valid: 42!a1
		 * Valid: 4x!a1
		 * Invalid: 1.!a1
		 * Invalid: 1e!a1
		 */

		while (1) {
			gunichar uc = g_utf8_get_char (end);
914
			if (g_unichar_isalpha (uc) || uc == '_') {
915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930
				if (only_digits && end != start &&
				    (uc == 'e' || uc == 'E')) {
					end = start;
					break;
				}
				only_digits = FALSE;
				end = g_utf8_next_char (end);
			} else if (g_unichar_isdigit (uc)) {
				end = g_utf8_next_char (end);
			} else if (uc == '.') {
				/* Valid, except after only digits.  */
				if (only_digits) {
					end = start;
					break;
				}
				end++;
931
			} else
932 933
				break;
		}
934

935 936
		if (*end != '!' && (!allow_3d || *end != ':'))
			return start;
937

938 939
		sheet_name = g_string_new_len (start, end - start);
	}
940

941 942 943
	*sheet = workbook_sheet_by_name (wb, sheet_name->str);
	if (*sheet == NULL)
		end = start;
944

945 946
	g_string_free (sheet_name, TRUE);
	return end;
947 948
}

949 950 951 952
static char const *
r1c1_rangeref_parse (GnmRangeRef *res, char const *ptr, GnmParsePos const *pp)
{
	char const *tmp;
953 954 955 956 957 958
	GnmSheetSize const *a_ss, *b_ss;
	Sheet const *a_sheet, *b_sheet;

	a_sheet = eval_sheet (res->a.sheet, pp->sheet);
	b_sheet = eval_sheet (res->b.sheet, a_sheet);

959 960
	a_ss = gnm_sheet_get_size2 (a_sheet, pp->wb);
	b_ss = gnm_sheet_get_size2 (b_sheet, pp->wb);
961 962

	if (*ptr == 'R' || *ptr == 'r') {
963
		ptr = r1c1_get_index (ptr, a_ss,
964 965 966
				      &res->a.row, &res->a.row_relative,
				      FALSE);
		if (!ptr)
967
			return NULL;
968 969 970 971 972 973 974
		if (*ptr != 'C' && *ptr != 'c') {
			if (g_ascii_isalpha (*ptr))
				return NULL;
			/* full row R# */
			res->a.col_relative = FALSE;
			res->a.col = 0;
			res->b = res->a;
975
			res->b.col = a_ss->max_cols - 1;
976 977
			if (ptr[0] != ':' || (ptr[1] != 'R' && ptr[1] != 'r'))
				return ptr;
978
			tmp = r1c1_get_index (ptr+1, a_ss,
979 980 981
					      &res->b.row, &res->b.row_relative,
					      FALSE);
			if (!tmp)
982 983
				return ptr; /* fallback to just the initial R */
			return tmp;
984
		} else {
985
			ptr = r1c1_get_index (ptr, a_ss,
986 987 988 989 990
					      &res