/* vim: set sw=8: */

/*
 * parse-util.c: Various utility routines to parse or produce
 *     string representations of common reference types.
 *
 * Copyright (C) 2000 Jody Goldberg (jody@gnome.org)
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License as
 * published by the Free Software Foundation; either version 2 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
 * USA
 */
24 25
#include <gnumeric-config.h>
#include "gnumeric.h"
26
#include "parse-util.h"
27

28 29 30 31
#include "workbook.h"
#include "sheet.h"
#include "value.h"
#include "ranges.h"
Jody Goldberg's avatar
Jody Goldberg committed
32 33
#include "cell.h"
#include "expr.h"
34
#include "number-match.h"
35
#include "format.h"
36

37
#include <errno.h>
38 39 40
#include <stdlib.h>
#include <stdio.h>
#include <ctype.h>
41
#include <glib.h>
42 43 44

/* Can remove sheet since local references have NULL sheet */
char *
Jody Goldberg's avatar
Jody Goldberg committed
45
cellref_name (CellRef const *cell_ref, ParsePos const *pp, gboolean no_sheetname)
46 47 48 49 50 51 52
{
	static char buffer [sizeof (long) * 4 + 4];
	char *p = buffer;
	int col, row;
	Sheet *sheet = cell_ref->sheet;

	if (cell_ref->col_relative)
53
		col = pp->eval.col + cell_ref->col;
54 55 56 57 58
	else {
		*p++ = '$';
		col = cell_ref->col;
	}

59 60 61 62 63
	/* ICK!  XL compatibility kludge */
	col %= SHEET_MAX_COLS;
	if (col < 0)
		col += SHEET_MAX_COLS;

64 65 66 67 68 69 70 71 72 73
	if (col <= 'Z'-'A'){
		*p++ = col + 'A';
	} else {
		int a = col / ('Z'-'A'+1);
		int b = col % ('Z'-'A'+1);

		*p++ = a + 'A' - 1;
		*p++ = b + 'A';
	}
	if (cell_ref->row_relative)
74
		row = pp->eval.row + cell_ref->row;
75 76 77 78 79
	else {
		*p++ = '$';
		row = cell_ref->row;
	}

80 81 82 83 84
	/* ICK!  XL compatibility kludge */
	row %= SHEET_MAX_ROWS;
	if (row < 0)
		row += SHEET_MAX_ROWS;

85 86 87
	sprintf (p, "%d", row+1);

	/* If it is a non-local reference, add the path to the external sheet */
Jody Goldberg's avatar
Jody Goldberg committed
88
	if (sheet != NULL && !no_sheetname) {
89 90
		/* pp->wb==NULL happens for the leak printer.  */
		if (pp->wb == NULL || sheet->workbook == pp->wb)
91 92 93
			return g_strconcat (sheet->name_quoted, "!", buffer, NULL);
		return g_strconcat ("[", sheet->workbook->filename, "]",
				    sheet->name_quoted, "!", buffer, NULL);
94 95 96 97
	} else
		return g_strdup (buffer);
}

98
char const *
99
cellref_a1_get (CellRef *out, char const *in, CellPos const *pos)
100 101 102 103
{
	int col = 0;
	int row = 0;

104 105
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);
106 107 108 109 110 111 112 113 114

	/* Try to parse a column */
	if (*in == '$'){
		out->col_relative = FALSE;
		in++;
	} else
		out->col_relative = TRUE;

	if (!(toupper (*in) >= 'A' && toupper (*in) <= 'Z'))
115
		return NULL;
116 117

	col = toupper (*in++) - 'A';
118

119 120 121 122 123 124 125 126 127
	if (toupper (*in) >= 'A' && toupper (*in) <= 'Z')
		col = (col+1) * ('Z'-'A'+1) + toupper (*in++) - 'A';

	/* Try to parse a row */
	if (*in == '$'){
		out->row_relative = FALSE;
		in++;
	} else
		out->row_relative = TRUE;
128

129
	if (!(*in >= '1' && *in <= '9'))
130
		return NULL;
131 132 133 134 135 136

	while (isdigit ((unsigned char)*in)){
		row = row * 10 + *in - '0';
		in++;
	}
	if (row > SHEET_MAX_ROWS)
137
		return NULL;
138 139 140 141
	row--;

	/* Setup the cell reference information */
	if (out->row_relative)
Jody Goldberg's avatar
Jody Goldberg committed
142
		out->row = row - pos->row;
143 144 145 146
	else
		out->row = row;

	if (out->col_relative)
Jody Goldberg's avatar
Jody Goldberg committed
147
		out->col = col - pos->col;
148 149 150 151 152
	else
		out->col = col;

	out->sheet = NULL;

153
	return in;
154 155 156
}

static gboolean
157
r1c1_get_item (int *num, unsigned char *rel, char const * *in)
158 159 160
{
	gboolean neg = FALSE;

161 162 163
	if (**in == '\0')
		return FALSE;

164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
	if (**in == '[') {
		(*in)++;
		*rel = TRUE;
		if (!**in)
			return FALSE;

		if (**in == '+')
			(*in)++;
		else if (**in == '-') {
			neg = TRUE;
			(*in)++;
		}
	}
	*num = 0;

	while (**in && isdigit ((unsigned char)**in)) {
		*num = *num * 10 + **in - '0';
		(*in)++;
	}

	if (neg)
		*num = -*num;

	if (**in == ']')
		(*in)++;

	return TRUE;
}

193
char const *
194
cellref_r1c1_get (CellRef *out, char const *in, CellPos const *pos)
195
{
196 197
	g_return_val_if_fail (in != NULL, NULL);
	g_return_val_if_fail (out != NULL, NULL);
198 199 200

	out->row_relative = FALSE;
	out->col_relative = FALSE;
Jody Goldberg's avatar
Jody Goldberg committed
201 202
	out->col = pos->col;
	out->row = pos->row;
203 204
	out->sheet = NULL;

205 206 207
	if (*in == 'R') {
		in++;
		if (!r1c1_get_item (&out->row, &out->row_relative, &in))
208
			return NULL;
209
	} else
210
		return NULL;
211

212 213 214
	if (*in == 'C') {
		in++;
		if (!r1c1_get_item (&out->col, &out->col_relative, &in))
215
			return NULL;
216
	} else
217
		return NULL;
218 219 220

	out->col--;
	out->row--;
221
	return in;
222 223 224 225 226
}

/**
 * cellref_get:
 * @out: destination CellRef
227
 * @in: reference description text, no leading
228
 *      whitespace allowed.
229
 *
230 231
 * Converts the char * representation of a Cell reference into
 * an internal representation.
232
 *
233 234
 * Return value: TRUE if no format errors found.
 **/
235
char const *
236
cellref_get (CellRef *out, char const *in, CellPos const *pos)
237
{
238 239 240 241
	char const *res = cellref_a1_get (out, in, pos);
	if (res != NULL)
		return res;
	return cellref_r1c1_get (out, in, pos);
242 243 244 245
}

/****************************************************************************/

246 247 248 249 250 251 252 253 254
/**
 * gnumeric_char_start_expr_p :
 *
 * Can the supplied string be an expression ?  It does not guarantee that it is,
 * however, it is possible.  If it is possible it strips off any header
 * characters that are not relevant.
 *
 * NOTE : things like -1,234 will match
 */
255 256 257
char const *
gnumeric_char_start_expr_p (char const * c)
{
258
	char c0;
Jody Goldberg's avatar
Jody Goldberg committed
259

260 261 262 263
	if (NULL == c)
		return NULL;

	c0 = *c;
264 265

	if (c0 == '=' || c0 == '@')
266 267
		return c + 1;

268
	if ((c0 == '-' || c0 == '+') && c[1] != 0 && c0 != c[1]) {
269 270
		char *end;

271 272 273 274 275 276 277 278 279 280
		/*
		 * Ok, we have a string that
		 * 1. starts with a sign
		 * 2. does not start with the sign repeated (think --------)
		 * 3. is more than one character
		 *
		 * Now we check whether we have a number.  We don't want
		 * numbers to be treated as formulae.  FIXME: this really
		 * just checks for C-syntax numbers.
		 */
281
		errno = 0;
Morten Welinder's avatar
Morten Welinder committed
282
		(void) strtognum (c, &end);
283
		if (errno || *end != 0 || end == c)
284
			return (c0 == '+') ? c + 1 : c;
285 286
		/* Otherwise, it's a number.  */
	}
287 288 289
	return NULL;
}

290 291
static char *
col_name_internal (char *buf, int col)
292
{
293 294
	g_return_val_if_fail (col < SHEET_MAX_COLS, buf);
	g_return_val_if_fail (col >= 0, buf);
295

296 297
	if (col <= 'Z'-'A'){
		*buf++ = col + 'A';
298
	} else {
299 300
		int a = col / ('Z'-'A'+1);
		int b = col % ('Z'-'A'+1);
301

302 303
		*buf++ = a + 'A' - 1;
		*buf++ = b + 'A';
304
	}
305 306
	return buf;
}
307

308 309 310
char const *
col_name (int col)
{
311
	static char buffer [3]; /* What if SHEET_MAX_COLS is changed ? */
312 313 314 315 316 317 318 319
	char *res = col_name_internal (buffer, col);
	*res = '\0';
	return buffer;
}

char const *
cols_name (int start_col, int end_col)
{
320
	static char buffer [16]; /* Why is this 16 ? */
321 322 323 324
	char *res = col_name_internal (buffer, start_col);

	if (start_col != end_col) {
		*res = ':';
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
		res = col_name_internal (res + 1, end_col);
	}
	*res = '\0';
	return buffer;
}

static char *
row_name_internal (char *buf, int row)
{
	int len = g_snprintf (buf, 6, "%d", row + 1); /* The 6 is hardcoded, see comments in row{s}_name */
	return buf + len;
}

char const *
row_name (int row)
{
	static char buffer [6]; /* What if SHEET_MAX_ROWS changes? */
	char *res = row_name_internal (buffer, row);
	*res = '\0';
	return buffer;
}

char const *
rows_name (int start_row, int end_row)
{
	static char buffer [13]; /* What if SHEET_MAX_ROWS changes? */
	char *res = row_name_internal (buffer, start_row);

	if (start_row != end_row) {
		*res = ':';
		res = row_name_internal (res + 1, end_row);
356 357 358 359 360 361 362 363 364 365 366
	}
	*res = '\0';
	return buffer;
}

char const *
cell_coord_name (int col, int row)
{
	static char buffer [2 + 4 * sizeof (long)];
	char *res = col_name_internal (buffer, col);
	sprintf (res, "%d", row + 1);
367 368
	return buffer;
}
369 370

char const *
371 372 373 374 375 376 377
cell_pos_name (CellPos const *pos)
{
	g_return_val_if_fail (pos != NULL, "ERROR");

	return cell_coord_name (pos->col, pos->row);
}

378
char const *
379 380 381 382
cell_name (Cell const *cell)
{
	g_return_val_if_fail (cell != NULL, "ERROR");

383
	return cell_coord_name (cell->pos.col, cell->pos.row);
384 385 386 387 388 389
}

/**
 * Converts a column name into an integer
 **/
int
390
parse_col_name (char const *cell_str, char const **endptr)
391 392 393 394
{
	char c;
	int col = 0;

395 396 397
	if (endptr)
		*endptr = cell_str;

398 399
	c = toupper ((unsigned char)*cell_str++);
	if (c < 'A' || c > 'Z')
400 401
		return 0;

402 403
	col = c - 'A';
	c = toupper ((unsigned char)*cell_str);
404
	if (c >= 'A' && c <= 'Z') {
405
		col = ((col + 1) * ('Z' - 'A' + 1)) + (c - 'A');
406 407 408
		cell_str++;
	}

409
	if (col >= SHEET_MAX_COLS)
410 411 412 413 414 415
		return 0;

	if (endptr)
		*endptr = cell_str;

	return col;
416 417
}

418
/**
419 420 421 422 423 424
 * parse_cell_name
 * @cell_name:   a string representation of a cell name.
 * @col:         result col
 * @row:         result row
 * @strict:      if this is TRUE, then parsing stops at possible errors,
 *               otherwise an attempt is made to return cell names with trailing garbage.
425
 *
426 427 428
 * Return value: true if the cell_name could be successfully parsed
 */
gboolean
429
parse_cell_name (char const *cell_str, int *col, int *row, gboolean strict, int *chars_read)
430 431 432 433
{
	char const * const original = cell_str;
	unsigned char c;
	gboolean found_digits = FALSE;
434

435 436
	if (*cell_str == '$')
		cell_str++;
437

438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454
	/* Parse column name: one or two letters.  */
	c = toupper ((unsigned char) *cell_str);
	cell_str++;
	if (c < 'A' || c > 'Z')
		return FALSE;

	*col = c - 'A';
	c = toupper ((unsigned char)*cell_str);
	if (c >= 'A' && c <= 'Z') {
		*col = ((*col + 1) * ('Z' - 'A' + 1)) + (c - 'A');
		cell_str++;
	}
	if (*col >= SHEET_MAX_COLS)
		return FALSE;

	if (*cell_str == '$')
		cell_str++;
455

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486
	/* Parse row number: a sequence of digits.  */
	for (*row = 0; *cell_str; cell_str++) {
		if (*cell_str < '0' || *cell_str > '9'){
			if (found_digits && strict == FALSE){
				break;
			} else
				return FALSE;
		}
		found_digits = TRUE;
		*row = *row * 10 + (*cell_str - '0');
		if (*row > SHEET_MAX_ROWS) /* Note: ">" is deliberate.  */
			return FALSE;
	}
	if (*row == 0)
		return FALSE;

	/* Internal row numbers are one less than the displayed.  */
	(*row)--;

	if (chars_read)
		*chars_read = cell_str - original;
	return TRUE;
}

/**
 * parse_text_value_or_expr : Utility routine to parse a string and convert it
 *     into an expression or value.
 *
 * @pos : If the string looks like an expression parse it at this location.
 * @text: The text to be parsed.
 * @val : Returns a Value * if the text was a value, otherwise NULL.
487
 * @expr: Returns an GnmExpr * if the text was an expression, otherwise NULL.
488
 * @current_format : Optional, current number format.
489 490 491 492
 *
 * If there is a parse failure for an expression an error Value with the syntax
 * error is returned.
 */
493
void
494
parse_text_value_or_expr (ParsePos const *pos, char const *text,
495
			  Value **val, GnmExpr const **expr,
496
			  StyleFormat *current_format /* can be NULL */)
497
{
498
	char const *expr_start;
499

500 501
	*expr = NULL;

502
	/* Does it match any formats?  */
503 504 505
	*val = format_match (text, current_format);
	if (*val != NULL)
		return;
506

507 508 509
	/* If it does not match known formats, see if it is an expression */
	expr_start = gnumeric_char_start_expr_p (text);
	if (NULL != expr_start && *expr_start) {
510 511
		*expr = gnm_expr_parse_str (expr_start, pos,
			GNM_EXPR_PARSE_DEFAULT, NULL);
512
		if (*expr != NULL)
513
			return;
514 515 516 517
	}

	/* Fall back on string */
	*val = value_new_string (text);
518
}
519 520 521 522

ParseError *
parse_error_init (ParseError *pe)
{
523
	pe->id         = PERR_NONE;
524 525 526
	pe->message    = NULL;
	pe->begin_char = -1;
	pe->end_char   = -1;
Jody Goldberg's avatar
Jody Goldberg committed
527

528 529 530 531 532 533 534
	return pe;
}

void
parse_error_free (ParseError *pe)
{
	if (pe->message != NULL) {
Jody Goldberg's avatar
Jody Goldberg committed
535
		g_free (pe->message);
536 537 538
		pe->message = NULL;
	}
}
539 540


541 542 543 544 545 546
#undef DEBUG_PARSE_SURROUNDING_RANGES

gboolean   
parse_surrounding_ranges  (char const *text, gint cursor, Sheet *sheet, 
			   gboolean single_range_only, gint *from, gint *to,
			   RangeRef **range)
547 548 549 550 551 552 553
{
	int start, end, last;
	gchar *test;
	gboolean last_was_alnum = FALSE;
	
	if (text == NULL)
		return FALSE;
554 555 556 557

#ifdef DEBUG_PARSE_SURROUNDING_RANGES
			g_warning ("Starting  to parse [%s]", text);
#endif
558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593
	
	last = strlen (text);
	for (start = 0;
	     start <= cursor;
	     start = g_utf8_next_char (text + start) - text) {
		int next_end = -1;
		gboolean next_was_alnum = FALSE;
		gunichar c = g_utf8_get_char (text + start);
		gboolean is_alnum = g_unichar_isalnum (c);

		/* A range does not start in the middle of a word.  */
		if (last_was_alnum && is_alnum)
			continue;
		last_was_alnum = is_alnum;
		/* A range starts with a letter, a quote, or a dollar sign.  */
		if (is_alnum ? g_unichar_isdigit (c) : (c != '\'' && c != '$'))
			continue;

		for (end = last; end >= MAX (cursor, start + 1); end = next_end) {
			GSList *ranges;
			gunichar c_end;
			gboolean is_alnum;

			next_end = g_utf8_prev_char (text + end) - text;
			c_end = g_utf8_get_char (text + next_end);
			is_alnum = g_unichar_isalnum (c_end);

			/* A range does not end in the middle of a word.  */
			if (is_alnum && next_was_alnum)
				continue;
			next_was_alnum = is_alnum;
			/* A range ends in a letter, digit, or quote.  */
			if (!is_alnum && c_end != '\'')
				continue;

			test = g_strndup (text + start, end - start);
594 595

#ifdef DEBUG_PARSE_SURROUNDING_RANGES
596 597
			g_warning ("Parsing [%s]", test);
#endif
598

599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620
			ranges = global_range_list_parse (sheet, test);
			g_free (test);

			if (ranges != NULL) {
				if ((ranges->next != NULL) && single_range_only) { 
					range_list_destroy (ranges);
					continue;
				}
				*from = start;
				*to = end;
				if (range) {
					*range = value_to_rangeref 
						((Value *) ((g_slist_last 
							     (ranges))->data), FALSE);
				}
				range_list_destroy (ranges);
				return TRUE;
			}
		}
	}
	return FALSE;
}