Commit 78b37c0e authored by Christian Persch's avatar Christian Persch

parser: Import new parser

parent 032a5c66
......@@ -85,6 +85,9 @@ IGNORE_HFILES = \
keymap.h \
marshal.h \
matcher.hh \
parser.hh \
parser-cmd.hh \
parser-glue.hh \
ring.h \
stamp-vtetypebuiltins.h \
table.hh \
......
......@@ -48,7 +48,6 @@ libvte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_la_SOURCES = \
attr.hh \
buffer.h \
caps.hh \
caps-list.hh \
color-triple.hh \
debug.cc \
debug.h \
......@@ -56,15 +55,15 @@ libvte_@VTE_API_MAJOR_VERSION@_@VTE_API_MINOR_VERSION@_la_SOURCES = \
iso2022.h \
keymap.cc \
keymap.h \
matcher.cc \
matcher.hh \
parser.cc \
parser.hh \
parser-cmd.hh \
parser-glue.hh \
pty.cc \
reaper.cc \
reaper.hh \
ring.cc \
ring.h \
table.cc \
table.hh \
vte.cc \
vteaccess.cc \
vteaccess.h \
......@@ -173,7 +172,7 @@ vteresources.cc: vte.gresource.xml Makefile $(shell $(GLIB_COMPILE_RESOURCES) --
# Misc unit tests and utilities
noinst_PROGRAMS += interpret slowcat
noinst_PROGRAMS += interpret interpret-old slowcat test-parser
noinst_SCRIPTS = decset osc window
EXTRA_DIST += $(noinst_SCRIPTS)
......@@ -194,6 +193,7 @@ dist_check_SCRIPTS = \
$(NULL)
TESTS = \
test-parser \
reaper \
test-vtetypes \
vteconv \
......@@ -230,15 +230,14 @@ reflect_vte_LDADD = libvte-$(VTE_API_VERSION).la $(VTE_LIBS)
interpret_SOURCES = \
buffer.h \
caps.hh \
caps-list.hh \
debug.cc \
debug.h \
iso2022.cc \
iso2022.h \
matcher.cc \
matcher.hh \
table.cc \
table.hh \
parser.cc \
parser.hh \
parser-cmd.hh \
parser-glue.hh \
vteconv.cc \
vteconv.h \
interpret.cc
......@@ -262,6 +261,40 @@ interpret_LDADD = \
$(GLIB_LIBS) \
$(GOBJECT_LIBS)
interpret_old_SOURCES = \
buffer.h \
caps.hh \
debug.cc \
debug.h \
iso2022.cc \
iso2022.h \
matcher.cc \
matcher.hh \
table.cc \
table.hh \
vteconv.cc \
vteconv.h \
interpret-old.cc
interpret_old_CPPFLAGS = \
-DINTERPRET_OLD_MAIN \
-DVTE_API_VERSION=\"$(VTE_API_VERSION)\" \
-I$(builddir) \
-I$(srcdir) \
$(AM_CPPFLAGS)
interpret_old_CFLAGS = \
$(GLIB_CFLAGS) \
$(GOBJECT_CFLAGS) \
$(GTK_CFLAGS) \
$(AM_CFLAGS)
interpret_old_CXXFLAGS = \
$(GLIB_CFLAGS) \
$(GOBJECT_CFLAGS) \
$(GTK_CFLAGS) \
$(AM_CXXFLAGS)
interpret_old_LDADD = \
$(GLIB_LIBS) \
$(GOBJECT_LIBS)
slowcat_SOURCES = \
slowcat.c \
$(NULL)
......@@ -269,6 +302,24 @@ slowcat_CPPFLAGS = -I$(builddir) -I$(srcdir) $(AM_CPPFLAGS)
slowcat_CFLAGS = $(GLIB_CFLAGS) $(AM_CFLAGS)
slowcat_LDADD = $(GLIB_LIBS)
test_parser_SOURCES = \
parser-test.cc \
parser.cc \
parser.hh \
parser-cmd.hh \
parser-glue.hh \
$(NULL)
test_parser_CPPFLAGS = \
-I$(builddir) \
-I$(srcdir) \
$(AM_CPPFLAGS)
test_parser_CXXFLAGS = \
$(VTE_CFLAGS) \
$(AM_CXXFLAGS)
test_parser_LDADD = \
$(VTE_LIBS) \
$(NULL)
test_vtetypes_SOURCES = \
vtetypes.cc \
vtetypes.hh \
......
......@@ -35,7 +35,7 @@ _vte_debug_init(void)
{ "adj", VTE_DEBUG_ADJ },
{ "updates", VTE_DEBUG_UPDATES },
{ "events", VTE_DEBUG_EVENTS },
{ "parse", VTE_DEBUG_PARSE },
{ "parser", VTE_DEBUG_PARSER },
{ "signals", VTE_DEBUG_SIGNALS },
{ "selection", VTE_DEBUG_SELECTION },
{ "substitution", VTE_DEBUG_SUBSTITUTION },
......@@ -44,7 +44,6 @@ _vte_debug_init(void)
{ "cursor", VTE_DEBUG_CURSOR },
{ "keyboard", VTE_DEBUG_KEYBOARD },
{ "lifecycle", VTE_DEBUG_LIFECYCLE },
{ "matcher", VTE_DEBUG_MATCHER },
{ "work", VTE_DEBUG_WORK },
{ "cells", VTE_DEBUG_CELLS },
{ "timeout", VTE_DEBUG_TIMEOUT },
......
......@@ -38,7 +38,7 @@ G_BEGIN_DECLS
typedef enum {
VTE_DEBUG_MISC = 1 << 0,
VTE_DEBUG_PARSE = 1 << 1,
VTE_DEBUG_PARSER = 1 << 1,
VTE_DEBUG_IO = 1 << 2,
VTE_DEBUG_UPDATES = 1 << 3,
VTE_DEBUG_EVENTS = 1 << 4,
......@@ -50,19 +50,18 @@ typedef enum {
VTE_DEBUG_CURSOR = 1 << 10,
VTE_DEBUG_KEYBOARD = 1 << 11,
VTE_DEBUG_LIFECYCLE = 1 << 12,
VTE_DEBUG_MATCHER = 1 << 13,
VTE_DEBUG_WORK = 1 << 14,
VTE_DEBUG_CELLS = 1 << 15,
VTE_DEBUG_TIMEOUT = 1 << 16,
VTE_DEBUG_DRAW = 1 << 17,
VTE_DEBUG_ALLY = 1 << 18,
VTE_DEBUG_ADJ = 1 << 19,
VTE_DEBUG_PANGOCAIRO = 1 << 20,
VTE_DEBUG_WIDGET_SIZE = 1 << 21,
VTE_DEBUG_STYLE = 1 << 22,
VTE_DEBUG_RESIZE = 1 << 23,
VTE_DEBUG_REGEX = 1 << 24,
VTE_DEBUG_HYPERLINK = 1 << 25,
VTE_DEBUG_WORK = 1 << 13,
VTE_DEBUG_CELLS = 1 << 14,
VTE_DEBUG_TIMEOUT = 1 << 15,
VTE_DEBUG_DRAW = 1 << 16,
VTE_DEBUG_ALLY = 1 << 17,
VTE_DEBUG_ADJ = 1 << 18,
VTE_DEBUG_PANGOCAIRO = 1 << 19,
VTE_DEBUG_WIDGET_SIZE = 1 << 20,
VTE_DEBUG_STYLE = 1 << 21,
VTE_DEBUG_RESIZE = 1 << 22,
VTE_DEBUG_REGEX = 1 << 23,
VTE_DEBUG_HYPERLINK = 1 << 24,
} VteDebugFlags;
void _vte_debug_init(void);
......
/*
* Copyright (C) 2001,2002,2003 Red Hat, Inc.
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <config.h>
#include <sys/types.h>
#include <assert.h>
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <glib.h>
#include <glib-object.h>
#include <locale.h>
#include "caps.hh"
#include "debug.h"
#include "iso2022.h"
#include "matcher.hh"
/* When true, per-sequence and per-character output is suppressed; set in
 * main() from the "--quiet" / "-q" command line argument. */
static bool quiet = false;
/*
 * print_array:
 * @array: the values to print, or NULL to print nothing
 * @subvalues: true when @array is a nested array inside another array
 *
 * Prints the values of @array to stdout via g_print(). Top-level values
 * are separated by ";" and preceded by a single space when the array is
 * non-empty; nested values are separated by ":". Long values are printed
 * as decimal numbers, string and pointer values are printed quoted, and a
 * boxed value found at the top level is printed recursively as a nested
 * array. Values of any other type print nothing apart from the separator.
 */
static void
print_array(GValueArray* array, bool subvalues)
{
        if (array == NULL)
                return;

        unsigned int const count = array->n_values;

        /* Only the outermost array is set off from the handler name. */
        if (count > 0 && !subvalues)
                g_print(" ");

        for (unsigned int idx = 0; idx < count; idx++) {
                GValue *item = g_value_array_get_nth(array, idx);

                if (idx != 0)
                        g_print(subvalues? ":" : ";");

                if (G_VALUE_HOLDS_LONG(item)) {
                        g_print("%ld", g_value_get_long(item));
                        continue;
                }
                if (G_VALUE_HOLDS_STRING(item)) {
                        g_print("\"%s\"", g_value_get_string(item));
                        continue;
                }
                if (G_VALUE_HOLDS_POINTER(item)) {
                        /* The pointer payload is printed as a wide string. */
                        g_print("\"%ls\"",
                                (wchar_t*) g_value_get_pointer(item));
                        continue;
                }
                if (G_VALUE_HOLDS_BOXED(item)) {
                        /* Only one level of nesting is supported. */
                        if (subvalues)
                                g_print("subsubvalues!?");
                        else
                                print_array((GValueArray *)g_value_get_boxed(item), true);
                }
        }
}
namespace vte {
namespace parser {

/* Parameter holder handed to the sequence handlers in this file; it only
 * carries the GValueArray that main() asks the matcher to fill in. */
struct Params {
        GValueArray *m_values;
};

} // namespace parser
} // namespace vte
/* One HANDLER_<name> enumerator per SEQUENCE_HANDLER(name) entry in
 * vteseq-list.hh, followed by the total count. */
enum {
#define SEQUENCE_HANDLER(name) HANDLER_##name,
#include "vteseq-list.hh"
#undef SEQUENCE_HANDLER
        _HANDLER_N /* number of handlers; must stay last */
};
/*
 * handler_to_string:
 * @handler_id: a HANDLER_* enumerator
 *
 * Returns: the entry for @handler_id from the string table included from
 *   vteseq-str.hh, or "XXXWTF" when @handler_id lies outside
 *   [0, _HANDLER_N).
 *
 * NOTE(review): presumably vteseq-str.hh lists the handler names in the
 * same order as vteseq-list.hh -- confirm against the generated header.
 */
static char const* handler_to_string(int handler_id)
{
        static char const* handlers[_HANDLER_N] = {
#include "vteseq-str.hh"
        };

        bool const in_range = (handler_id >= 0) && (handler_id < _HANDLER_N);
        if (G_UNLIKELY(!in_range))
                return "XXXWTF";

        return handlers[handler_id];
}
/*
 * print_seq:
 * @handler_id: a HANDLER_* enumerator identifying the matched sequence
 * @fname: name of the calling handler function; currently not printed
 * @params: parameters collected for the sequence
 *
 * Prints one line to stdout: the handler's name followed by its
 * parameters as formatted by print_array().
 */
static void print_seq(int handler_id,
                      const char *fname,
                      vte::parser::Params const& params)
{
        char const* label = handler_to_string(handler_id);

        g_print("%s", label);
        print_array(params.m_values, false);
        g_print("\n");
}
/* Local VteTerminalPrivate for this tool: for every SEQUENCE_HANDLER(name)
 * entry in vteseq-list.hh it defines a seq_<name>() member that, unless
 * the global "quiet" flag is set, prints the sequence via print_seq().
 * The members are defined in-class and are therefore implicitly inline. */
class VteTerminalPrivate {
public:
#define SEQUENCE_HANDLER(name) \
        void seq_ ## name (vte::parser::Params const& params) { \
                if (quiet) \
                        return; \
                print_seq(HANDLER_##name, __func__, params); \
        }
#include "vteseq-list.hh"
#undef SEQUENCE_HANDLER
};
/*
 * _vte_get_matcher_entries:
 * @n_entries: (out): receives the number of entries in the returned table
 *
 * Returns: the "entries" table that caps-list.hh defines inside this
 *   function's body.
 *
 * NOTE(review): the returned pointer is only valid if caps-list.hh
 * declares "entries" with static storage duration -- confirm in the header.
 */
vte_matcher_entry_t const*
_vte_get_matcher_entries(unsigned int* n_entries)
{
#include "caps-list.hh"

        unsigned int const count = G_N_ELEMENTS (entries);

        *n_entries = count;
        return entries;
}
int
main(int argc, char **argv)
{
struct _vte_matcher *matcher = NULL;
GArray *array;
int infile;
struct _vte_iso2022_state *subst;
setlocale(LC_ALL, "");
_vte_debug_init();
if (argc < 1) {
g_print("usage: %s [file] [--quiet]\n", argv[0]);
return 1;
}
if ((argc > 1) && (strcmp(argv[1], "-") != 0)) {
infile = open (argv[1], O_RDONLY);
if (infile == -1) {
g_print("error opening %s: %s\n", argv[1],
strerror(errno));
exit(1);
}
} else {
infile = 1;
}
if (argc > 2)
quiet = g_str_equal(argv[2], "--quiet") || g_str_equal(argv[2], "-q");
g_type_init();
array = g_array_new(FALSE, FALSE, sizeof(gunichar));
matcher = _vte_matcher_new();
subst = _vte_iso2022_state_new(NULL);
VteTerminalPrivate terminal{};
gsize n_seq = 0;
gsize n_chars = 0;
gsize n_discarded = 0;
gsize start = 0;
gsize buf_size = 1024*1024;
guchar* buf = g_new0(guchar, buf_size);
for (;;) {
auto l = read (infile, buf, buf_size);
if (!l)
break;
if (l == -1) {
if (errno == EAGAIN)
continue;
break;
}
_vte_iso2022_process(subst, buf, (unsigned int) l, array);
gunichar* wbuf = &g_array_index(array, gunichar, 0);
gsize wcount = array->len;
bool leftovers = false;
while (start < wcount && !leftovers) {
const gunichar *next;
vte::parser::Params params{nullptr};
sequence_handler_t handler = nullptr;
auto match_result = _vte_matcher_match(matcher,
&wbuf[start],
wcount - start,
&handler,
&next,
&params.m_values);
switch (match_result) {
case VTE_MATCHER_RESULT_MATCH: {
(terminal.*handler)(params);
if (params.m_values != nullptr)
_vte_matcher_free_params_array(matcher, params.m_values);
/* Skip over the proper number of unicode chars. */
start = (next - wbuf);
n_seq++;
break;
}
case VTE_MATCHER_RESULT_NO_MATCH: {
auto c = wbuf[start];
/* If it's a control character, permute the order, per
* vttest. */
if ((c != *next) &&
((*next & 0x1f) == *next) &&
//FIXMEchpe what about C1 controls
(gssize(start + 1) < next - wbuf)) {
const gunichar *tnext = nullptr;
gunichar ctrl;
/* We don't want to permute it if it's another
* control sequence, so check if it is. */
sequence_handler_t thandler;
_vte_matcher_match(matcher,
next,
wcount - (next - wbuf),
&thandler,
&tnext,
nullptr);
/* We only do this for non-control-sequence
* characters and random garbage. */
if (tnext == next + 1) {
/* Save the control character. */
ctrl = *next;
/* Move everything before it up a
* slot. */
// FIXMEchpe memmove!
gsize i;
for (i = next - wbuf; i > start; i--) {
wbuf[i] = wbuf[i - 1];
}
/* Move the control character to the
* front. */
wbuf[i] = ctrl;
goto next_match;
}
}
n_chars++;
if (!quiet) {
char cbuf[7];
cbuf[g_unichar_to_utf8(c, cbuf)] = 0;
g_print("%s U+%04X [%s]\n", "GRAPHIC",
c,
g_unichar_isprint(c) ? cbuf : "�");
}
start++;
break;
}
case VTE_MATCHER_RESULT_PARTIAL: {
if (wbuf + wcount > next) {
if (!quiet)
g_print("Invalid control "
"sequence, discarding %ld "
"characters.\n",
(long)(next - (wbuf + start)));
/* Discard. */
start = next - wbuf + 1;
n_discarded += next - &wbuf[start];
} else {
/* Pause processing here and wait for more
* data before continuing. */
leftovers = true;
}
break;
}
}
}
next_match:
if (start < wcount) {
g_array_remove_range(array, 0, start);
start = wcount - start;
} else {
g_array_set_size(array, 0);
start = 0;
}
}
if (!quiet)
g_printerr("End of data.\n");
g_printerr ("Characters inserted: %" G_GSIZE_FORMAT "\n"
"Sequences recognised: %" G_GSIZE_FORMAT "\n"
"Bytes discarded: %" G_GSIZE_FORMAT "\n",
n_chars, n_seq, n_discarded);
close (infile);
_vte_iso2022_state_free(subst);
g_array_free(array, TRUE);
_vte_matcher_free(matcher);
g_free(buf);
return 0;
}
......@@ -33,7 +33,8 @@
#include "caps.hh"
#include "debug.h"
#include "iso2022.h"
#include "matcher.hh"
#include "parser.hh"
#include "assert.h"
static bool quiet = false;
......@@ -66,207 +67,165 @@ print_array(GValueArray* array)
}
}
namespace vte { namespace parser { struct Params { GValueArray *m_values; }; } }
static char const*
seq_to_str(unsigned int type)
{
switch (type) {
case VTE_SEQ_NONE: return "NONE";
case VTE_SEQ_IGNORE: return "IGNORE";
case VTE_SEQ_GRAPHIC: return "GRAPHIC";
case VTE_SEQ_CONTROL: return "CONTROL";
case VTE_SEQ_ESCAPE: return "ESCAPE";
case VTE_SEQ_CSI: return "CSI";
case VTE_SEQ_DCS: return "DCS";
case VTE_SEQ_OSC: return "OSC";
default:
assert(false);
}
}
class VteTerminalPrivate {
public:
#define SEQUENCE_HANDLER(name) \
inline void seq_ ## name (vte::parser::Params const& params) { \
if (!quiet) { \
g_print (G_STRINGIFY(name)); \
print_array(params.m_values); \
g_print("\n"); \
} \
static char const*
cmd_to_str(unsigned int command)
{
switch (command) {
#define _VTE_CMD(cmd) case VTE_CMD_##cmd: return #cmd;
#include "parser-cmd.hh"
#undef _VTE_CMD
default:
static char buf[32];
snprintf(buf, sizeof(buf), "UNKOWN(%u)", command);
return buf;
}
#include "vteseq-list.hh"
#undef SEQUENCE_HANDLER
};
}
vte_matcher_entry_t const*
_vte_get_matcher_entries(unsigned int* n_entries)
static void print_seq(const struct vte_seq *seq)
{
#include "caps-list.hh"
*n_entries = G_N_ELEMENTS (entries);
return entries;
auto c = seq->terminator;
if (seq->command == VTE_CMD_GRAPHIC) {
char buf[7];
buf[g_unichar_to_utf8(c, buf)] = 0;
g_print("%s U+%04X [%s]\n", cmd_to_str(seq->command),
c,
g_unichar_isprint(c) ? buf : "�");
} else {
g_print("%s", cmd_to_str(seq->command));
if (seq->n_args) {
g_print(" ");
for (unsigned int i = 0; i < seq->n_args; i++) {
if (i > 0)
g_print(";");
g_print("%d", seq->args[i]);
}
}
g_print("\n");
}
}
int
main(int argc, char **argv)
{
struct _vte_matcher *matcher = NULL;
GArray *array;
unsigned char buf[4096];
int infile;
struct _vte_iso2022_state *subst;
GArray *array;
int infile;
struct _vte_iso2022_state *subst;
setlocale(LC_ALL, "");
_vte_debug_init();
_vte_debug_init();
if (argc < 1) {
g_print("usage: %s [file] [--quiet]\n", argv[0]);
return 1;
}
return 1;
}
if ((argc > 1) && (strcmp(argv[1], "-") != 0)) {
infile = open (argv[1], O_RDONLY);
if (infile == -1) {
if (infile == -1) {
g_print("error opening %s: %s\n", argv[1],
strerror(errno));
exit(1);
}
} else {
infile = 1;
}
strerror(errno));
exit(1);
}
} else {
infile = 1;
}
if (argc > 2)
quiet = g_str_equal(argv[2], "--quiet") || g_str_equal(argv[2], "-q");
g_type_init();
g_type_init();
array = g_array_new(FALSE, FALSE, sizeof(gunichar));
array = g_array_new(FALSE, FALSE, sizeof(gunichar));
matcher = _vte_matcher_new();
struct vte_parser *parser;
if (vte_parser_new(&parser) != 0)
return 1;
subst = _vte_iso2022_state_new(NULL);
VteTerminalPrivate terminal{};
gsize n_seq = 0;
gsize n_chars = 0;
gsize n_discarded = 0;
gsize buf_size = 1024*1024;
guchar* buf = g_new0(guchar, buf_size);
gsize start = 0;
gsize* seq_stats = g_new0(gsize, VTE_SEQ_N);
gsize* cmd_stats = g_new0(gsize, VTE_CMD_N);
for (;;) {
auto l = read (infile, buf, sizeof (buf));
if (!l)
break;
if (l == -1) {
if (errno == EAGAIN)
continue;
break;
}
_vte_iso2022_process(subst, buf, (unsigned int) l, array);
for (;;) {
auto l = read (infile, buf, buf_size);
if (!l)
break;
if (l == -1) {
if (errno == EAGAIN)
continue;
break;
}
_vte_iso2022_process(subst, buf, (unsigned int) l, array);
gunichar* wbuf = &g_array_index(array, gunichar, 0);
gsize wcount = array->len;
bool leftovers = false;
struct vte_seq *seq;
for (gsize i = 0; i < wcount; i++) {
auto ret = vte_parser_feed(parser,
&seq,
wbuf[i]);
if (ret < 0) {
if (!quiet)
g_print("Parser error\n");
goto done;
}
while (start < wcount && !leftovers) {
const gunichar *next;
vte::parser::Params params{nullptr};
sequence_handler_t handler = nullptr;
auto match_result = _vte_matcher_match(matcher,
&wbuf[start],
wcount - start,
&handler,
&next,
&params.m_values);
switch (match_result) {
case VTE_MATCHER_RESULT_MATCH: {
(terminal.*handler)(params);
if (params.m_values != nullptr)
_vte_matcher_free_params_array(matcher, params.m_values);
seq_stats[ret]++;
if (ret != VTE_SEQ_NONE) {
cmd_stats[seq->command]++;
/* Skip over the proper number of unicode chars. */
start = (next - wbuf);
n_seq++;
break;
}
case VTE_MATCHER_RESULT_NO_MATCH: {
auto c = wbuf[start];
/* If it's a control character, permute the order, per
* vttest. */
if ((c != *next) &&
((*next & 0x1f) == *next) &&
//FIXMEchpe what about C1 controls
(gssize(start + 1) < next - wbuf)) {
const gunichar *tnext = nullptr;
gunichar ctrl;
/* We don't want to permute it if it's another
* control sequence, so check if it is. */
sequence_handler_t thandler;
_vte_matcher_match(matcher,
next,
wcount - (next - wbuf),
&thandler,
&tnext,
nullptr);
/* We only do this for non-control-sequence
* characters and random garbage. */
if (tnext == next + 1) {
/* Save the control character. */
ctrl = *next;
/* Move everything before it up a
* slot. */
// FIXMEchpe memmove!
gsize i;
for (i = next - wbuf; i > start; i--) {
wbuf[i] = wbuf[i - 1];
}