Commit 1c6e2ce1 authored by Christian Hergert

gtksourceregex: wrap GRegex through a shim

This creates an ImplRegex intermediate structure that we can use to switch
the implementation from GRegex to PCRE2. Doing so will not only be
faster, but will also allow us to eventually migrate to using a JIT for the
regex implementation, as well as avoid deprecations from GRegex in future
releases of GLib.

Other modules will eventually need to be ported to this, but focusing on
GtkSourceRegex would result in the largest gain from language specs.
parent 89dfadf9
......@@ -27,21 +27,23 @@
#include "gtksourceregex-private.h"
#include "gtksourceutils-private.h"
#include "implregex-private.h"
/*
* GRegex wrapper which adds a few features needed for syntax highlighting,
* ImplRegex wrapper which adds a few features needed for syntax highlighting,
* in particular resolving "\%{...@start}" and forbidding the use of \C.
*/
/* Regex used to match "\%{...@start}". */
static GRegex *
static ImplRegex *
get_start_ref_regex (void)
{
static GRegex *start_ref_regex = NULL;
static ImplRegex *start_ref_regex = NULL;
if (start_ref_regex == NULL)
{
start_ref_regex = g_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
G_REGEX_OPTIMIZE, 0, NULL);
start_ref_regex = impl_regex_new ("(?<!\\\\)(\\\\\\\\)*\\\\%\\{(.*?)@start\\}",
G_REGEX_OPTIMIZE, 0, NULL);
}
return start_ref_regex;
......@@ -55,8 +57,8 @@ struct _GtkSourceRegex
GRegexCompileFlags flags;
} info;
struct {
GRegex *regex;
GMatchInfo *match;
ImplRegex *regex;
ImplMatchInfo *match;
} regex;
} u;
......@@ -110,9 +112,9 @@ find_single_byte_escape (const gchar *string)
* Returns: a newly-allocated #GtkSourceRegex.
*/
GtkSourceRegex *
_gtk_source_regex_new (const gchar *pattern,
GRegexCompileFlags flags,
GError **error)
_gtk_source_regex_new (const gchar *pattern,
GRegexCompileFlags flags,
GError **error)
{
GtkSourceRegex *regex;
......@@ -130,7 +132,7 @@ _gtk_source_regex_new (const gchar *pattern,
regex = g_slice_new0 (GtkSourceRegex);
regex->ref_count = 1;
if (g_regex_match (get_start_ref_regex (), pattern, 0, NULL))
if (impl_regex_match (get_start_ref_regex (), pattern, 0, NULL))
{
regex->resolved = FALSE;
regex->u.info.pattern = g_strdup (pattern);
......@@ -139,9 +141,9 @@ _gtk_source_regex_new (const gchar *pattern,
else
{
regex->resolved = TRUE;
regex->u.regex.regex = g_regex_new (pattern,
flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
error);
regex->u.regex.regex = impl_regex_new (pattern,
flags | G_REGEX_OPTIMIZE | G_REGEX_NEWLINE_LF, 0,
error);
if (regex->u.regex.regex == NULL)
{
......@@ -168,9 +170,9 @@ _gtk_source_regex_unref (GtkSourceRegex *regex)
{
if (regex->resolved)
{
g_regex_unref (regex->u.regex.regex);
impl_regex_unref (regex->u.regex.regex);
if (regex->u.regex.match)
g_match_info_free (regex->u.regex.match);
impl_match_info_free (regex->u.regex.match);
}
else
{
......@@ -186,27 +188,25 @@ struct RegexResolveData {
};
static gboolean
replace_start_regex (const GMatchInfo *match_info,
GString *expanded_regex,
gpointer user_data)
replace_start_regex (const ImplMatchInfo *match_info,
GString *expanded_regex,
gpointer user_data)
{
gchar *num_string, *subst, *subst_escaped, *escapes;
gint num;
struct RegexResolveData *data = user_data;
escapes = g_match_info_fetch (match_info, 1);
num_string = g_match_info_fetch (match_info, 2);
escapes = impl_match_info_fetch (match_info, 1);
num_string = impl_match_info_fetch (match_info, 2);
num = _gtk_source_utils_string_to_int (num_string);
if (num < 0)
{
subst = g_match_info_fetch_named (data->start_regex->u.regex.match,
num_string);
subst = impl_match_info_fetch_named (data->start_regex->u.regex.match, num_string);
}
else
{
subst = g_match_info_fetch (data->start_regex->u.regex.match,
num);
subst = impl_match_info_fetch (data->start_regex->u.regex.match, num);
}
if (subst != NULL)
......@@ -261,11 +261,11 @@ _gtk_source_regex_resolve (GtkSourceRegex *regex,
data.start_regex = start_regex;
data.matched_text = matched_text;
expanded_regex = g_regex_replace_eval (get_start_ref_regex (),
regex->u.info.pattern,
-1, 0, 0,
replace_start_regex,
&data, NULL);
expanded_regex = impl_regex_replace_eval (get_start_ref_regex (),
regex->u.info.pattern,
-1, 0, 0,
replace_start_regex,
&data, NULL);
new_regex = _gtk_source_regex_new (expanded_regex, regex->u.info.flags, NULL);
if (new_regex == NULL || !new_regex->resolved)
{
......@@ -299,14 +299,14 @@ _gtk_source_regex_match (GtkSourceRegex *regex,
if (regex->u.regex.match)
{
g_match_info_free (regex->u.regex.match);
impl_match_info_free (regex->u.regex.match);
regex->u.regex.match = NULL;
}
result = g_regex_match_full (regex->u.regex.regex, line,
byte_length, byte_pos,
0, &regex->u.regex.match,
NULL);
result = impl_regex_match_full (regex->u.regex.regex, line,
byte_length, byte_pos,
0, &regex->u.regex.match,
NULL);
return result;
}
......@@ -317,7 +317,7 @@ _gtk_source_regex_fetch (GtkSourceRegex *regex,
{
g_assert (regex->resolved);
return g_match_info_fetch (regex->u.regex.match, num);
return impl_match_info_fetch (regex->u.regex.match, num);
}
void
......@@ -331,8 +331,8 @@ _gtk_source_regex_fetch_pos (GtkSourceRegex *regex,
g_assert (regex->resolved);
/* g_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
if (!g_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
/* impl_match_info_fetch_pos() can return TRUE with start_pos/end_pos set to -1 */
if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &byte_start_pos, &byte_end_pos))
{
if (start_pos != NULL)
*start_pos = -1;
......@@ -359,7 +359,7 @@ _gtk_source_regex_fetch_pos_bytes (GtkSourceRegex *regex,
g_assert (regex->resolved);
if (!g_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
if (!impl_match_info_fetch_pos (regex->u.regex.match, num, &start_pos, &end_pos))
{
start_pos = -1;
end_pos = -1;
......@@ -382,7 +382,7 @@ _gtk_source_regex_fetch_named_pos (GtkSourceRegex *regex,
g_assert (regex->resolved);
if (!g_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
if (!impl_match_info_fetch_named_pos (regex->u.regex.match, name, &byte_start_pos, &byte_end_pos))
{
if (start_pos != NULL)
*start_pos = -1;
......@@ -403,6 +403,6 @@ _gtk_source_regex_get_pattern (GtkSourceRegex *regex)
{
g_assert (regex->resolved);
return g_regex_get_pattern (regex->u.regex.regex);
return impl_regex_get_pattern (regex->u.regex.regex);
}
/*
* This file is part of GtkSourceView
*
* Copyright 2020 Christian Hergert <chergert@redhat.com>
*
* GtkSourceView is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* GtkSourceView is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#pragma once
#include <glib.h>
G_BEGIN_DECLS
/* Opaque handle for a compiled regular expression; see impl_regex_new(). */
typedef struct _ImplRegex ImplRegex;
/* Opaque handle for the result of a match; release with impl_match_info_free(). */
typedef struct _ImplMatchInfo ImplMatchInfo;
/*
 * Callback invoked by impl_regex_replace_eval() for each match.
 *
 * @match_info describes the current match, @result is the string the
 * callback appends its replacement text to, and @user_data is the pointer
 * given to impl_regex_replace_eval(). The return value is handed back
 * unchanged to the underlying g_regex_replace_eval() call.
 */
typedef gboolean (*ImplRegexEvalCallback) (const ImplMatchInfo *match_info,
GString *result,
gpointer user_data);
/*
 * Compiles @pattern (must not be NULL) with g_regex_new(), forwarding the
 * options and @error. Returns a new ImplRegex holding one reference, or NULL
 * if compilation failed.
 */
ImplRegex *impl_regex_new (const char *pattern,
GRegexCompileFlags compile_options,
GRegexMatchFlags match_options,
GError **error);
/*
 * Matches @string against @regex via g_regex_match(). When @match_info is
 * not NULL a newly allocated ImplMatchInfo is stored in it.
 */
gboolean impl_regex_match (const ImplRegex *regex,
const char *string,
GRegexMatchFlags match_options,
ImplMatchInfo **match_info);
/* Drops one reference; the regex is freed when the count reaches zero. */
void impl_regex_unref (ImplRegex *regex);
/* Frees @match_info together with the GMatchInfo it wraps. */
void impl_match_info_free (ImplMatchInfo *match_info);
/* Thin wrapper over g_match_info_fetch() for group number @match_num. */
char *impl_match_info_fetch (const ImplMatchInfo *match_info,
int match_num);
/* Thin wrapper over g_match_info_fetch_named() for the group called @name. */
char *impl_match_info_fetch_named (const ImplMatchInfo *match_info,
const char *name);
/*
 * Runs g_regex_replace_eval() on @string, calling @eval (with @user_data)
 * for every match, and returns whatever g_regex_replace_eval() returns.
 */
char *impl_regex_replace_eval (const ImplRegex *regex,
const char *string,
gssize string_len,
int start_position,
GRegexMatchFlags match_options,
ImplRegexEvalCallback eval,
gpointer user_data,
GError **error);
/*
 * Matches via g_regex_match_full(). When @match_info is not NULL a newly
 * allocated ImplMatchInfo wrapping the result is stored in it; otherwise the
 * intermediate GMatchInfo is freed before returning.
 */
gboolean impl_regex_match_full (const ImplRegex *regex,
const char *string,
gssize string_len,
int start_position,
GRegexMatchFlags match_options,
ImplMatchInfo **match_info,
GError **error);
/* Thin wrapper over g_match_info_fetch_pos(). */
gboolean impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
int match_num,
int *start_pos,
int *end_pos);
/* Thin wrapper over g_match_info_fetch_named_pos(). */
gboolean impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
const char *name,
int *start_pos,
int *end_pos);
/*
 * Returns the pattern text copied when the regex was created. The string is
 * owned by @regex and must not be freed by the caller.
 */
const char *impl_regex_get_pattern (const ImplRegex *regex);
G_END_DECLS
/*
* This file is part of GtkSourceView
*
* Copyright 2020 Christian Hergert <chergert@redhat.com>
*
* GtkSourceView is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* GtkSourceView is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this library; if not, see <http://www.gnu.org/licenses/>.
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*/
#include "config.h"
#include "implregex-private.h"
/*
 * Reference-counted shim around a compiled GRegex.
 *
 * impl_regex_new() fills in all three fields; impl_regex_unref() releases
 * them once the count drops to zero.
 */
struct _ImplRegex
{
/* Number of outstanding references; a plain int, decremented without atomics. */
int ref_count;
/* Private copy of the pattern text, returned by impl_regex_get_pattern(). */
char *pattern;
/* The wrapped compiled regex; owned by this structure. */
GRegex *re;
};
/*
 * Wrapper around a GMatchInfo.
 *
 * Heap instances are released with impl_match_info_free(), which also frees
 * the wrapped GMatchInfo. wrapper_eval() additionally builds a temporary one
 * on the stack around a GMatchInfo it does not own.
 */
struct _ImplMatchInfo
{
/* The wrapped GLib match result; may be NULL right after allocation. */
GMatchInfo *match_info;
};
/*
 * Disabled helper (compiled out by "#if 0").
 *
 * It would translate the error number @errnum into a GError in the
 * G_REGEX_ERROR domain with code G_REGEX_ERROR_COMPILE, using
 * pcre2_get_error_message() to obtain the text. No PCRE2 header is included
 * by this file, so the block cannot be enabled as-is.
 * NOTE(review): presumably kept for the planned PCRE2 backend - confirm.
 */
#if 0
static void
set_regex_error (GError **error,
int errnum)
{
guchar errstr[128];
/* Leave room for, and then force, a terminating NUL byte. */
pcre2_get_error_message (errnum, errstr, sizeof errstr - 1);
errstr[sizeof errstr - 1] = 0;
g_set_error_literal (error,
G_REGEX_ERROR,
G_REGEX_ERROR_COMPILE,
(const gchar *)errstr);
}
#endif
/*
 * Allocates an empty ImplMatchInfo from the slice allocator.
 *
 * @regex is accepted but not used. The returned wrapper does not yet hold a
 * GMatchInfo; the caller fills in the match_info field.
 */
static ImplMatchInfo *
impl_match_info_new (const ImplRegex *regex)
{
	ImplMatchInfo *self = g_slice_new0 (ImplMatchInfo);

	self->match_info = NULL;

	return self;
}
/*
 * impl_regex_new:
 * @pattern: the regular expression text; must not be NULL
 * @compile_options: flags forwarded to g_regex_new()
 * @match_options: flags forwarded to g_regex_new()
 * @error: location for an error reported by g_regex_new(), or NULL
 *
 * Compiles @pattern with GRegex and wraps the result.
 *
 * Returns: a new ImplRegex holding one reference and its own copy of
 *   @pattern, or NULL if g_regex_new() failed.
 */
ImplRegex *
impl_regex_new (const char          *pattern,
                GRegexCompileFlags   compile_options,
                GRegexMatchFlags     match_options,
                GError             **error)
{
	ImplRegex *self;
	GRegex *compiled;

	g_return_val_if_fail (pattern != NULL, NULL);

	compiled = g_regex_new (pattern, compile_options, match_options, error);

	/* Nothing has been allocated for the wrapper yet, so just bail out. */
	if (compiled == NULL)
	{
		return NULL;
	}

	self = g_slice_new0 (ImplRegex);
	self->re = compiled;
	self->pattern = g_strdup (pattern);
	self->ref_count = 1;

	return self;
}
/*
 * impl_regex_get_pattern:
 * @regex: an ImplRegex; must not be NULL
 *
 * Returns: the pattern copy stored in @regex. The string stays owned by
 *   @regex.
 */
const char *
impl_regex_get_pattern (const ImplRegex *regex)
{
	const char *stored;

	g_return_val_if_fail (regex != NULL, NULL);

	stored = regex->pattern;

	return stored;
}
/*
 * impl_regex_unref:
 * @regex: an ImplRegex with a positive reference count
 *
 * Drops one reference. When the last reference goes away the pattern copy,
 * the wrapped GRegex and the structure itself are released.
 */
void
impl_regex_unref (ImplRegex *regex)
{
	g_return_if_fail (regex != NULL);
	g_return_if_fail (regex->ref_count > 0);

	regex->ref_count -= 1;

	/* Other references remain: nothing to tear down yet. */
	if (regex->ref_count != 0)
	{
		return;
	}

	g_clear_pointer (&regex->pattern, g_free);
	g_clear_pointer (&regex->re, g_regex_unref);
	g_slice_free (ImplRegex, regex);
}
/*
 * impl_match_info_free:
 * @match_info: (nullable): the ImplMatchInfo to release
 *
 * Frees the wrapped GMatchInfo (if any) and then the wrapper itself.
 *
 * Passing NULL is a no-op, mirroring g_match_info_free(), which this
 * function stands in for.
 */
void
impl_match_info_free (ImplMatchInfo *match_info)
{
	if (match_info == NULL)
	{
		return;
	}

	g_clear_pointer (&match_info->match_info, g_match_info_free);
	g_slice_free (ImplMatchInfo, match_info);
}
/*
 * impl_regex_match:
 * @regex: an ImplRegex; must not be NULL and must wrap a GRegex
 * @string: the text to scan
 * @match_options: flags forwarded to g_regex_match()
 * @match_info: location to store a newly allocated ImplMatchInfo, or NULL
 *
 * Matches @string against @regex via g_regex_match(). When @match_info is
 * provided, a fresh wrapper is stored in it and GLib fills in the wrapped
 * GMatchInfo; the caller releases it with impl_match_info_free().
 *
 * Returns: the result of g_regex_match().
 */
gboolean
impl_regex_match (const ImplRegex   *regex,
                  const char        *string,
                  GRegexMatchFlags   match_options,
                  ImplMatchInfo    **match_info)
{
	GMatchInfo **inner = NULL;

	g_return_val_if_fail (regex != NULL, FALSE);
	g_return_val_if_fail (regex->re != NULL, FALSE);

	if (match_info != NULL)
	{
		*match_info = impl_match_info_new (regex);
		/* Let GLib write its result straight into the new wrapper. */
		inner = &(*match_info)->match_info;
	}

	return g_regex_match (regex->re, string, match_options, inner);
}
/*
 * impl_match_info_fetch:
 * @match_info: an ImplMatchInfo; must not be NULL
 * @match_num: number of the group to fetch
 *
 * Returns: whatever g_match_info_fetch() returns for the wrapped
 *   GMatchInfo and @match_num.
 */
char *
impl_match_info_fetch (const ImplMatchInfo *match_info,
                       int                  match_num)
{
	char *text;

	g_return_val_if_fail (match_info != NULL, NULL);

	text = g_match_info_fetch (match_info->match_info, match_num);

	return text;
}
/*
 * impl_match_info_fetch_named:
 * @match_info: an ImplMatchInfo; must not be NULL
 * @name: name of the group to fetch
 *
 * Returns: whatever g_match_info_fetch_named() returns for the wrapped
 *   GMatchInfo and @name.
 */
char *
impl_match_info_fetch_named (const ImplMatchInfo *match_info,
                             const char          *name)
{
	char *text;

	g_return_val_if_fail (match_info != NULL, NULL);

	text = g_match_info_fetch_named (match_info->match_info, name);

	return text;
}
static gboolean
wrapper_eval (const GMatchInfo *match_info,
GString *result,
gpointer user_data)
{
struct {
ImplRegexEvalCallback callback;
gpointer user_data;
} *wrapper = user_data;
ImplMatchInfo wrapped = {
.match_info = (GMatchInfo *)match_info,
};
return wrapper->callback (&wrapped, result, wrapper->user_data);
}
char *
impl_regex_replace_eval (const ImplRegex *regex,
const char *string,
gssize string_len,
int start_position,
GRegexMatchFlags match_options,
ImplRegexEvalCallback eval,
gpointer user_data,
GError **error)
{
struct {
ImplRegexEvalCallback callback;
gpointer user_data;
} wrapper;
g_return_val_if_fail (regex != NULL, NULL);
g_return_val_if_fail (regex->re != NULL, NULL);
wrapper.callback = eval;
wrapper.user_data = user_data;
return g_regex_replace_eval (regex->re,
string,
string_len,
start_position,
match_options,
wrapper_eval,
&wrapper,
error);
}
/*
 * impl_regex_match_full:
 * @regex: an ImplRegex; must not be NULL and must wrap a GRegex
 * @string: the text to scan
 * @string_len: forwarded to g_regex_match_full()
 * @start_position: forwarded to g_regex_match_full()
 * @match_options: forwarded to g_regex_match_full()
 * @match_info: location to store a newly allocated ImplMatchInfo, or NULL
 * @error: location for an error reported by GLib, or NULL
 *
 * Matches via g_regex_match_full(). The GMatchInfo produced by GLib is
 * either handed to the caller inside a new ImplMatchInfo (release it with
 * impl_match_info_free()) or freed right away when @match_info is NULL.
 *
 * Returns: the result of g_regex_match_full().
 */
gboolean
impl_regex_match_full (const ImplRegex   *regex,
                       const char        *string,
                       gssize             string_len,
                       int                start_position,
                       GRegexMatchFlags   match_options,
                       ImplMatchInfo    **match_info,
                       GError           **error)
{
	GMatchInfo *inner = NULL;
	gboolean matched;

	g_return_val_if_fail (regex != NULL, FALSE);
	g_return_val_if_fail (regex->re != NULL, FALSE);

	matched = g_regex_match_full (regex->re,
	                              string,
	                              string_len,
	                              start_position,
	                              match_options,
	                              &inner,
	                              error);

	/* Caller is not interested in the details: discard them. */
	if (match_info == NULL)
	{
		g_match_info_free (inner);
		return matched;
	}

	*match_info = g_slice_new0 (ImplMatchInfo);
	(*match_info)->match_info = inner;

	return matched;
}
/*
 * impl_match_info_fetch_pos:
 * @match_info: an ImplMatchInfo wrapping a non-NULL GMatchInfo
 * @match_num: number of the group to look up
 * @start_pos: forwarded to g_match_info_fetch_pos()
 * @end_pos: forwarded to g_match_info_fetch_pos()
 *
 * Returns: the result of g_match_info_fetch_pos() on the wrapped
 *   GMatchInfo, or FALSE if a precondition fails.
 */
gboolean
impl_match_info_fetch_pos (const ImplMatchInfo *match_info,
                           int                  match_num,
                           int                 *start_pos,
                           int                 *end_pos)
{
	gboolean found;

	g_return_val_if_fail (match_info != NULL, FALSE);
	g_return_val_if_fail (match_info->match_info != NULL, FALSE);

	found = g_match_info_fetch_pos (match_info->match_info,
	                                match_num,
	                                start_pos,
	                                end_pos);

	return found;
}
/*
 * impl_match_info_fetch_named_pos:
 * @match_info: an ImplMatchInfo wrapping a non-NULL GMatchInfo
 * @name: name of the group to look up
 * @start_pos: forwarded to g_match_info_fetch_named_pos()
 * @end_pos: forwarded to g_match_info_fetch_named_pos()
 *
 * Returns: the result of g_match_info_fetch_named_pos() on the wrapped
 *   GMatchInfo, or FALSE if a precondition fails.
 */
gboolean
impl_match_info_fetch_named_pos (const ImplMatchInfo *match_info,
                                 const char          *name,
                                 int                 *start_pos,
                                 int                 *end_pos)
{
	gboolean found;

	g_return_val_if_fail (match_info != NULL, FALSE);
	g_return_val_if_fail (match_info->match_info != NULL, FALSE);

	found = g_match_info_fetch_named_pos (match_info->match_info,
	                                      name,
	                                      start_pos,
	                                      end_pos);

	return found;
}
......@@ -118,6 +118,7 @@ core_private_c = files([
'gtksourcesnippetbundle.c',
'gtksourcesnippetbundle-parser.c',
'gtksourceview-snippets.c',
'implregex.c',
])
core_c_args = [
......@@ -135,6 +136,7 @@ core_deps = [
libxml_dep,
pangoft2_dep,
fontconfig_dep,
pcre2_dep,
]
if profiler_enabled
......
......@@ -77,6 +77,7 @@ libxml_req = '>= 2.6'
introspection_req = '>= 1.42.0'
gtk_doc_req = '>= 1.25'
fribidi_req = '>= 0.19.7'
pcre2_req = '>= 10.21'
glib_dep = dependency('glib-2.0', version: glib_req)
gobject_dep = dependency('gobject-2.0', version: glib_req)
......@@ -86,6 +87,7 @@ libxml_dep = dependency('libxml-2.0', version: libxml_req, required: cc.get_id()
fribidi_dep = dependency('fribidi', version: fribidi_req)
fontconfig_dep = dependency('fontconfig', required: false)
pangoft2_dep = dependency('pangoft2', required: false)
pcre2_dep = dependency('libpcre2-8', version: pcre2_req)
gtk_quartz_dep = dependency('gtk4-quartz', version: gtk_doc_req, required: false)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment