Commit d2d6415c authored by Nelson Ben's avatar Nelson Ben 👣 Committed by Germán Poo-Caamaño

Add support for text search across lines

Implemented in poppler MR:
https://gitlab.freedesktop.org/poppler/poppler/merge_requests/267

As the new poppler result data type is PopplerFindRectangle, which
incorporates more fields than just coordinates, we create a
corresponding type in Evince, called EvFindRectangle, and use it
all across Evince, including the pdf backend (ev-poppler.cc) and the
djvu backend (djvu-document.c), which are the only backends that
implement the text search interface.

This new feature has the following aspects:

 - Ignores a hyphen character while matching when 1) it is the
   last character of the line and 2) its corresponding
   character in the search term is not also a hyphen.

 - Any whitespace character in the search term is allowed
   to match at the logical position where a line breaks (i.e. where
   the newline character would normally be in a text file; PDF text
   does not include newline characters between lines).

 - It won't match text spanning more than two lines, i.e. it
   only matches text spanning from the end of one line to the start
   of the next line.

Part of issue #333
parent 5e6ff7ab
Pipeline #133074 failed with stages
in 4 minutes and 10 seconds
......@@ -898,6 +898,15 @@ djvu_document_find_find_text (EvDocumentFind *document,
r->y1 = height - r->y2 * 72.0 / dpi;
r->y2 = height - tmp * 72.0 / dpi;
EvFindRectangle *ev_rect = ev_find_rectangle_new ();
ev_rect->x1 = r->x1;
ev_rect->x2 = r->x2;
ev_rect->y1 = r->y1;
ev_rect->y2 = r->y2;
ev_rectangle_free (r);
l->data = ev_rect;
}
......
......@@ -157,6 +157,11 @@ static gboolean attachment_save_to_buffer (PopplerAttachment *attachment,
gchar **buffer,
gsize *buffer_size,
GError **error);
static GList *pdf_document_find_find_text_with_options_real (EvDocumentFind *document_find,
EvPage *page,
const gchar *text,
EvFindOptions options,
gboolean returnFindRects);
EV_BACKEND_REGISTER_WITH_CODE (PdfDocument, pdf_document,
{
......@@ -1950,6 +1955,7 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
GList *matches, *l;
PopplerPage *poppler_page;
gdouble height;
gboolean uses_new_api;
GList *retval = NULL;
guint find_flags = 0;
......@@ -1965,29 +1971,48 @@ pdf_document_find_find_text_with_options (EvDocumentFind *document_find,
to broaden our search in order to match on more expected results */
find_flags |= POPPLER_FIND_IGNORE_DIACRITICS;
#endif
if (options & EV_FIND_WHOLE_WORDS_ONLY)
find_flags |= POPPLER_FIND_WHOLE_WORDS_ONLY;
#if POPPLER_CHECK_VERSION(0, 77, 0)
/* Allow to match on text across lines */
find_flags |= POPPLER_FIND_ACROSS_LINES;
matches = poppler_page_find_text_with_options2 (poppler_page, text, (PopplerFindFlags)find_flags);
uses_new_api = TRUE;
#else
matches = poppler_page_find_text_with_options (poppler_page, text, (PopplerFindFlags)find_flags);
uses_new_api = FALSE;
#endif
if (!matches)
return NULL;
poppler_page_get_size (poppler_page, NULL, &height);
for (l = matches; l && l->data; l = g_list_next (l)) {
PopplerRectangle *rect = (PopplerRectangle *)l->data;
EvRectangle *ev_rect;
ev_rect = ev_rectangle_new ();
ev_rect->x1 = rect->x1;
ev_rect->x2 = rect->x2;
/* Invert this for X-style coordinates */
ev_rect->y1 = height - rect->y2;
ev_rect->y2 = height - rect->y1;
EvFindRectangle *ev_rect = ev_find_rectangle_new ();
if (uses_new_api) {
PopplerFindRectangle *rect = (PopplerFindRectangle *)l->data;
ev_rect->x1 = rect->x1;
ev_rect->x2 = rect->x2;
ev_rect->y1 = height - rect->y2;
ev_rect->y2 = height - rect->y1;
ev_rect->next_line = rect->next_line;
ev_rect->after_hyphen = rect->after_hyphen;
} else {
PopplerRectangle *rect = (PopplerRectangle *)l->data;
ev_rect->x1 = rect->x1;
ev_rect->x2 = rect->x2;
/* Invert this for X-style coordinates */
ev_rect->y1 = height - rect->y2;
ev_rect->y2 = height - rect->y1;
ev_rect->next_line = FALSE;
ev_rect->after_hyphen = FALSE;
}
retval = g_list_prepend (retval, ev_rect);
}
g_list_foreach (matches, (GFunc)poppler_rectangle_free, NULL);
g_list_free (matches);
g_list_free_full (matches, (uses_new_api ? (GDestroyNotify) poppler_find_rectangle_free
: (GDestroyNotify) poppler_rectangle_free));
return g_list_reverse (retval);
}
......
......@@ -36,7 +36,7 @@ ev_document_find_default_init (EvDocumentFindInterface *klass)
* @text: text to find
* @case_sensitive: whether to match the string case
*
* Returns: (transfer full) (element-type EvRectangle): a list of results
* Returns: (transfer full) (element-type EvFindRectangle): a list of results
*/
GList *
ev_document_find_find_text (EvDocumentFind *document_find,
......@@ -56,7 +56,7 @@ ev_document_find_find_text (EvDocumentFind *document_find,
* @text: text to find
* @options: a set of #EvFindOptions
*
* Returns: (transfer full) (element-type EvRectangle): a list of results
* Returns: (transfer full) (element-type EvFindRectangle): a list of results
*/
GList *
ev_document_find_find_text_with_options (EvDocumentFind *document_find,
......@@ -72,6 +72,28 @@ ev_document_find_find_text_with_options (EvDocumentFind *document_find,
return ev_document_find_find_text (document_find, page, text, options & EV_FIND_CASE_SENSITIVE);
}
/* EvFindRectangle */
G_DEFINE_BOXED_TYPE (EvFindRectangle, ev_find_rectangle, ev_find_rectangle_copy, ev_find_rectangle_free)
EvFindRectangle *
ev_find_rectangle_new (void)
{
return g_slice_new0 (EvFindRectangle);
}
/**
 * ev_find_rectangle_copy:
 * @rectangle: the #EvFindRectangle to duplicate
 *
 * Makes a field-by-field duplicate of @rectangle with g_slice_dup().
 *
 * Returns: (transfer full): a newly allocated copy of @rectangle, or %NULL
 * when the g_return_val_if_fail() precondition rejects a %NULL @rectangle
 */
EvFindRectangle *
ev_find_rectangle_copy (EvFindRectangle *rectangle)
{
	EvFindRectangle *duplicate;

	g_return_val_if_fail (rectangle != NULL, NULL);

	duplicate = g_slice_dup (EvFindRectangle, rectangle);

	return duplicate;
}
/**
 * ev_find_rectangle_free:
 * @rectangle: the #EvFindRectangle to release
 *
 * Releases @rectangle with g_slice_free(), the counterpart of the
 * g_slice_new0()/g_slice_dup() calls used by ev_find_rectangle_new()
 * and ev_find_rectangle_copy().
 */
void
ev_find_rectangle_free (EvFindRectangle *rectangle)
{
g_slice_free (EvFindRectangle, rectangle);
}
EvFindOptions
ev_document_find_get_supported_options (EvDocumentFind *document_find)
{
......
......@@ -42,6 +42,23 @@ G_BEGIN_DECLS
typedef struct _EvDocumentFind EvDocumentFind;
typedef struct _EvDocumentFindInterface EvDocumentFindInterface;
typedef struct _EvFindRectangle EvFindRectangle;
#define EV_TYPE_FIND_RECTANGLE (ev_find_rectangle_get_type ())
/*
 * One rectangle of a text-search result, as returned by the
 * EvDocumentFind backends. A match that continues on the following
 * line is reported as two of these.
 */
struct _EvFindRectangle
{
/* Corners of the matched area. The pdf backend stores them with the
 * y axis flipped relative to poppler (y = page height - poppler y).
 * NOTE(review): units are presumably document points - confirm. */
gdouble x1;
gdouble y1;
gdouble x2;
gdouble y2;
/* TRUE when this rectangle is the next-line part of an across-line
 * match, i.e. not a match of its own. */
gboolean next_line;
/* Copied from PopplerFindRectangle.after_hyphen by the pdf backend.
 * The find sidebar reads it on the next-line rectangle to know that
 * a line-ending hyphen was ignored while matching. */
gboolean after_hyphen;
};
GType ev_find_rectangle_get_type (void) G_GNUC_CONST;
EvFindRectangle *ev_find_rectangle_new (void);
EvFindRectangle *ev_find_rectangle_copy (EvFindRectangle *ev_find_rect);
void ev_find_rectangle_free (EvFindRectangle *ev_find_rect);
typedef enum {
EV_FIND_DEFAULT = 0,
......
......@@ -1640,8 +1640,7 @@ ev_job_find_dispose (GObject *object)
gint i;
for (i = 0; i < job->n_pages; i++) {
g_list_foreach (job->pages[i], (GFunc)ev_rectangle_free, NULL);
g_list_free (job->pages[i]);
g_list_free_full (job->pages[i], (GDestroyNotify)ev_find_rectangle_free);
}
g_free (job->pages);
......@@ -1779,6 +1778,33 @@ ev_job_find_get_n_results (EvJobFind *job,
return g_list_length (job->pages[page]);
}
/**
 * ev_job_find_get_n_main_results:
 * @job: an #EvJobFind job
 * @page: index of the page whose match results are counted
 *
 * Like ev_job_find_get_n_results(), but skips every result whose
 * next_line field is %TRUE, i.e. the rectangles that only mark the
 * next-line part of an across-line match.
 *
 * Returns: number of results in @page that are not a next-line part
 */
gint
ev_job_find_get_n_main_results (EvJobFind *job,
				gint       page)
{
	GList *node = job->pages[page];
	int    count = 0;

	while (node != NULL) {
		EvFindRectangle *rect = (EvFindRectangle *) node->data;

		if (!rect->next_line)
			count++;

		node = node->next;
	}

	return count;
}
gdouble
ev_job_find_get_progress (EvJobFind *job)
{
......@@ -1808,7 +1834,7 @@ ev_job_find_has_results (EvJobFind *job)
* ev_job_find_get_results: (skip)
* @job: an #EvJobFind
*
* Returns: a #GList of #GList<!-- -->s containing #EvRectangle<!-- -->s
* Returns: a #GList of #GList<!-- -->s containing #EvFindRectangle<!-- -->s
*/
GList **
ev_job_find_get_results (EvJobFind *job)
......
......@@ -604,6 +604,8 @@ EvJob *ev_job_find_new (EvDocument *document,
void ev_job_find_set_options (EvJobFind *job,
EvFindOptions options);
EvFindOptions ev_job_find_get_options (EvJobFind *job);
gint ev_job_find_get_n_main_results (EvJobFind *job,
gint pages);
gint ev_job_find_get_n_results (EvJobFind *job,
gint pages);
gdouble ev_job_find_get_progress (EvJobFind *job);
......
......@@ -141,9 +141,11 @@ struct _EvView {
/* Find */
EvJobFind *find_job;
GList **find_pages; /* Backwards compatibility */
gint find_page;
gint find_result;
GList **find_pages; /* Backwards compatibility. Contains EvFindRectangles's elements per page */
gint find_page; /* Page of current find result */
gint find_result; /* Index of current find result on find_pages[find_page]. For matches across
* two lines (which comprise two EvFindRectangle's), this will always point
* to the first one, i.e. the one where rect->next_line is FALSE */
gboolean jump_to_find_result;
gboolean highlight_find_results;
......
......@@ -288,7 +288,7 @@ static void ev_view_handle_cursor_over_xy (EvView *view,
/*** Find ***/
static gint ev_view_find_get_n_results (EvView *view,
gint page);
static EvRectangle *ev_view_find_get_result (EvView *view,
static EvFindRectangle *ev_view_find_get_result (EvView *view,
gint page,
gint result);
static void jump_to_find_result (EvView *view);
......@@ -6936,25 +6936,33 @@ highlight_find_results (EvView *view,
cairo_t *cr,
int page)
{
EvRectangle *rectangle;
gint i, n_results = 0;
n_results = ev_view_find_get_n_results (view, page);
rectangle = ev_rectangle_new ();
for (i = 0; i < n_results; i++) {
EvRectangle *rectangle;
EvFindRectangle *find_rect;
GdkRectangle view_rectangle;
gdouble alpha;
if (i == view->find_result && page == view->find_page) {
find_rect = ev_view_find_get_result (view, page, i);
if (page == view->find_page && (i == view->find_result ||
(find_rect->next_line && i == view->find_result + 1))) {
alpha = 0.6;
} else {
alpha = 0.3;
}
rectangle = ev_view_find_get_result (view, page, i);
rectangle->x1 = find_rect->x1;
rectangle->x2 = find_rect->x2;
rectangle->y1 = find_rect->y1;
rectangle->y2 = find_rect->y2;
_ev_view_transform_doc_rect_to_view_rect (view, page, rectangle, &view_rectangle);
draw_rubberband (view, cr, &view_rectangle, alpha);
}
ev_rectangle_free (rectangle);
}
static void
......@@ -9082,32 +9090,80 @@ ev_view_find_get_n_results (EvView *view, gint page)
return view->find_pages ? g_list_length (view->find_pages[page]) : 0;
}
static EvRectangle *
static EvFindRectangle *
ev_view_find_get_result (EvView *view, gint page, gint result)
{
return view->find_pages ? (EvRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
return view->find_pages ? (EvFindRectangle *) g_list_nth_data (view->find_pages[page], result) : NULL;
}
/*
 * Looks up find result number @result on @page and also reports the
 * rectangle stored right after it in that page's result list.
 *
 * @next_rect: (out) (nullable): receives the rectangle of the following
 *   list element, or NULL when there is none or when the lookup fails.
 *   It is written on every path, so callers need not pre-initialise it.
 *   Passing NULL is allowed when the caller does not need it.
 *
 * Returns the rectangle at index @result, or NULL when the view has no
 * find results or @result is past the end of the page's list.
 */
static EvFindRectangle *
ev_view_find_get_result_and_next (EvView *view, gint page, gint result, EvFindRectangle **next_rect)
{
	GList *elem;

	/* Never leave the out-parameter indeterminate, whichever path returns. */
	if (next_rect)
		*next_rect = NULL;

	if (!view->find_pages)
		return NULL;

	elem = g_list_nth (view->find_pages[page], result);
	if (!elem)
		return NULL;

	if (next_rect && elem->next)
		*next_rect = (EvFindRectangle *) elem->next->data;

	return (EvFindRectangle *) elem->data;
}
/*
 * Tells whether find result number @result on @page is the next-line
 * part of an across-line match.
 *
 * Returns the result's next_line flag, or FALSE when the view has no
 * find results or g_list_nth() finds no element at @result.
 */
static gboolean
ev_view_find_is_next_line (EvView *view, gint page, gint result)
{
	GList *node = NULL;

	if (view->find_pages)
		node = g_list_nth (view->find_pages[page], result);

	return node ? ((EvFindRectangle *) node->data)->next_line : FALSE;
}
static void
jump_to_find_result (EvView *view)
{
EvRectangle *rect;
gint n_results;
gint page = view->find_page;
n_results = ev_view_find_get_n_results (view, page);
rect = ev_rectangle_new ();
if (n_results > 0 && view->find_result < n_results) {
EvRectangle *rect;
EvFindRectangle *find_rect, *rect_next;
GdkRectangle view_rect;
rect = ev_view_find_get_result (view, page, view->find_result);
rect_next = NULL;
find_rect = ev_view_find_get_result_and_next (view, page, view->find_result, &rect_next);
if (rect_next && rect_next->next_line) {
/* For an across-lines match, make sure both rectangles are visible */
rect->x1 = MIN (find_rect->x1, rect_next->x1);
rect->y1 = MIN (find_rect->y1, rect_next->y1);
rect->x2 = MAX (find_rect->x2, rect_next->x2);
rect->y2 = MAX (find_rect->y2, rect_next->y2);
} else {
rect->x1 = find_rect->x1;
rect->y1 = find_rect->y1;
rect->x2 = find_rect->x2;
rect->y2 = find_rect->y2;
}
_ev_view_transform_doc_rect_to_view_rect (view, page, rect, &view_rect);
_ev_view_ensure_rectangle_is_visible (view, &view_rect);
if (view->caret_enabled && view->rotation == 0)
position_caret_cursor_at_doc_point (view, page, rect->x1, rect->y1);
position_caret_cursor_at_doc_point (view, page, find_rect->x1, find_rect->y1);
view->jump_to_find_result = FALSE;
}
ev_rectangle_free (rect);
}
/**
......@@ -9233,7 +9289,8 @@ ev_view_find_next (EvView *view)
gint n_results;
n_results = ev_view_find_get_n_results (view, view->find_page);
view->find_result++;
view->find_result += ev_view_find_is_next_line (view, view->find_page, view->find_result + 1)
? 2 : 1;
if (view->find_result >= n_results) {
view->find_result = 0;
......@@ -9249,11 +9306,14 @@ ev_view_find_next (EvView *view)
void
ev_view_find_previous (EvView *view)
{
view->find_result--;
view->find_result -= ev_view_find_is_next_line (view, view->find_page, view->find_result - 1)
? 2 : 1;
if (view->find_result < 0) {
jump_to_find_page (view, EV_VIEW_FIND_PREV, -1);
view->find_result = MAX (0, ev_view_find_get_n_results (view, view->find_page) - 1);
if (view->find_result && ev_view_find_is_next_line (view, view->find_page, view->find_result))
view->find_result--; /* set to last "non-nextline" result */
} else if (view->find_page != view->current_page) {
jump_to_find_page (view, EV_VIEW_FIND_PREV, 0);
}
......
......@@ -268,7 +268,7 @@ ev_find_sidebar_highlight_first_match_of_page (EvFindSidebar *sidebar,
return;
for (i = 0; i < page; i++)
index += ev_job_find_get_n_results (priv->job, i);
index += ev_job_find_get_n_main_results (priv->job, i);
if (priv->highlighted_result)
gtk_tree_path_free (priv->highlighted_result);
......@@ -339,7 +339,9 @@ get_surrounding_text_markup (const gchar *text,
gboolean case_sensitive,
PangoLogAttr *log_attrs,
gint log_attrs_length,
gint offset)
gint offset,
gboolean has_nextline,
gboolean hyphen_was_ignored)
{
gint iter;
gchar *prec = NULL;
......@@ -356,7 +358,15 @@ get_surrounding_text_markup (const gchar *text,
iter = offset;
offset += g_utf8_strlen (find_text, -1);
if (!case_sensitive)
if (has_nextline || g_utf8_offset_to_pointer (text, offset-1)[0] == '\n') {
if (has_nextline) {
offset += 1; /* for newline */
if (hyphen_was_ignored)
offset += 1; /* for hyphen */
}
match = sanitized_substring (text, iter, offset);
} else if (!case_sensitive)
match = g_utf8_substring (text, iter, offset);
iter = MIN (log_attrs_length, offset + 1);
......@@ -409,7 +419,7 @@ get_page_text (EvDocument *document,
static gint
get_match_offset (EvRectangle *areas,
guint n_areas,
EvRectangle *match,
EvFindRectangle *match,
gint offset)
{
gdouble x, y;
......@@ -489,9 +499,13 @@ process_matches_idle (EvFindSidebar *sidebar)
offset = 0;
for (l = matches, result = 0; l; l = g_list_next (l), result++) {
EvRectangle *match = (EvRectangle *)l->data;
EvFindRectangle *match = (EvFindRectangle *)l->data;
gchar *markup;
GtkTreeIter iter;
gboolean has_nextline, hyphen_ignored;
if (match->next_line)
continue;
offset = get_match_offset (areas, n_areas, match, offset);
if (offset == -1) {
......@@ -508,12 +522,16 @@ process_matches_idle (EvFindSidebar *sidebar)
priv->insert_position++;
}
has_nextline = l->next && ((EvFindRectangle *)l->next->data)->next_line;
hyphen_ignored = l->next && ((EvFindRectangle *)l->next->data)->after_hyphen;
markup = get_surrounding_text_markup (page_text,
priv->job->text,
priv->job->case_sensitive,
text_log_attrs,
text_log_attrs_length,
offset);
offset,
has_nextline,
hyphen_ignored);
gtk_list_store_set (GTK_LIST_STORE (model), &iter,
TEXT_COLUMN, markup,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment