Commit 45918ba0 authored by Sam Thursfield

tracker-extract-text: Try to read file even if n_bytes == 0

This makes a clear distinction between two cases:

  1. the user asked us not to read text files
  2. the user wants us to read a text file, but we can't

For a long time, our behaviour was to always insert a resource into
tracker-store for the text file, even if we failed to read it.
Since 2eda05bb we return an error code if we tried to read and
failed, but not when org.freedesktop.Tracker.Extract.max-bytes was set
to 0, as we wouldn't even try to read the file.

This commit changes the code to always read the file in order to check
it exists. There is a performance penalty for users who set max-bytes to
0, but it seems unlikely that someone would do that while still having
tracker-extract enabled for other types of file.

Suggested in
GNOME/tracker-miners!62 (comment 501732)
parent afb0d921
......@@ -46,8 +46,6 @@ get_file_content (GFile *file,
gchar *text, *uri, *path;
int fd;
g_return_val_if_fail (n_bytes > 0, NULL);
uri = g_file_get_uri (file);
/* Get filename from URI */
......@@ -82,19 +80,15 @@ tracker_extract_get_metadata (TrackerExtractInfo *info)
{
TrackerResource *metadata;
TrackerConfig *config;
gsize n_bytes;
gchar *content = NULL;
config = tracker_main_get_config ();
n_bytes = tracker_config_get_max_bytes (config);
if (n_bytes > 0) {
content = get_file_content (tracker_extract_info_get_file (info), n_bytes);
content = get_file_content (tracker_extract_info_get_file (info), tracker_config_get_max_bytes (config));
if (content == NULL) {
/* An error occurred, perhaps the file was deleted. */
return FALSE;
}
if (content == NULL) {
/* An error occurred, perhaps the file was deleted. */
return FALSE;
}
metadata = tracker_resource_new (NULL);
......
......@@ -93,7 +93,7 @@ process_chunk (const gchar *read_bytes,
gsize read_size,
gsize buffer_size,
gsize *remaining_size,
GString **s)
GString *s)
{
/* If no more bytes to read, halt loop */
if (read_size == 0) {
......@@ -114,7 +114,7 @@ process_chunk (const gchar *read_bytes,
* UTF-16LE), so we can't rely on methods which assume
* NUL-terminated strings, as g_strstr_len().
*/
if (*s == NULL) {
if (s->len == 0) {
if (read_size <= 3) {
g_debug (" File has less than 3 characters in it, "
"not indexing file");
......@@ -153,9 +153,7 @@ process_chunk (const gchar *read_bytes,
*remaining_size);
/* Append non-NIL terminated bytes */
*s = (*s ?
g_string_append_len (*s, read_bytes, read_size) :
g_string_new_len (read_bytes, read_size));
g_string_append_len (s, read_bytes, read_size);
return TRUE;
}
......@@ -305,7 +303,7 @@ tracker_read_text_from_stream (GInputStream *stream,
n_bytes_read,
BUFFER_SIZE,
&n_bytes_remaining,
&s)) {
s)) {
break;
}
}
......@@ -333,17 +331,17 @@ tracker_read_text_from_fd (gint fd,
gsize max_bytes)
{
FILE *fz;
GString *s = NULL;
GString *s;
gsize n_bytes_remaining = max_bytes;
g_return_val_if_fail (max_bytes > 0, NULL);
if ((fz = fdopen (fd, "r")) == NULL) {
g_warning ("Cannot read from FD... could not extract text");
close (fd);
return NULL;
}
s = g_string_new ("");
/* Reading in chunks of BUFFER_SIZE
* Loop is halted whenever one of this conditions is met:
* a) Read bytes reached the maximum allowed (max_bytes)
......@@ -367,7 +365,7 @@ tracker_read_text_from_fd (gint fd,
n_bytes_read,
BUFFER_SIZE,
&n_bytes_remaining,
&s)) {
s)) {
break;
}
}
......@@ -380,5 +378,5 @@ tracker_read_text_from_fd (gint fd,
fclose (fz);
/* Validate UTF-8 if something was read, and return it */
return s ? process_whole_string (s) : NULL;
return process_whole_string (s);
}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment