From d1b44392cc21502cfff01937b87c49306ff6198a Mon Sep 17 00:00:00 2001 From: Pekka Vuorela Date: Fri, 29 Oct 2021 11:05:45 +0300 Subject: [PATCH 1/2] Fix duplicate entries on files created and instantly modified On "touch newfile.jpg; cp oldfile.jpg newfile.jpg" the mime type was first detected as text/plain after the touch call and afterwards as the proper type. With the file added to two type-specific graphs, the tracker extractor query for files without extractorHash listed the same file twice and the extracted content also got added twice. The side effect, of course, is that empty files are no longer available outside tracker:FileSystem. Relates to https://gitlab.gnome.org/GNOME/tracker-miners/-/issues/200 --- src/miners/fs/tracker-miner-files.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/miners/fs/tracker-miner-files.c b/src/miners/fs/tracker-miner-files.c index dafb8dbf3..a7fed924c 100644 --- a/src/miners/fs/tracker-miner-files.c +++ b/src/miners/fs/tracker-miner-files.c @@ -2170,9 +2170,10 @@ miner_files_process_file (TrackerMinerFS *fs, graph = tracker_extract_module_manager_get_graph (mime_type); - if (graph) { + if (graph && g_file_info_get_size (file_info) > 0) { /* This mimetype will be extracted by some module, pre-fill the * nfo:FileDataObject in that graph. + * Empty files skipped as mime-type for those cannot be trusted.
*/ graph_file = tracker_resource_new (uri); tracker_resource_add_uri (graph_file, "rdf:type", "nfo:FileDataObject"); -- GitLab From 24a9bb294039375a18464ff0325e750e673b6b20 Mon Sep 17 00:00:00 2001 From: Pekka Vuorela Date: Fri, 29 Oct 2021 11:08:25 +0300 Subject: [PATCH 2/2] Avoid null pointer warning --- src/libtracker-miner/tracker-sparql-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/libtracker-miner/tracker-sparql-buffer.c b/src/libtracker-miner/tracker-sparql-buffer.c index b3ff07bd9..40634f2c1 100644 --- a/src/libtracker-miner/tracker-sparql-buffer.c +++ b/src/libtracker-miner/tracker-sparql-buffer.c @@ -438,7 +438,7 @@ tracker_sparql_buffer_get_state (TrackerSparqlBuffer *buffer, if (!tracker_task_pool_find (TRACKER_TASK_POOL (buffer), file)) return TRACKER_BUFFER_STATE_UNKNOWN; - if (g_hash_table_contains (priv->file_set, file)) + if (priv->file_set != NULL && g_hash_table_contains (priv->file_set, file)) return TRACKER_BUFFER_STATE_QUEUED; return TRACKER_BUFFER_STATE_FLUSHING; -- GitLab