Commit 975c05da authored by Sam Thursfield

Merge branch 'sam/extract-validation' into 'master'

functional-tests: Validate SPARQL as part of extractor tests

Closes tracker#196

See merge request !277
parents c428b7bc 0bf5c251
Pipeline #220907 failed with stages
in 4 minutes and 1 second
......@@ -169,20 +169,16 @@ process_desktop_file (TrackerResource *resource,
if (link_url) {
TrackerResource *website_resource;
gchar *link_uri;
g_strstrip (link_url);
link_uri = tracker_sparql_escape_uri_printf (LINK_URN_PREFIX "%s", link_url);
website_resource = tracker_resource_new (link_uri);
website_resource = tracker_resource_new (link_url);
tracker_resource_add_uri (website_resource, "rdf:type", "nie:DataObject");
tracker_resource_add_uri (website_resource, "rdf:type", "nfo:Website");
tracker_resource_set_string (website_resource, "nie:url", link_url);
tracker_resource_set_take_relation (resource, "nie:isStoredAs", website_resource);
tracker_resource_add_uri (resource, "rdf:type", "nfo:Bookmark");
tracker_resource_set_take_relation (resource, "nfo:bookmarks", website_resource);
g_free (link_url);
g_free (link_uri);
} else {
/* a Link desktop entry must have an URL */
gchar *uri;
......@@ -696,11 +696,21 @@ tracker_extract_get_metadata_by_cmdline (TrackerExtract *object,
if (resource) {
char *text;
g_autoptr (TrackerResource) file_resource = NULL;
/* If this was going into the tracker-store we'd generate a unique ID
* here, so that the data persisted across file renames.
/* Set up the corresponding nfo:FileDataObject resource appropriately,
* so the SPARQL we generate is valid according to Nepomuk.
tracker_resource_set_identifier (resource, uri);
file_resource = tracker_resource_get_first_relation (resource, "nie:isStoredAs");
if (file_resource) {
g_object_ref (file_resource);
} else {
file_resource = tracker_resource_new (uri);
tracker_resource_set_relation (resource, "nie:isStoredAs", file_resource);
tracker_resource_add_uri (file_resource, "rdf:type", "nfo:FileDataObject");
text = tracker_resource_print_sparql_update (resource, NULL, NULL);
......@@ -21,13 +21,12 @@
from gi.repository import GLib
import configparser
import errno
import json
import logging
import os
import shutil
import tempfile
import sys
if 'TRACKER_FUNCTIONAL_TEST_CONFIG' not in os.environ:
......@@ -67,6 +66,12 @@ def cli_subcommands_dir():
def nepomuk_path():
parser = configparser.ConfigParser()['TEST_DOMAIN_ONTOLOGY_RULE'])
return parser.get('DomainOntology', 'OntologyLocation')
# This path is used for test data for tests which expect filesystem monitoring
# to work. For this reason we must avoid it being on a tmpfs filesystem. Note
# that this MUST NOT be a hidden directory, as Tracker is hardcoded to ignore
......@@ -85,8 +85,8 @@ class FlacCuesheetTest(fixtures.TrackerExtractTestCase):
audio_path = pathlib.Path(tmpdir).joinpath('cuesheet-test.flac')
datagenerator.create_test_flac(audio_path, duration=6*60)
result = fixtures.get_tracker_extract_jsonld_output(
cfg.test_environment(tmpdir), audio_path)
result = fixtures.get_tracker_extract_output(
cfg.test_environment(tmpdir), audio_path, output_format='json-ld')
self.spec(audio_path), result, audio_path, __file__)
......@@ -23,6 +23,11 @@ metadata is extracted. Load dynamically the test information from a data
directory (containing xxx.expected files)
import gi
gi.require_version('Tracker', '3.0')
from gi.repository import Gio
from gi.repository import Tracker
import json
import os
import shutil
......@@ -64,6 +69,21 @@ class GenericExtractionTestCase(fixtures.TrackerExtractTestCase):
def __get_bugnumber(self):
return self.spec['test'].get('Bugzilla')
def validate_sparql_update(self, sparql):
"""Create a temporary database and run the given SPARQL update.
This gives us a smoke test to detect any situation where the
extractor generates invalid SPARQL.
cancellable = None
ontology_path = Gio.File.new_for_uri(cfg.nepomuk_path())
db =,
None, # create in-memory database,
db.update(sparql, cancellable)
def generic_test_extraction(self):
abs_description = os.path.abspath(self.descfile)
......@@ -76,8 +96,15 @@ class GenericExtractionTestCase(fixtures.TrackerExtractTestCase):
tmpdir = tempfile.mkdtemp(prefix='tracker-extract-test-')
extra_env = cfg.test_environment(tmpdir)
result = fixtures.get_tracker_extract_jsonld_output(extra_env, self.file_to_extract)
jsonld = fixtures.get_tracker_extract_output(extra_env,
sparql = fixtures.get_tracker_extract_output(extra_env,
shutil.rmtree(tmpdir, ignore_errors=True)
......@@ -245,13 +245,14 @@ class TrackerMinerFTSTest (TrackerMinerTest):
return int(result[0][0])
def get_tracker_extract_jsonld_output(extra_env, filename, mime_type=None):
def get_tracker_extract_output(extra_env, filename, output_format='json-ld', mime_type=None):
Runs `tracker-extract --file` to extract metadata from a file.
tracker_extract = os.path.join(cfg.TRACKER_EXTRACT_PATH)
command = [tracker_extract, '--output-format=json-ld', '--file', str(filename)]
command = [tracker_extract, '--output-format', output_format, '--file',
if mime_type is not None:
command.extend(['--mime', mime_type])
......@@ -285,19 +286,20 @@ def get_tracker_extract_jsonld_output(extra_env, filename, mime_type=None):
error_output = stderr.decode('utf-8').strip()
log.debug("Error output from tracker-extract:\n%s", error_output)
output = stdout.decode('utf-8')
if len(output.strip()) == 0:
raise RuntimeError("tracker-extract didn't return any data.\n"
"Error output was: %s" % error_output)
output = stdout.decode('utf-8')
data = json.loads(output)
except ValueError as e:
raise RuntimeError("tracker-extract did not return valid JSON data: %s\n"
"Output was: %s" % (e, output))
if len(output.strip()) == 0:
raise RuntimeError("tracker-extract didn't return any data.\n"
"Error output was: %s" % error_output)
return data
if output_format == 'json-ld':
return json.loads(output)
except ValueError as e:
raise RuntimeError("tracker-extract did not return valid JSON data: %s\n"
"Output was: %s" % (e, output))
return output
class TrackerExtractTestCase(ut.TestCase):
......@@ -326,7 +328,7 @@ class TrackerExtractTestCase(ut.TestCase):
Checks tracker-extract json-ld output against the expected result.
Use get_tracker_extract_jsonld_output() to get the extractor output.
Use get_tracker_extract_output() to get the extractor output.
Look in test-extraction-data/*/*.expected.json for examples of the spec
......@@ -4,8 +4,8 @@
"metadata": {
"nie:title" : "Wikipedia Tracker",
"nie:isStoredAs": {
"@id": "",
"nfo:bookmarks": {
"@id": "",
"@type": [
......@@ -40,7 +40,7 @@ class WritebackAudioTest(fixtures.TrackerWritebackTest):
self.wait_for_file_change(path, initial_mtime)
results = fixtures.get_tracker_extract_jsonld_output({}, path)
results = fixtures.get_tracker_extract_output({}, path, output_format='json-ld')
self.assertIn(TEST_VALUE, results[prop])
def test_writeback_mp3(self):
......@@ -85,7 +85,7 @@ class WritebackKeepDateTest (fixtures.TrackerWritebackTest):
self.wait_for_file_change(jpeg_path, initial_mtime)
# Check the value is written in the file
metadata = fixtures.get_tracker_extract_jsonld_output(self.extra_env, jpeg_path, "")
metadata = fixtures.get_tracker_extract_output(self.extra_env, jpeg_path, output_format='json-ld')
tags = metadata.get('nao:hasTag', [])
tag_names = [tag['nao:prefLabel'] for tag in tags]
......@@ -63,7 +63,7 @@ class WritebackImagesTest(fixtures.TrackerWritebackTest):
self.wait_for_file_change(path, initial_mtime)
log.debug("Got the change")
results = fixtures.get_tracker_extract_jsonld_output({}, path, mimetype)
results = fixtures.get_tracker_extract_output({}, path, mime_type=mimetype, output_format='json-ld')
keyDict = expectedKey or prop
self.assertIn(TEST_VALUE, results[keyDict])
......@@ -87,7 +87,7 @@ class WritebackImagesTest(fixtures.TrackerWritebackTest):
self.wait_for_file_change(path, initial_mtime)
results = fixtures.get_tracker_extract_jsonld_output(self.extra_env, filename, mimetype)
results = fixtures.get_tracker_extract_output(self.extra_env, filename, mime_type=mimetype, output_format='json-ld')
self.assertIn("testTag", results["nao:hasTag"])
# JPEG test
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment