#!/usr/bin/env python3
#
# Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
# Copyright (C) 2018, Sam Thursfield <sam@afuera.me.uk>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
"""
For a collection of files, call the extractor and check that the expected
metadata is extracted. The test information is loaded dynamically from a data
directory (containing xxx.expected files).
"""

import json
import os
import sys
import unittest as ut

from common.utils import configuration as cfg
from common.utils.extractor import get_tracker_extract_jsonld_output


class ExtractionTestCase(ut.TestCase):
    """
    Check that the tracker extractor retrieves the metadata listed in one
    description file.

    Each instance is bound to a single JSON description file. The constructor
    registers a test method named after that file on the instance, so every
    description file shows up as its own test in the runner output.
    """

    # Instances are only meaningful when built with a 'descfile' (see
    # run_all() / run_one()). Tell collectors that honour the '__test__'
    # convention (e.g. pytest) not to instantiate this class on their own.
    __test__ = False

    def __init__(self, methodName='runTest', descfile=None):
        """
        Load the description file and register the per-file test method.

        methodName -- passed to ut.TestCase.__init__; the effective test
                      method name is replaced by one derived from descfile.
        descfile   -- path of the JSON description file (relative paths are
                      accepted).

        Fails (AssertionError through self.fail) if the file is not valid JSON.
        """
        ut.TestCase.__init__(self, methodName)

        self.descfile = descfile
        try:
            with open(descfile) as f:
                self.spec = json.load(f)
        except ValueError as e:
            self.fail("Error loading %s: %s" % (descfile, e))

        # Add a method to the instance called after the description file:
        # lower-cased, ".expected" suffix dropped, spaces replaced and only
        # the last 60 characters kept.
        test_name = descfile.lower()[:-len(".expected")].replace(" ", "_")[-60:]

        if self.spec['test'].get('ExpectedFailure', False):
            test_method = self.expected_failure_test_extraction
        else:
            test_method = self.generic_test_extraction
        setattr(self, test_name, test_method)

        # unittest framework will run the test called "self._testMethodName"
        # So we set that variable to our new name
        self._testMethodName = test_name

    def runTest(self):
        """
        Placeholder that should NEVER be called. It is required to exist by
        unittest, because the constructor is invoked with methodName='runTest'.
        """
        # Raise explicitly: a bare 'assert' is stripped when running with -O.
        raise AssertionError("runTest should never be called")

    def __get_bugnumber(self):
        """Return the 'Bugzilla' entry of the description, or None if absent."""
        return self.spec['test'].get('Bugzilla')

    def generic_test_extraction(self):
        """
        Run the extractor on the file named by the description and compare
        its output with the expected metadata.
        """
        # Filename contains the file to extract, in a relative path to the
        # description file
        desc_root = os.path.dirname(os.path.abspath(self.descfile))
        self.file_to_extract = os.path.join(desc_root,
                                            self.spec['test']['Filename'])

        result = get_tracker_extract_jsonld_output(self.file_to_extract)
        self.__assert_extraction_ok(result)

    @ut.expectedFailure
    def expected_failure_test_extraction(self):
        """
        Variant used when the description sets 'ExpectedFailure': the
        extraction check is expected to fail.

        If the check passes, an exception describing the unexpected success
        is raised, pointing at the bug number when the description has one.

        NOTE(review): unittest records any exception raised inside a method
        decorated with expectedFailure as the expected failure, so the
        exceptions raised below may never reach the report -- confirm whether
        that is intended.
        """
        self.generic_test_extraction()

        bug_number = self.__get_bugnumber()
        if bug_number:
            # %-formatting so that a numeric bug id does not break the message
            raise Exception("Unexpected success. Maybe bug: %s has been fixed?" % (bug_number,))
        else:
            raise Exception("Unexpected success. Check %s" % (self.descfile,))

    def assertDictHasKey(self, d, key, msg=None):
        """
        Fail unless 'd' is a dict that contains 'key'.

        msg -- optional message combined with the standard one on failure.
        """
        if not isinstance(d, dict):
            # One-element tuples keep the formatting safe when the offending
            # value is itself a tuple.
            self.fail("Expected dict, got %s" % (d,))
        if key not in d:
            standardMsg = "Missing: %s\n" % (key,)
            self.fail(self._formatMessage(msg, standardMsg))

    def assertIsURN(self, supposed_uuid, msg=None):
        """
        Fail unless 'supposed_uuid' is accepted by uuid.UUID.

        A value wrapped in angle brackets ("<...>") is unwrapped first.
        Values that are not strings are reported as failures as well.
        """
        import uuid

        try:
            if supposed_uuid.startswith("<") and supposed_uuid.endswith(">"):
                supposed_uuid = supposed_uuid[1:-1]

            uuid.UUID(supposed_uuid)
        except (ValueError, AttributeError, TypeError):
            standardMsg = "'%s' is not a valid UUID" % (supposed_uuid,)
            self.fail(self._formatMessage(msg, standardMsg))

    def __assert_extraction_ok(self, result):
        """
        Check 'result' against the 'metadata' section of the description.

        On an assertion failure the whole extractor output is printed before
        the failure is re-raised, to help diagnosing it.
        """
        try:
            self.__check(self.spec['metadata'], result)
        except AssertionError:
            print("\ntracker-extract returned: %s" % json.dumps(result, indent=4))
            raise

    def __check(self, spec, result):
        """
        Recursively compare the expected metadata 'spec' with 'result'.

        Keys of 'spec' are interpreted as follows:
          "!prop"  -- banned: with an empty value the property must be
                      absent, otherwise it must not contain that value.
          "@type"  -- the key must be present, its value is not compared.
          others   -- the property must be present and match: the marker
                      "@URNUUID@" requires a UUID in result[prop][0]['@id'],
                      lists are compared element by element, dicts
                      recursively, anything else by string comparison.
        """
        error_missing_prop = "Property '%s' hasn't been extracted from file \n'%s'\n (requested on '%s')"
        error_wrong_value = "on property '%s' from file %s\n (requested on: '%s')"
        error_wrong_length = "Length mismatch on property '%s' from file %s\n (requested on: '%s')"
        error_extra_prop = "Property '%s' was explicitely banned for file \n'%s'\n (requested on '%s')"
        error_extra_prop_v = "Property '%s' with value '%s' was explicitely banned for file \n'%s'\n (requested on '%s')"

        # Trailing arguments shared by all the messages above
        location = (self.file_to_extract, self.descfile)

        expected_pairs = []  # List of expected (key, value)
        unexpected_pairs = []  # List of unexpected (key, value)
        expected_keys = []  # List of expected keys (the key must be there, value doesnt matter)

        for k, v in spec.items():
            if k.startswith("!"):
                unexpected_pairs.append((k[1:], v))
            elif k == '@type':
                expected_keys.append('@type')
            else:
                expected_pairs.append((k, v))

        for prop, expected_value in expected_pairs:
            self.assertDictHasKey(result, prop,
                                  error_missing_prop % ((prop,) + location))
            actual_value = result[prop]

            if expected_value == "@URNUUID@":
                self.assertIsURN(actual_value[0]['@id'],
                                 error_wrong_value % ((prop,) + location))
            elif isinstance(expected_value, list):
                if not isinstance(actual_value, list):
                    raise AssertionError("Expected a list property for %s, but got a %s: %s" % (
                        prop, type(actual_value).__name__, actual_value))

                self.assertEqual(len(expected_value), len(actual_value),
                                 error_wrong_length % ((prop,) + location))

                # Lengths are equal at this point, so zip() drops nothing
                for expected_item, actual_item in zip(expected_value, actual_value):
                    self.__check(expected_item, actual_item)
            elif isinstance(expected_value, dict):
                self.__check(expected_value, actual_value)
            else:
                self.assertEqual(str(expected_value), str(actual_value),
                                 error_wrong_value % ((prop,) + location))

        for prop, value in unexpected_pairs:
            # There is no prop, or it is but not with that value
            if value == "":
                self.assertFalse(prop in result,
                                 error_extra_prop % ((prop,) + location))
            elif prop not in result:
                # An absent property cannot carry the banned value
                continue
            elif value == "@URNUUID@":
                # NOTE(review): this asserts that the value IS a UUID even
                # though the property is listed as banned -- confirm intent.
                self.assertIsURN(result[prop][0],
                                 error_extra_prop % ((prop,) + location))
            else:
                self.assertNotIn(value, result[prop],
                                 error_extra_prop_v % ((prop, value) + location))

        for prop in expected_keys:
            self.assertDictHasKey(result, prop,
                                  error_missing_prop % ((prop,) + location))


def run_all():
    """
    Run one ExtractionTestCase per description file and exit.

    The data directory is "test-extraction-data" under the current working
    directory when it exists, otherwise the installed copy under
    cfg.DATADIR/tracker-tests. It is traversed recursively looking for
    "*.expected" files, and a new TestCase is added to the suite for each
    of them.

    If we did this inside a single TestCase, an error in one test would stop
    the whole testing.

    The process exits with status 0 when every test succeeded, 1 otherwise.
    """
    local_data_path = os.path.join(os.getcwd(), "test-extraction-data")
    if os.path.exists(local_data_path):
        # Use local directory if available
        TEST_DATA_PATH = local_data_path
    else:
        TEST_DATA_PATH = os.path.join(cfg.DATADIR, "tracker-tests",
                                      "test-extraction-data")
    print("Loading test descriptions from", TEST_DATA_PATH)

    extractionTestSuite = ut.TestSuite()
    for root, dirs, files in os.walk(TEST_DATA_PATH):
        # Sort in place so that the traversal (and the test order) is
        # the same on every run and every filesystem.
        dirs.sort()
        for name in sorted(files):
            # Match the exact ".expected" suffix that ExtractionTestCase
            # strips when it derives the test name.
            if name.endswith(".expected"):
                tc = ExtractionTestCase(descfile=os.path.join(root, name))
                extractionTestSuite.addTest(tc)

    result = ut.TextTestRunner(verbosity=1).run(extractionTestSuite)
    sys.exit(not result.wasSuccessful())


def run_one(filename):
    """
    Run the extraction test for just one description file and exit.

    filename -- path of the description file; it is joined to the current
                working directory.

    The process exits with status 0 when the test succeeded, 1 otherwise.
    """
    descfile = os.path.join(os.getcwd(), filename)

    suite = ut.TestSuite()
    suite.addTest(ExtractionTestCase(descfile=descfile))

    runner = ut.TextTestRunner(verbosity=2)
    outcome = runner.run(suite)
    sys.exit(not outcome.wasSuccessful())


if __name__ == "__main__":
    # Command-line entry point, guarded so that importing this module does
    # not run the suite (and exit the interpreter) as a side effect.
    #
    #   no argument   -> run every description file found (run_all)
    #   one argument  -> run only the given description file (run_one)
    #   anything else -> report the error on stderr and exit with status 1
    try:
        argument_count = len(sys.argv)
        if argument_count == 2:
            run_one(sys.argv[1])
        elif argument_count == 1:
            run_all()
        else:
            raise RuntimeError("Too many arguments.")
    except RuntimeError as e:
        sys.stderr.write("ERROR: %s\n" % e)
        sys.exit(1)