#!/usr/bin/python
#
# Copyright (C) 2010, Nokia <ivan.frade@nokia.com>
# Copyright (C) 2018, Sam Thursfield <sam@afuera.me.uk>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
# 02110-1301, USA.
#
"""
For a collection of files, call the extractor and check that the expected
metadata is extracted. Load dynamically the test information from a data
directory (containing xxx.expected files)
"""

from common.utils import configuration as cfg
from common.utils.extractor import get_tracker_extract_jsonld_output
import unittest as ut
import json
import os
import sys


class ExtractionTestCase(ut.TestCase):
    """
    Check that tracker-extract returns the metadata listed in a description file.

    One instance covers one JSON description file. The file is read as an
    object with a 'test' entry ('Filename', plus the optional
    'ExpectedFailure' and 'Bugzilla' keys) and a 'metadata' entry holding the
    expected extractor output.
    """

    # Instances need a description file, so they are built explicitly by
    # run_all()/run_one(). Tell discovery-based collectors (nose, pytest) not
    # to instantiate this class on their own.
    __test__ = False

    def __init__(self, methodName='runTest', descfile=None):
        """
        Load the description file and register a test method named after it.

        methodName -- only forwarded to ut.TestCase.__init__; the method that
                      actually runs is chosen below.
        descfile   -- path to the description file (required).

        Raises TypeError when no descfile is given, and the test failure
        exception when the file does not contain valid JSON.
        """
        ut.TestCase.__init__(self, methodName)

        if descfile is None:
            raise TypeError("descfile is required: path to a description file")

        self.descfile = descfile
        try:
            with open(descfile) as f:
                self.spec = json.load(f)
        except ValueError as e:
            self.fail("Error loading %s: %s" % (descfile, e))

        # Add a method to the instance called after the description file:
        # lower-cased, ".expected" suffix dropped, spaces replaced and limited
        # to the last 60 characters.
        methodName = descfile.lower()[:-len(".expected")].replace(" ", "_")[-60:]

        if self.spec['test'].get('ExpectedFailure', False):
            setattr(self, methodName, self.expected_failure_test_extraction)
        else:
            setattr(self, methodName, self.generic_test_extraction)

        # unittest framework will run the test called "self._testMethodName"
        # So we set that variable to our new name
        self._testMethodName = methodName

    def runTest(self):
        """
        Placeholder that should NEVER be called. It is required to exist by
        unittest; __init__ redirects the run to another method.
        """
        # Explicit raise instead of "assert False": asserts vanish under -O.
        raise AssertionError("runTest is a placeholder and must not be called")

    def __get_bugnumber(self):
        """Return the 'Bugzilla' entry of the description file, or None."""
        return self.spec['test'].get('Bugzilla')

    def generic_test_extraction(self):
        """
        Run the extractor on the file named by the description and check
        its output against the 'metadata' section.
        """
        # Filename contains the file to extract, in a relative path to the
        # description file.
        desc_root = os.path.dirname(os.path.abspath(self.descfile))
        self.file_to_extract = os.path.join(desc_root, self.spec['test']['Filename'])

        result = get_tracker_extract_jsonld_output(self.file_to_extract)
        self.__assert_extraction_ok(result)

    def expected_failure_test_extraction(self):
        """
        Run the generic test for a description marked 'ExpectedFailure'.

        Any exception from the generic test is reported as an expected
        failure; a clean run raises an Exception pointing at the bug number
        (when the description has one) or at the description file.
        """
        try:
            self.generic_test_extraction()
        except Exception:
            # NOTE(review): _ExpectedFailure is private unittest API; confirm
            # it exists on every Python version this script must run on.
            raise ut.case._ExpectedFailure(sys.exc_info())

        bugnumber = self.__get_bugnumber()
        if bugnumber:
            raise Exception("Unexpected success. Maybe bug: %s has been fixed?" % (bugnumber,))
        else:
            raise Exception("Unexpected success. Check %s" % (self.descfile,))

    def assertDictHasKey(self, d, key, msg=None):
        """Fail unless d is a dict that contains key."""
        if not isinstance(d, dict):
            # Wrap in a tuple so a tuple-valued d does not break formatting.
            self.fail("Expected dict, got %s" % (d,))
        if key not in d:
            standardMsg = "Missing: %s\n" % (key,)
            self.fail(self._formatMessage(msg, standardMsg))

    def assertIsURN(self, supposed_uuid, msg=None):
        """
        Fail unless supposed_uuid is accepted by uuid.UUID, optionally
        wrapped in angle brackets.
        """
        import uuid

        try:
            if supposed_uuid.startswith("<") and supposed_uuid.endswith(">"):
                supposed_uuid = supposed_uuid[1:-1]

            uuid.UUID(supposed_uuid)
        except (ValueError, AttributeError, TypeError):
            # AttributeError/TypeError: the value is not a string at all;
            # report it as a test failure rather than an unrelated crash.
            standardMsg = "'%s' is not a valid UUID" % (supposed_uuid,)
            self.fail(self._formatMessage(msg, standardMsg))

    def __assert_extraction_ok(self, result):
        """
        Check result against the 'metadata' section, printing the full
        extractor output before re-raising when an assertion fails.
        """
        try:
            self.__check(self.spec['metadata'], result)
        except AssertionError:
            print("\ntracker-extract returned: %s" % json.dumps(result, indent=4))
            raise

    def __check(self, spec, result):
        """
        Compare one level of the expected metadata (spec) with the extractor
        output (result), recursing into nested lists and dicts.

        Keys of spec are interpreted as follows:
          "!name" -- banned: with value "" the property must be absent,
                     otherwise the given value must not be in the property.
          "@type" -- only presence of the key is checked.
          other   -- the property must exist and match the expected value;
                     the value "@URNUUID@" means "any valid UUID".
        """
        error_missing_prop = "Property '%s' hasn't been extracted from file \n'%s'\n (requested on '%s')"
        error_wrong_value = "on property '%s' from file %s\n (requested on: '%s')"
        error_wrong_length = "Length mismatch on property '%s' from file %s\n (requested on: '%s')"
        error_extra_prop = "Property '%s' was explicitely banned for file \n'%s'\n (requested on '%s')"
        error_extra_prop_v = "Property '%s' with value '%s' was explicitely banned for file \n'%s'\n (requested on %s')"

        expected_pairs = []  # List of expected (key, value)
        unexpected_pairs = []  # List of unexpected (key, value)
        expected_keys = []  # List of expected keys (the key must be there, value doesnt matter)

        for k, v in spec.items():
            if k.startswith("!"):
                unexpected_pairs.append((k[1:], v))
            elif k == '@type':
                expected_keys.append('@type')
            else:
                expected_pairs.append((k, v))

        # Every message ends with the same two arguments.
        origin = (self.file_to_extract, self.descfile)

        for prop, expected_value in expected_pairs:
            self.assertDictHasKey(result, prop, error_missing_prop % ((prop,) + origin))
            actual = result[prop]

            if expected_value == "@URNUUID@":
                self.assertIsURN(actual[0]['@id'], error_wrong_value % ((prop,) + origin))
            elif isinstance(expected_value, list):
                if not isinstance(actual, list):
                    raise AssertionError("Expected a list property for %s, but got a %s: %s" % (
                        prop, type(actual).__name__, actual))

                self.assertEqual(len(expected_value), len(actual),
                                 error_wrong_length % ((prop,) + origin))

                # Lengths are equal at this point, so zip pairs every item.
                for expected_item, actual_item in zip(expected_value, actual):
                    self.__check(expected_item, actual_item)
            elif isinstance(expected_value, dict):
                self.__check(expected_value, actual)
            else:
                self.assertEqual(str(expected_value), str(actual),
                                 error_wrong_value % ((prop,) + origin))

        for prop, value in unexpected_pairs:
            # There is no prop, or it is but not with that value
            if value == "":
                self.assertFalse(prop in result, error_extra_prop % ((prop,) + origin))
                continue

            if prop not in result:
                # An absent property cannot carry the banned value.
                continue

            if value == "@URNUUID@":
                # NOTE(review): this asserts the value IS a valid UUID even
                # though the key is marked as banned; confirm the intent.
                self.assertIsURN(result[prop][0], error_extra_prop % ((prop,) + origin))
            else:
                self.assertNotIn(value, result[prop],
                                 error_extra_prop_v % ((prop, value) + origin))

        for prop in expected_keys:
            self.assertDictHasKey(result, prop, error_missing_prop % ((prop,) + origin))


def run_all():
    """
    Build one ExtractionTestCase per description file and run them all.

    The test data directory is walked recursively and every file whose name
    ends in "expected" becomes its own test case; doing this inside a single
    TestCase would let an error in one file stop the whole run.

    A "test-extraction-data" directory in the current working directory takes
    precedence over the copy under cfg.DATADIR. The process exits with
    status 0 when every test passed and 1 otherwise.
    """
    local_data_path = os.path.join(os.getcwd(), "test-extraction-data")
    if os.path.exists(local_data_path):
        # Use local directory if available
        test_data_path = local_data_path
    else:
        test_data_path = os.path.join(cfg.DATADIR, "tracker-tests",
                                      "test-extraction-data")

    # A single pre-formatted argument prints the same text whether print is
    # a statement (Python 2) or a function (Python 3).
    print("Loading test descriptions from %s" % (test_data_path,))

    suite = ut.TestSuite()
    for root, dirs, files in os.walk(test_data_path):
        for name in files:
            if name.endswith("expected"):
                suite.addTest(ExtractionTestCase(descfile=os.path.join(root, name)))

    result = ut.TextTestRunner(verbosity=1).run(suite)
    sys.exit(not result.wasSuccessful())

def run_one(filename):
    """
    Run the test for a single description file.

    filename is joined onto the current working directory. The process exits
    with status 0 when the test passed and 1 otherwise.
    """
    suite = ut.TestSuite()
    suite.addTest(ExtractionTestCase(descfile=os.path.join(os.getcwd(), filename)))

    runner = ut.TextTestRunner(verbosity=2)
    outcome = runner.run(suite)
    sys.exit(not outcome.wasSuccessful())


# Script entry point: with no argument run every description file found,
# with one argument run only that description file.
try:
    argument_count = len(sys.argv) - 1
    if argument_count == 0:
        run_all()
    elif argument_count == 1:
        run_one(sys.argv[1])
    else:
        raise RuntimeError("Too many arguments.")
except RuntimeError as e:
    sys.stderr.write("ERROR: %s\n" % e)
    sys.exit(1)