database_history.py 21.1 KB
Newer Older
Cédric Bellegarde's avatar
Cédric Bellegarde committed
1
# Copyright (c) 2017-2019 Cedric Bellegarde <cedric.bellegarde@adishatz.org>
Cédric Bellegarde's avatar
Cédric Bellegarde committed
2 3 4 5 6 7 8 9 10 11 12
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

13
from gi.repository import GLib
Cédric Bellegarde's avatar
Cédric Bellegarde committed
14 15

import sqlite3
Cédric Bellegarde's avatar
Cédric Bellegarde committed
16
import itertools
17
from urllib.parse import urlparse
Cédric Bellegarde's avatar
Cédric Bellegarde committed
18
from threading import Lock
Cédric Bellegarde's avatar
Cédric Bellegarde committed
19
from re import sub
Cédric Bellegarde's avatar
Cédric Bellegarde committed
20

Cédric Bellegarde's avatar
Cédric Bellegarde committed
21
from eolie.utils import noaccents, get_random_string
22
from eolie.define import EOLIE_DATA_PATH, Type
Cédric Bellegarde's avatar
Cédric Bellegarde committed
23 24
from eolie.localized import LocalizedCollation
from eolie.sqlcursor import SqlCursor
25
from eolie.logger import Logger
26
from eolie.database_upgrade import DatabaseUpgrade
Cédric Bellegarde's avatar
Cédric Bellegarde committed
27 28 29 30 31 32


class DatabaseHistory:
    """
        Eolie history db
    """
33
    DB_PATH = "%s/history.db" % EOLIE_DATA_PATH
Cédric Bellegarde's avatar
Cédric Bellegarde committed
34

35
    __UPGRADES = {
36
        1: "ALTER TABLE history ADD opened INT NOT NULL DEFAULT 0",
Cédric Bellegarde's avatar
Cédric Bellegarde committed
37 38 39 40
        2: "ALTER TABLE history ADD netloc TEXT NOT NULL DEFAULT ''",
        3: "DELETE FROM history WHERE popularity=0",
        4: "DELETE FROM history_atime WHERE NOT EXISTS (SELECT * FROM history\
            WHERE history.rowid=history_atime.history_id)"
41 42
    }

Cédric Bellegarde's avatar
Cédric Bellegarde committed
43 44 45 46 47 48 49 50 51
    # SQLite documentation:
    # In SQLite, a column with type INTEGER PRIMARY KEY
    # is an alias for the ROWID.
    # Here, we define an id INT PRIMARY KEY but never feed it,
    # this make VACUUM not destroy rowids...
    __create_history = '''CREATE TABLE history (
                                               id INTEGER PRIMARY KEY,
                                               title TEXT NOT NULL,
                                               uri TEXT NOT NULL,
52
                                               netloc TEXT NOT NULL,
Cédric Bellegarde's avatar
Cédric Bellegarde committed
53
                                               guid TEXT NOT NULL,
54
                                               mtime REAL NOT NULL,
55
                                               opened INT NOT NULL DEFAULT 0,
Cédric Bellegarde's avatar
Cédric Bellegarde committed
56 57
                                               popularity INT NOT NULL
                                               )'''
58 59 60 61
    __create_history_atime = '''CREATE TABLE history_atime (
                                                history_id INT NOT NULL,
                                                atime REAL NOT NULL
                                               )'''
Cédric Bellegarde's avatar
Cédric Bellegarde committed
62

63 64 65 66 67 68 69
    __create_history_orderby_idx = """CREATE INDEX
                                               idx_orderby ON history(
                                               mtime, popularity)"""
    __create_history_where_idx = """CREATE INDEX
                                               idx_where ON history(
                                               uri, title)"""

Cédric Bellegarde's avatar
Cédric Bellegarde committed
70 71 72 73
    def __init__(self):
        """
            Create database tables or manage update if needed
        """
74
        upgrade = DatabaseUpgrade(Type.HISTORY)
Cédric Bellegarde's avatar
Cédric Bellegarde committed
75
        self.thread_lock = Lock()
76
        if not GLib.file_test(self.DB_PATH, GLib.FileTest.IS_REGULAR):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
77
            try:
78 79
                if not GLib.file_test(EOLIE_DATA_PATH, GLib.FileTest.IS_DIR):
                    GLib.mkdir_with_parents(EOLIE_DATA_PATH, 0o0750)
Cédric Bellegarde's avatar
Cédric Bellegarde committed
80 81 82
                # Create db schema
                with SqlCursor(self) as sql:
                    sql.execute(self.__create_history)
83
                    sql.execute(self.__create_history_atime)
84 85
                    sql.execute(self.__create_history_orderby_idx)
                    sql.execute(self.__create_history_where_idx)
86
                    sql.execute("PRAGMA user_version=%s" % upgrade.version)
Cédric Bellegarde's avatar
Cédric Bellegarde committed
87
            except Exception as e:
88
                Logger.error("DatabaseHistory::__init__(): %s", e)
89 90
        else:
            upgrade.upgrade(self)
Cédric Bellegarde's avatar
Cédric Bellegarde committed
91

92
    def add(self, title, uri, mtime, guid=None, atimes=[]):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
93 94 95 96
        """
            Add a new entry to history, if exists, update it
            @param title as str
            @param uri as str
Cédric Bellegarde's avatar
Cédric Bellegarde committed
97
            @param mtime as int
98 99
            @parma guid as str
            @param atime as [int]
100
            @return history id as int
Cédric Bellegarde's avatar
Cédric Bellegarde committed
101 102 103
        """
        if not uri:
            return
104
        uri = uri.rstrip('/')
105
        parsed = urlparse(uri)
106
        with SqlCursor(self) as sql:
107
            result = sql.execute("SELECT rowid, popularity FROM history\
108
                                  WHERE uri=?", (uri,))
Cédric Bellegarde's avatar
Cédric Bellegarde committed
109
            v = result.fetchone()
110
            # Update current item
Cédric Bellegarde's avatar
Cédric Bellegarde committed
111
            if v is not None:
112
                history_id = v[0]
113
                guid = self.get_guid(history_id)
114
                sql.execute("UPDATE history\
115
                             SET uri=?, netloc=?, mtime=?,\
116
                                 title=?, guid=?, popularity=?\
117
                             WHERE rowid=?", (uri, parsed.netloc, mtime, title,
Cédric Bellegarde's avatar
Cédric Bellegarde committed
118
                                              guid, v[1] + 1, history_id))
119
            # Add a new item
Cédric Bellegarde's avatar
Cédric Bellegarde committed
120
            else:
121 122 123 124 125
                # Find an uniq guid
                while guid is None:
                    guid = get_random_string(12)
                    if self.exists_guid(guid):
                        guid = None
126
                result = sql.execute("INSERT INTO history\
127 128 129 130 131
                                      (title, uri, netloc,\
                                       mtime, popularity, guid)\
                                      VALUES (?, ?, ?, ?, ?, ?)",
                                     (title, uri, parsed.netloc,
                                      mtime, 0, guid))
132 133 134
                history_id = result.lastrowid
            # Only add new atimes to db
            if not atimes:
135
                atimes = [mtime]
136 137 138 139 140 141
            current_atimes = self.get_atimes(history_id)
            for atime in atimes:
                if atime not in current_atimes:
                    sql.execute("INSERT INTO history_atime\
                                 (history_id, atime)\
                                 VALUES (?, ?)", (history_id, atime))
142
            return history_id
Cédric Bellegarde's avatar
Cédric Bellegarde committed
143

144 145 146 147 148 149 150 151
    def remove(self, history_id):
        """
            Remove item from history
            @param history id as int
        """
        with SqlCursor(self) as sql:
            sql.execute("DELETE from history\
                         WHERE rowid=?", (history_id,))
152 153
            sql.execute("DELETE from history_atime\
                         WHERE history_id=?", (history_id,))
154

155
    def clear_from(self, atime):
156
        """
Cédric Bellegarde's avatar
Cédric Bellegarde committed
157
            Clear history from atime
158
            @param atime as int
159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
        """
        with SqlCursor(self) as sql:
            sql.execute("DELETE FROM history_atime\
                         WHERE atime >= ?", (atime,))

    def clear_to(self, atime):
        """
            Clear history to atime
            @param atime as int
        """
        with SqlCursor(self) as sql:
            sql.execute("DELETE FROM history_atime\
                         WHERE atime <= ?", (atime,))

    def get_from_atime(self, atime):
        """
            Get history ids from atime
            @param atime as int
177
            @return modified history ids as [int]
178 179
        """
        with SqlCursor(self) as sql:
180 181 182
            result = sql.execute("SELECT DISTINCT history.rowid\
                                  FROM history, history_atime\
                                  WHERE history_atime.history_id=history.rowid\
Cédric Bellegarde's avatar
Cédric Bellegarde committed
183
                                  AND atime >= ?", (atime,))
184
            return list(itertools.chain(*result))
185 186 187 188 189 190 191 192 193 194 195 196

    def get_empties(self):
        """
            Get empties history entries (without atime)
            @return history ids as [int]
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT history.rowid FROM history\
                                  WHERE NOT EXISTS (\
                                    SELECT rowid FROM history_atime AS ha\
                                    WHERE ha.history_id=history.rowid)")
            return list(itertools.chain(*result))
197

198
    def get(self, atime):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
199
        """
200
            Get history for atime (current day)
201
            @param atime as int
Cédric Bellegarde's avatar
Cédric Bellegarde committed
202 203 204 205
            @return (str, str, int)
        """
        one_day = 86400
        with SqlCursor(self) as sql:
206 207 208 209
            result = sql.execute("SELECT history.rowid, title, uri, atime\
                                  FROM history, history_atime\
                                  WHERE history.rowid=history_atime.history_id\
                                  AND atime >= ? AND atime <= ?\
210 211
                                  ORDER BY atime DESC",
                                 (atime, atime + one_day))
Cédric Bellegarde's avatar
Cédric Bellegarde committed
212 213
            return list(result)

214
    def get_id(self, uri):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
215 216 217 218 219 220
        """
            Get history id
            @param uri as str
            @return history_id as int
        """
        with SqlCursor(self) as sql:
221
            uri = uri.rstrip('/')
Cédric Bellegarde's avatar
Cédric Bellegarde committed
222 223
            result = sql.execute("SELECT rowid\
                                  FROM history\
224 225
                                  WHERE uri=?",
                                 (uri,))
Cédric Bellegarde's avatar
Cédric Bellegarde committed
226 227 228 229 230
            v = result.fetchone()
            if v is not None:
                return v[0]
            return None

Cédric Bellegarde's avatar
Cédric Bellegarde committed
231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
    def get_title(self, history_id):
        """
            Get history title
            @param history_id as int
            @return title as str
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT title\
                                  FROM history\
                                  WHERE rowid=?", (history_id,))
            v = result.fetchone()
            if v is not None:
                return v[0]
            return ""

    def get_uri(self, history_id):
        """
            Get history uri
            @param history_id as int
            @return uri as str
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT uri\
                                  FROM history\
                                  WHERE rowid=?", (history_id,))
            v = result.fetchone()
            if v is not None:
                return v[0]
            return ""

    def get_guid(self, history_id):
        """
            Get history item guid
            @param history_id as int
            @return guid as str
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT guid\
                                  FROM history\
                                  WHERE rowid=?", (history_id,))
            v = result.fetchone()
            if v is not None:
                return v[0]
274
            return None
Cédric Bellegarde's avatar
Cédric Bellegarde committed
275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290

    def get_mtime(self, history_id):
        """
            Get history mtime
            @param history_id as int
            @return mtime as int
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT mtime\
                                  FROM history\
                                  WHERE rowid=?", (history_id,))
            v = result.fetchone()
            if v is not None:
                return v[0]
            return 0

291
    def get_atimes(self, history_id):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
292
        """
293
            Get history access times
Cédric Bellegarde's avatar
Cédric Bellegarde committed
294
            @param history_id as int
295
            @return [int]
Cédric Bellegarde's avatar
Cédric Bellegarde committed
296 297 298
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT atime\
299 300 301
                                  FROM history_atime\
                                  WHERE history_id=?", (history_id,))
            return list(itertools.chain(*result))
Cédric Bellegarde's avatar
Cédric Bellegarde committed
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329

    def get_id_by_guid(self, guid):
        """
            Get id for guid
            @param guid as str
            @return id as int
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT rowid\
                                  FROM history\
                                  WHERE guid=?", (guid,))
            v = result.fetchone()
            if v is not None:
                return v[0]
            return None

    def get_ids_for_mtime(self, mtime):
        """
            Get ids that need to be synced related to mtime
            @param mtime as int
            @return [int]
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT rowid\
                                  FROM history\
                                  WHERE mtime > ?", (mtime,))
            return list(itertools.chain(*result))

330 331 332 333 334 335 336 337 338 339
    def get_match(self, uri):
        """
            Try to get best uri matching
            @parma uri as str
            @return str
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT uri\
                                  FROM history\
                                  WHERE uri like ?\
340
                                  ORDER BY popularity DESC,\
341
                                  length(uri) ASC\
342 343 344 345 346 347 348
                                  LIMIT 1",
                                 ("%" + uri + "%",))
            v = result.fetchone()
            if v is not None:
                return v[0]
            return None

349
    def set_title(self, history_id, title):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
350 351 352 353 354 355 356 357 358 359 360
        """
            Set history title
            @param history_id as int
            @param title as str
            @param commit as bool
        """
        with SqlCursor(self) as sql:
            sql.execute("UPDATE history\
                         SET title=?\
                         WHERE rowid=?", (title, history_id,))

361
    def get_populars(self, netloc, limit):
362 363
        """
            Get popular bookmarks
364
            @param netloc as str
365 366 367 368
            @param limit as bool
            @return [(id, title, uri)]
        """
        with SqlCursor(self) as sql:
369
            if netloc:
370 371
                # Hack: we return history.mtime as count because we know
                # it will not be used
372
                result = sql.execute("\
373 374 375 376 377
                                SELECT rowid,\
                                       uri,\
                                       uri,\
                                       title,\
                                       mtime\
378 379
                                FROM history\
                                WHERE netloc=?\
380
                                AND popularity!=0\
381 382
                                ORDER BY popularity DESC,\
                                mtime DESC\
383 384 385
                                LIMIT ?", (netloc, limit))
            else:
                result = sql.execute("\
386 387 388 389
                                SELECT rowid,\
                                       uri,\
                                       netloc,\
                                       netloc,\
390
                                       COUNT(uri)\
391
                                FROM history\
392 393 394
                                GROUP BY netloc\
                                ORDER BY MAX(popularity) DESC,\
                                mtime DESC\
395
                                LIMIT ?", (limit,))
396 397
            return list(result)

398 399 400 401 402 403
    def get_opened_pages(self):
        """
            Get page with opened state
            @return [(uri, title)]
        """
        with SqlCursor(self) as sql:
404 405 406 407 408 409 410 411 412
            try:
                result = sql.execute("SELECT uri, title\
                                      FROM history\
                                      WHERE opened=1")
                return list(result)
            finally:
                sql.execute("UPDATE history\
                             SET opened=0\
                             WHERE opened=1")
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433

    def set_page_state(self, uri, mtime=None):
        """
            Mark page with uri as opened if mtime is not None
            @param uri as str
            @param mtime as double
        """
        if uri is None:
            return
        uri = uri.rstrip('/')
        with SqlCursor(self) as sql:
            if mtime is None:
                sql.execute("UPDATE history\
                             SET opened=0\
                             WHERE uri=?\
                             AND opened=1", (uri,))
            else:
                sql.execute("UPDATE history\
                             SET opened=1 WHERE uri=?\
                             AND mtime=?", (uri, mtime))

434
    def set_atimes(self, history_id, atimes):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
435 436 437
        """
            Set history atime
            @param history_id as int
438
            @param atimes as [int]
Cédric Bellegarde's avatar
Cédric Bellegarde committed
439 440 441
            @param commit as bool
        """
        with SqlCursor(self) as sql:
442 443 444 445 446
            current_atimes = self.get_atimes(history_id)
            for atime in atimes:
                if atime not in current_atimes:
                    sql.execute("INSERT INTO history_atime (history_id, atime)\
                                 VALUES (?, ?)", (history_id, atime))
Cédric Bellegarde's avatar
Cédric Bellegarde committed
447

448
    def set_mtime(self, history_id, mtime):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
449 450 451
        """
            Set history mtime
            @param history_id as int
452
            @param mtime as int
Cédric Bellegarde's avatar
Cédric Bellegarde committed
453 454 455 456 457
        """
        with SqlCursor(self) as sql:
            sql.execute("UPDATE history\
                         SET mtime=? where rowid=?", (mtime, history_id))

458
    def search(self, search, limit):
Cédric Bellegarde's avatar
Cédric Bellegarde committed
459 460 461
        """
            Search string in db (uri and title)
            @param search as str
462
            @param limit as int
463
            @return [(id, title, uri, score)] as [(int, str, str, int)]
Cédric Bellegarde's avatar
Cédric Bellegarde committed
464
        """
Cédric Bellegarde's avatar
Cédric Bellegarde committed
465
        words = sub("[^\w]", " ", search.lower()).split()
466
        items = []
467
        with SqlCursor(self) as sql:
468 469 470 471
            filters = ()
            for word in words:
                filters += ("%" + word + "%", "%" + word + "%")
            filters += (limit,)
472
            request = "SELECT rowid, title, uri FROM history"
473 474
            if words:
                request += " WHERE"
475 476 477 478 479 480
                words_copy = list(words)
                while words_copy:
                    word = words_copy.pop(0)
                    request += " (title LIKE ? OR uri LIKE ?)"
                    if words_copy:
                        request += " AND "
481
            request += " ORDER BY mtime DESC, popularity DESC LIMIT ?"
482 483 484 485 486 487
            try:
                result = sql.execute(request, filters)
                items += list(result)
            except:
                Logger.error("DatabaseHistory::search(): %s -> %s",
                             (request, filters))
488 489

            # And then search containing one item
490
            request = "SELECT rowid, title, uri FROM history"
491 492
            if words:
                request += " WHERE"
493 494 495
                words_copy = list(words)
                while words_copy:
                    word = words_copy.pop(0)
496
                    request += " title LIKE ? OR uri LIKE ?"
497 498
                    if words_copy:
                        request += " OR "
499
            request += " ORDER BY mtime DESC, popularity DESC LIMIT ?"
500 501 502 503 504 505 506
            try:
                result = sql.execute(request, filters)
                items += list(result)
            except:
                Logger.error("DatabaseHistory::search(): %s -> %s",
                             (request, filters))

507 508 509 510
        # Do some scoring calculation on items
        scored_items = []
        uris = []
        for item in items:
511 512 513
            uri = item[2]
            if uri in uris:
                continue
514
            score = 0
515 516 517 518 519 520
            title = item[1].lower()
            parsed = urlparse(uri)
            if not parsed.path:
                score += 2
            elif not parsed.query:
                score += 2
521
            for word in words:
522
                lower_word = word.lower()
523 524
                # If netloc match word, +1
                if parsed.netloc.find(lower_word) != -1:
525
                    score += len(lower_word) * 2
526
                # URI match
527 528 529 530 531 532 533
                elif uri.find(lower_word) != -1:
                    score += len(lower_word)
                # Title match
                elif title.find(lower_word) != -1:
                    score += len(lower_word) * 2
            scored_items.append((item[0], item[1], item[2], score))
            uris.append(uri)
534
        return scored_items
Cédric Bellegarde's avatar
Cédric Bellegarde committed
535

536 537 538 539 540 541 542 543 544 545 546 547 548 549
    def reset_popularity(self, uri):
        """
            Reset popularity for uri
            @param uri as str
        """
        with SqlCursor(self) as sql:
            parsed = urlparse(uri)
            if parsed.scheme:
                sql.execute("UPDATE history SET popularity=0 WHERE uri=?",
                            (uri,))
            else:
                sql.execute("UPDATE history SET popularity=0 WHERE netloc=?",
                            (uri,))

Cédric Bellegarde's avatar
Cédric Bellegarde committed
550 551 552 553 554 555 556 557 558 559 560
    def exists_guid(self, guid):
        """
            Check if guid exists in db
            @return bool
        """
        with SqlCursor(self) as sql:
            result = sql.execute("SELECT guid FROM history\
                                  WHERE guid=?", (guid,))
            v = result.fetchone()
            return v is not None

Cédric Bellegarde's avatar
Cédric Bellegarde committed
561 562 563 564 565 566 567 568 569
    def get_cursor(self):
        """
            Return a new sqlite cursor
        """
        try:
            c = sqlite3.connect(self.DB_PATH, 600.0)
            c.create_collation('LOCALIZED', LocalizedCollation())
            c.create_function("noaccents", 1, noaccents)
            return c
570 571
        except Exception as e:
            Logger.error("DatabaseHistory::get_cursor(): %s", e)
Cédric Bellegarde's avatar
Cédric Bellegarde committed
572 573 574 575 576
            exit(-1)

#######################
# PRIVATE             #
#######################