# helper_web_youtube.py
# Copyright (c) 2014-2019 Cedric Bellegarde <cedric.bellegarde@adishatz.org>
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.

from gi.repository import GLib

import json
from re import sub

from lollypop.define import App, GOOGLE_API_ID
from lollypop.utils import escape
from lollypop.logger import Logger


class YouTubeHelper:
    """
        YouTube helper
        Search a YouTube video matching a track and get its content uri
    """

    # Score given to page titles that can not match the wanted track
    __BAD_SCORE = 1000000

    def __init__(self):
        """
            Init helper
        """
        # Set once the Google API search raised an error: next searches
        # go straight to the HTML results page
        self.__fallback = False

    def get_uri(self, track, cancellable):
        """
            Get YouTube watch uri for track
            @param track as Track
            @param cancellable as Gio.Cancellable
            @return uri as str, empty if no video was found
        """
        yid = self.__get_youtube_id(track, cancellable)
        if yid is None:
            return ""
        return "https://www.youtube.com/watch?v=%s" % yid

    def get_uri_content(self, track):
        """
            Get content uri
            Spawn youtube-dl and return what it wrote on stdout
            @param track as Track
            @return content uri as str/None
        """
        # Remove playlist args
        uri = sub("list=.*", "", track.uri)
        # Try an audio only format first, then youtube-dl default format
        argv_list = [
            ["youtube-dl", "-g", "-f", "bestaudio", uri, None],
            ["youtube-dl", "-g", uri, None]]
        error = ""
        for argv in argv_list:
            try:
                (_status, stdout, stderr, _exit) = GLib.spawn_sync(
                    None,
                    argv,
                    None,
                    GLib.SpawnFlags.SEARCH_PATH,
                    None)
            except GLib.Error as err:
                # youtube-dl could not be spawned (not installed, ...)
                error = str(err)
                continue
            if stdout:
                return stdout.decode("utf-8")
            # Only the last failure is reported
            error = stderr.decode("utf-8") if stderr else ""
        if App().notify is not None:
            App().notify.send(error)
        Logger.warning("YouTubeHelper::get_uri_content(): %s", error)
        return None

#######################
# PRIVATE             #
#######################
    def __get_artist(self, track):
        """
            Get artist used to search and score videos
            @param track as Track
            @return first track artist as str, empty if track has none
        """
        artists = track.artists
        return artists[0] if artists else ""

    def __get_search(self, track):
        """
            Get search terms for track, escaped for an uri
            @param track as Track
            @return str
        """
        unescaped = "%s %s" % (self.__get_artist(track), track.name)
        return GLib.uri_escape_string(
            unescaped.replace(" ", "+"),
            None,
            True)

    def __select_best_id(self, scored):
        """
            Get youtube id with the lowest score
            @param scored as [(score as int, youtube id as str)]
            @return youtube id as str/None if nothing is below bad score
        """
        # min() returns the first minimal item: first result wins on ties
        best = min(scored, key=lambda item: item[0], default=None)
        if best is None or best[0] >= self.__BAD_SCORE:
            return None
        return best[1]

    def __get_best_id(self, track, candidates):
        """
            Get youtube id of the candidate best matching track
            @param track as Track
            @param candidates as [(youtube id as str, page title as str)]
            @return youtube id as str/None
        """
        artist = self.__get_artist(track)
        # Lazy: candidates are read and scored while min() consumes them
        scored = ((self.__get_youtube_score(page_title,
                                            track.name,
                                            artist,
                                            track.album.name), yid)
                  for (yid, page_title) in candidates)
        return self.__select_best_id(scored)

    def __get_youtube_id(self, track, cancellable):
        """
            Get youtube id
            Use Google API and switch to HTML fallback on any error
            @param track as Track
            @param cancellable as Gio.Cancellable
            @return youtube id as str/None
        """
        if self.__fallback:
            return self.__get_youtube_id_fallback(track, cancellable)
        search = self.__get_search(track)
        key = App().settings.get_value("cs-api-key").get_string()
        try:
            uri = "https://www.googleapis.com/youtube/v3/" +\
                  "search?part=snippet&q=%s&" % search +\
                  "type=video&key=%s&cx=%s" % (key, GOOGLE_API_ID)
            (status, data) = App().task_helper.load_uri_content_sync(
                uri, cancellable)
            if status:
                decode = json.loads(data.decode("utf-8"))
                # A missing key raises while scoring: handled below
                candidates = ((item["id"]["videoId"],
                               item["snippet"]["title"])
                              for item in decode["items"])
                return self.__get_best_id(track, candidates)
        except Exception as e:
            Logger.warning("YouTubeHelper::__get_youtube_id(): %s", e)
            # Do not try the API again for this helper
            self.__fallback = True
            return self.__get_youtube_id_fallback(track, cancellable)
        return None

    def __get_youtube_score(self, page_title, title, artist, album):
        """
            Calculate youtube score
            if page_title looks like (title, artist, album), score is lower
            @param page_title as str
            @param title as str
            @param artist as str
            @param album as str
            @return int
        """
        page_title = escape(page_title.lower(), [])
        artist = escape(artist.lower(), [])
        album = escape(album.lower(), [])
        title = escape(title.lower(), [])
        # YouTube page title should be at least as long as wanted title
        if len(page_title) < len(title):
            return self.__BAD_SCORE
        # Remove common words for a valid track, then artist name,
        # album name and title: score is the count of remaining chars
        for known in ("official", "video", "audio", artist, album, title):
            page_title = page_title.replace(known, "")
        return len(page_title)

    def __get_youtube_id_fallback(self, track, cancellable):
        """
            Get youtube id (fallback)
            Parse links found in YouTube HTML results page
            @param track as Track
            @param cancellable as Gio.Cancellable
            @return youtube id as str/None
        """
        try:
            # beautifulsoup4 is the package name, bs4 the python module
            from bs4 import BeautifulSoup
        except ImportError:
            print("$ sudo pip3 install beautifulsoup4")
            return None
        try:
            search = self.__get_search(track)
            uri = "https://www.youtube.com/results?search_query=%s" % search
            (status, data) = App().task_helper.load_uri_content_sync(
                uri, cancellable)
            if not status:
                return None

            html = data.decode("utf-8")
            soup = BeautifulSoup(html, "html.parser")
            ytems = []
            for link in soup.find_all("a"):
                href = link.get("href")
                title = link.get("title")
                if href is None or title is None:
                    continue
                # Only keep links to a video
                if href.startswith("/watch?v="):
                    href = href.replace("/watch?v=", "")
                    ytems.append((href, title))
            return self.__get_best_id(track, ytems)
        except Exception as e:
            Logger.warning("YouTubeHelper::__get_youtube_id_fallback(): %s", e)
            self.__fallback = True
        return None