Add thumbnail extraction, cover conversion

setup project structure
2022-04-15 22:52:50 +02:00 · 2022-04-12 17:05:47 +02:00
26 changed files with 676 additions and 1140 deletions
--- a/.gitignore
+++ b/.gitignore
@ -5,6 +5,7 @@ venv
 .tox
 __pycache__
 *.egg-info
+.pytest_cache

 # Jupyter
 .ipynb_checkpoints
@ -13,3 +14,6 @@ __pycache__
 *.webm
 *.mp4
 *.mp3
+
+# Application data
+/_run
--- a/deploy/docker-compose.yml
+++ b/deploy/docker-compose.yml
@ -0,0 +1,7 @@
+# Compose file that defines a single Redis service.
+version: "3"
+services:
+  redis:
+    container_name: ucast-redis
+    image: redis:alpine
+    ports:
+      # Publish the Redis port on the host's loopback interface only
+      - "127.0.0.1:6379:6379"
--- a/notes/Coverbilder.md
+++ b/notes/Coverbilder.md
@ -0,0 +1,17 @@
+# Coverbilder
+
+Podcast-Cover sind quadratisch.
+
+- Durchschnittliche Farbe der oberen und unteren 20% des Bilds berechnen
+- Farbverlauf zwischen diesen Farben als Hintergrund verwenden
+- Das Thumbnail findet in der Mitte Platz
+- Im oberen Bereich wird das Profilbild und der Kanalname eingefügt
+- Im unteren Bereich wird der Videotitel eingefügt
+- Der Text ist entweder weiß oder schwarz, je nachdem, welche Farbe den höheren Kontrast hat.
+- Textgröße: 50px, max 2 Zeilen, Overflow mit ... abschneiden.
+
+### Verwendete Python-Libraries
+
+- Pillow
+- colorthief
+- wcag-contrast-ratio
--- a/notes/Speicher.md
+++ b/notes/Speicher.md
@ -0,0 +1,56 @@
+# Datenspeicherung
+
+## Verzeichnisstruktur
+
+```txt
+_ config
+  |_ config.toml
+_ data
+  |_ LinusTechTips
+    |_ .ucast
+      |_ videos.json  # IDs und Metadaten aller heruntergeladenen Videos
+      |_ options.json  # Kanalspezifische Optionen (ID, LastScan)
+      |_ avatar.png  # Profilbild des Kanals
+      |_ feed.xml  # RSS-Feed
+      |_ covers  # Cover-Bilder
+        |_ 220409_Building a _1_000_000 Computer.png
+        |_ 220410_Apple makes GREAT Gaming Computers.png
+    |_ 220409_Building a _1_000_000 Computer.mp3
+    |_ 220410_Apple makes GREAT Gaming Computers.mp3
+
+  |_ Andreas Spiess
+  |_ ...
+```
+
+## Datenmodelle
+
+### LastScan
+
+- LastScan: datetime
+
+### ChannelOptions
+
+- ID: str
+- Active: bool = True
+- LastScan: datetime
+- SkipLivestreams: bool = True
+- SkipShorts: bool = True
+- KeepVideos: int = -1
+
+### Videos
+
+- Videos: dict[id: str -> Video]
+
+### Video
+
+- Title: str
+- Slug: str (YYMMDD_Title, used as filename)
+- Published: datetime
+- Description: str
+
+### Config
+
+- RedisURL: str
+- ScanInterval: 1h
+- DefaultChannelOptions: ChannelOptions
+- AppriseUrl: str (für Benachrichtigungen, https://github.com/caronc/apprise/wiki)
--- a/notes/YouTubeDownloading.ipynb
+++ b/notes/YouTubeDownloading.ipynb
@ -2,7 +2,11 @@
 "cells": [
  {
   "cell_type": "markdown",
-   "metadata": {},
+   "metadata": {
+    "pycharm": {
+     "name": "#%% md\n"
+    }
+   },
   "source": [
    "### Get all videos of a channel"
   ]
@ -102,7 +106,7 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 14,
+   "execution_count": 1,
   "outputs": [
    {
     "name": "stdout",
@ -111,7 +115,7 @@
      "Kanal-ID: UCGiJh0NZ52wRhYKYnuZI08Q\n",
      "Name: ThetaDev\n",
      "Description: I'm ThetaDev. I love creating cool projects using electronics, 3D printers and other awesome tech-based stuff.\n",
-      "Avatar: https://yt3.ggpht.com/ytc/AKedOLSnFfmpibLLoqyaYdsF6bJ-zaLPzomII__FrJve1w=s900-c-k-c0x00ffffff-no-rj\n"
+      "Avatar: https://yt3.ggpht.com/ytc/AKedOLSnFfmpibLLoqyaYdsF6bJ-zaLPzomII__FrJve1w=s900-c-k-c0x00ffffff-no-rj"
     ]
    }
   ],
@ -121,7 +125,7 @@
    "import json\n",
    "\n",
    "channel_url = 'https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q'\n",
-    "channel_url2 = 'https://www.youtube.com/c/LinusTechTips'\n",
+    "channel_url2 = 'https://www.youtube.com/c/MrBeast6000'\n",
    "\n",
    "session = requests.Session()\n",
    "session.headers[\n",
@ -170,7 +174,11 @@
  {
   "cell_type": "code",
   "execution_count": 4,
-   "metadata": {},
+   "metadata": {
+    "pycharm": {
+     "name": "#%%\n"
+    }
+   },
   "outputs": [
    {
     "name": "stdout",
--- a/poetry.lock
+++ b/poetry.lock
--- a/pyproject.toml
+++ b/pyproject.toml
@ -11,9 +11,15 @@ yt-dlp = "^2022.3.8"
 scrapetube = "^2.2.2"
 rfeed = "^1.1.1"
 feedparser = "^6.0.8"
+Pillow = "^9.1.0"
+colorthief = "^0.2.1"
+wcag-contrast-ratio = "^0.9"
+font-source-sans-pro = "^0.0.1"
+fonts = "^0.0.3"

 [tool.poetry.dev-dependencies]
-jupyter = "^1.0.0"
+pytest = "^7.1.1"
+pytest-cov = "^3.0.0"

 [build-system]
 requires = ["poetry-core>=1.0.0"]
--- a/tests/init.py
+++ b/tests/init.py
@ -0,0 +1,4 @@
+# coding=utf-8
+from importlib.resources import files
+
+# Resource handle for the `tests.testfiles` package; child paths are built with the `/` operator.
+DIR_TESTFILES = files('tests.testfiles')
--- a/tests/test_cover.py
+++ b/tests/test_cover.py
@ -0,0 +1,24 @@
+# coding=utf-8
+from typing import List
+
+import pytest
+from PIL import ImageFont
+from fonts.ttf import SourceSansPro
+
+import tests
+from ucast import cover
+
+
+@pytest.mark.parametrize('height,width,text,expect', [
+    (40, 300, 'Hello', ['Hello']),
+    (40, 300, 'Hello World, this is me', ['Hello World,…']),
+    (90, 300, 'Hello World, this is me', ['Hello World, this', 'is me']),
+    (90, 300, 'Rindfleischettikettierungsüberwachungsaufgabenübertragungsgesetz', ['Rindfleischettik…']),
+    (
+        1000,
+        300,
+        'Ha! du wärst Obrigkeit von Gott? Gott spendet Segen aus; du raubst! Du nicht von Gott, Tyrann!',
+        [
+            'Ha! du wärst',
+            'Obrigkeit von',
+            'Gott? Gott',
+            'spendet Segen',
+            'aus; du raubst!',
+            'Du nicht von Gott,',
+            'Tyrann!',
+        ],
+    ),
+])
+def test_split_text(height: int, width: int, text: str, expect: List[str]):
+    """Wrapping `text` into a `width` x `height` box must produce exactly the `expect` lines."""
+    source_sans_40 = ImageFont.truetype(SourceSansPro, 40)
+    assert cover._split_text(height, width, text, source_sans_40, 8) == expect
--- a/tests/testfiles/avatar/a1.jpg
+++ b/tests/testfiles/avatar/a1.jpg
--- a/tests/testfiles/avatar/a2.jpg
+++ b/tests/testfiles/avatar/a2.jpg
--- a/tests/testfiles/avatar/a3.jpg
+++ b/tests/testfiles/avatar/a3.jpg
--- a/tests/testfiles/cover/c1.png
+++ b/tests/testfiles/cover/c1.png
--- a/tests/testfiles/cover/c2.png
+++ b/tests/testfiles/cover/c2.png
--- a/tests/testfiles/cover/c3.png
+++ b/tests/testfiles/cover/c3.png
--- a/tests/testfiles/get_cover.py
+++ b/tests/testfiles/get_cover.py
@ -0,0 +1,36 @@
+# coding=utf-8
+import sys
+import os
+
+from ucast import youtube, util, cover
+import tests
+
+# This script generates cover images for testing:
+# python tests/testfiles/get_cover.py <Video-ID>
+
+
+if __name__ == '__main__':
+    cli_args = sys.argv[1:]
+    if not cli_args:
+        print('No video id given')
+        sys.exit(1)
+
+    # Collect everything the cover needs from the video and its channel
+    vinfo = youtube.get_video_info(cli_args[0])
+    title, channel_name = vinfo['fulltitle'], vinfo['uploader']
+    thumbnail_url = youtube.get_thumbnail_url(vinfo)
+    channel_metadata = youtube.get_channel_metadata(vinfo['channel_url'])
+
+    # Use the first index for which no cover file exists yet
+    cover_dir = tests.DIR_TESTFILES / 'cover'
+    ti = 1
+    while os.path.exists(cover_dir / f'c{ti}.png'):
+        ti += 1
+
+    tn_file = tests.DIR_TESTFILES / 'thumbnail' / f't{ti}.webp'
+    av_file = tests.DIR_TESTFILES / 'avatar' / f'a{ti}.jpg'
+    cv_file = cover_dir / f'c{ti}.png'
+
+    # Fetch the input images, then render the cover from them
+    util.download_file(thumbnail_url, tn_file)
+    util.download_file(channel_metadata.avatar_url, av_file)
+
+    cover.create_cover_file(tn_file, av_file, title, channel_name, cv_file)
--- a/tests/testfiles/sources.md
+++ b/tests/testfiles/sources.md
@ -0,0 +1,5 @@
+### Quellen der Thumbnails/Avatarbilder zum Testen
+
+- a1/t1: [ThetaDev](https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q) (CC-BY)
+- a2/t2: [Blender](https://www.youtube.com/c/BlenderFoundation) (CC-BY)
+- a3/t3: [media.ccc.de](https://www.youtube.com/channel/UC2TXq_t06Hjdr2g_KdKpHQg) (CC-BY)
--- a/tests/testfiles/thumbnail/t1.webp
+++ b/tests/testfiles/thumbnail/t1.webp
--- a/tests/testfiles/thumbnail/t2.webp
+++ b/tests/testfiles/thumbnail/t2.webp
--- a/tests/testfiles/thumbnail/t3.webp
+++ b/tests/testfiles/thumbnail/t3.webp
--- a/ucast/init.py
+++ b/ucast/init.py
@ -0,0 +1,36 @@
+import os
+
+from flask import Flask
+
+
+def create_app(test_config=None):
+    """Create and configure the Flask application.
+
+    :param test_config: Optional mapping of configuration values. When given,
+        it is applied instead of the instance's ``config.py``.
+    :return: The configured Flask application.
+    :raises OSError: If the instance folder cannot be created.
+    """
+    # create and configure the app
+    app = Flask(__name__, instance_relative_config=True)
+    app.config.from_mapping(
+        SECRET_KEY='dev',
+        DATABASE=os.path.join(app.instance_path, 'flaskr.sqlite'),
+    )
+
+    if test_config is None:
+        # load the instance config, if it exists, when not testing
+        app.config.from_pyfile('config.py', silent=True)
+    else:
+        # load the test config if passed in
+        app.config.from_mapping(test_config)
+
+    # Ensure the instance folder exists. Only "already exists" is tolerated;
+    # other failures (e.g. missing permissions) are reported instead of
+    # being silently swallowed.
+    os.makedirs(app.instance_path, exist_ok=True)
+
+    # a simple page that says hello
+    @app.route('/')
+    def hello():
+        return 'Hello, World!'
+
+    # route that always raises, to exercise the error handling
+    @app.route('/err')
+    def errtest():
+        raise Exception('I f*cked up')
+
+    return app
--- a/ucast/app.py
+++ b/ucast/app.py
--- a/ucast/cover.py
+++ b/ucast/cover.py
@ -0,0 +1,171 @@
+# coding=utf-8
+import math
+from typing import Tuple, List, Optional
+
+from PIL import Image, ImageDraw, ImageFont
+from colorthief import ColorThief
+import wcag_contrast_ratio
+from fonts.ttf import SourceSansPro
+
+from ucast import types
+
+CHAR_ELLIPSIS = '…'
+COVER_WIDTH = 500
+
+
+def _split_text(height: int, width: int, text: str, font: ImageFont.FreeTypeFont, line_spacing=0) -> List[str]:
+    """Wrap `text` into lines that fit a box of `width` x `height` pixels.
+
+    The text is split at single spaces. Words are collected on a line while the
+    rendered width (measured with `font.getsize`) does not exceed `width`. A single
+    word that is wider than the box is shortened and terminated with an ellipsis.
+    If not all of the text fits into the available number of lines, the last line
+    is terminated with an ellipsis.
+
+    :param height: Height of the box
+    :param width: Width of the box
+    :param text: Text to be wrapped
+    :param font: Font used for measuring; `font.size` is used as the line height
+    :param line_spacing: Extra distance between two lines (may be negative)
+    :return: List of lines; empty if the box is lower than one line
+    """
+    # Not even a single line fits into the box
+    if height < font.size:
+        return []
+
+    # The first line needs font.size, every further line font.size + line_spacing
+    max_lines = math.floor((height - font.size) / (font.size + line_spacing)) + 1
+
+    lines = []
+    line = ''
+
+    for word in text.split(' '):
+        if len(lines) >= max_lines:
+            # Box is full but there is text left: keep the word as pending
+            # text so that the truncation below is applied
+            line = word
+            break
+
+        if line == '':
+            nline = word
+        else:
+            nline = line + ' ' + word
+
+        if font.getsize(nline)[0] <= width:
+            # Word still fits on the current line
+            line = nline
+        elif line != '':
+            # Current line is full: finish it and start a new one with this word
+            lines.append(line)
+            line = word
+        else:
+            # try to trim current word
+            # (characters are removed from the end until word + ellipsis fits;
+            # the removed rest of the word is not carried over to the next line)
+            while nline:
+                nline = nline[:-1]
+                nline_e = nline + CHAR_ELLIPSIS
+                if font.getsize(nline_e)[0] <= width:
+                    lines.append(nline_e)
+                    break
+
+    if line != '':
+        if len(lines) >= max_lines:
+            # Drop the last line and add ... to the end
+            lastline = lines[-1] + CHAR_ELLIPSIS
+            if font.getsize(lastline)[0] <= width:
+                lines[-1] = lastline
+            else:
+                # Ellipsis does not fit: cut the last line at its last space.
+                # NOTE(review): without a space rfind returns -1, so only the
+                # final character is replaced by the ellipsis.
+                i_last_space = lines[-1].rfind(' ')
+                lines[-1] = lines[-1][:i_last_space] + CHAR_ELLIPSIS
+        else:
+            # There is still room: the pending text becomes the final line
+            lines.append(line)
+
+    return lines
+
+
+def _draw_text_box(draw: ImageDraw.ImageDraw, box: Tuple[int, int, int, int], text: str, font: ImageFont.FreeTypeFont,
+                   color: types.Color = (0, 0, 0), line_spacing=0, vertical_center=True):
+    """Draw `text`, wrapped by `_split_text`, into a rectangular area.
+
+    :param draw: Drawing context of the target image
+    :param box: Area as (left, top, right, bottom)
+    :param text: Text to be drawn
+    :param font: Font; `font.size` is used as the line height
+    :param color: Text color
+    :param line_spacing: Extra distance between two lines
+    :param vertical_center: Center the lines vertically inside the box instead of
+        starting at its top edge
+    """
+    left, top, right, bottom = box
+    box_height = bottom - top
+    line_pitch = font.size + line_spacing
+
+    wrapped = _split_text(box_height, right - left, text, font, line_spacing)
+
+    first_y = top
+    if vertical_center:
+        # The last line is not followed by any spacing
+        block_height = len(wrapped) * line_pitch - line_spacing
+        first_y += int((box_height - block_height) / 2)
+
+    for row, content in enumerate(wrapped):
+        draw.text((left, first_y + row * line_pitch), content, color, font)
+
+
+def _get_dominant_color(img: Image.Image):
+    """Return the result of ColorThief's `get_color()` for an already loaded image."""
+    # Create the ColorThief object without running its constructor and
+    # hand it the image object directly.
+    extractor = ColorThief.__new__(ColorThief)
+    extractor.image = img
+    dominant = extractor.get_color()
+    return dominant
+
+
+def _interpolate_color(color_from: types.Color, color_to: types.Color, interval: int):
+    """Yield `interval` colors of a linear gradient starting at `color_from`.
+
+    Each color is a list of rounded channel values. The steps run from 0 to
+    `interval - 1`, so `color_to` itself is approached but not yielded.
+    """
+    deltas = [(end - start) / interval for start, end in zip(color_from, color_to)]
+    for step in range(interval):
+        yield [round(start + delta * step) for start, delta in zip(color_from, deltas)]
+
+
+def _get_text_color(bg_color) -> types.Color:
+    """Choose white or black text, whichever has the higher contrast ratio to `bg_color`."""
+    # wcag_contrast_ratio is called with channel values scaled to 0..1
+    background = tuple([channel / 255 for channel in bg_color])
+    contrast_black = wcag_contrast_ratio.rgb((0, 0, 0), background)
+    contrast_white = wcag_contrast_ratio.rgb((1, 1, 1), background)
+    # Black is used when both ratios are equal
+    return (255, 255, 255) if contrast_white > contrast_black else (0, 0, 0)
+
+
+def _create_cover_image(thumbnail: Image.Image, avatar: Optional[Image.Image], title: str, channel: str) -> Image.Image:
+    """Compose the square cover image for one video.
+
+    The thumbnail is scaled to the cover width and pasted vertically centered onto
+    a gradient running from the dominant color of its top 20% to the dominant color
+    of its bottom 20%. The band above the thumbnail receives the circular avatar
+    (if given) and the channel name, the band below it the video title.
+
+    :param thumbnail: Video thumbnail
+    :param avatar: Channel avatar or None
+    :param title: Video title
+    :param channel: Channel name
+    :return: New RGB image of COVER_WIDTH x COVER_WIDTH pixels
+    """
+    # Fit the thumbnail to the cover width while keeping its aspect ratio
+    scaled_height = int(COVER_WIDTH / thumbnail.width * thumbnail.height)
+    scaled = thumbnail.resize((COVER_WIDTH, scaled_height), Image.Resampling.LANCZOS)
+
+    # Dominant colors of the upper and lower fifth of the scaled thumbnail
+    upper_strip = scaled.crop((0, 0, COVER_WIDTH, int(scaled_height * 0.2)))
+    lower_strip = scaled.crop((0, int(scaled_height * 0.8), COVER_WIDTH, scaled_height))
+    upper_color = _get_dominant_color(upper_strip)
+    lower_color = _get_dominant_color(lower_strip)
+
+    canvas = Image.new('RGB', (COVER_WIDTH, COVER_WIDTH))
+    painter = ImageDraw.Draw(canvas)
+
+    # Background gradient: one horizontal line per pixel row
+    gradient = _interpolate_color(upper_color, lower_color, canvas.height)
+    for row, row_color in enumerate(gradient):
+        painter.line(((0, row), (canvas.width, row)), tuple(row_color), 1)
+
+    # Height of the band above the thumbnail, which is also its paste offset
+    band = int((COVER_WIDTH - scaled_height) / 2)
+    canvas.paste(scaled, (0, band))
+
+    # Without an avatar both values stay 0 and the channel name starts at the left margin
+    avatar_gap = avatar_side = 0
+    if avatar:
+        avatar_gap = int(band * 0.05)
+        avatar_side = band - 2 * avatar_gap
+
+        shrunk = avatar.resize((avatar_side, avatar_side), Image.Resampling.LANCZOS)
+
+        # A filled ellipse as paste mask crops the avatar to a circle
+        round_mask = Image.new('L', (avatar_side, avatar_side))
+        ImageDraw.Draw(round_mask).ellipse((0, 0, avatar_side, avatar_side), 255)
+
+        canvas.paste(shrunk, (avatar_gap, avatar_gap), round_mask)
+
+    # Text layout
+    side_margin = 16
+    top_shift = -17  # moves the top edge of both text boxes up by 17px
+    spacing = -4
+    right_edge = COVER_WIDTH - side_margin
+
+    typeface = ImageFont.truetype(SourceSansPro, 50)
+    upper_text_color = _get_text_color(upper_color)
+    lower_text_color = _get_text_color(lower_color)
+
+    # Channel name to the right of the avatar, title below the thumbnail
+    channel_box = (avatar_gap + avatar_side + side_margin, top_shift, right_edge, band)
+    title_box = (side_margin, COVER_WIDTH - band + top_shift, right_edge, COVER_WIDTH)
+
+    _draw_text_box(painter, channel_box, channel, typeface, upper_text_color, spacing)
+    _draw_text_box(painter, title_box, title, typeface, lower_text_color, spacing)
+
+    return canvas
+
+
+def create_cover_file(thumbnail_path: types.Path, avatar_path: Optional[types.Path], title: str, channel: str,
+                      cover_path: types.Path):
+    """Render the cover image of a video and save it to `cover_path`.
+
+    :param thumbnail_path: Image file of the video thumbnail
+    :param avatar_path: Image file of the channel avatar; a falsy value omits the avatar
+    :param title: Video title
+    :param channel: Channel name
+    :param cover_path: Destination file of the cover image
+    """
+    # Image.open keeps the underlying file open. The context managers make sure
+    # both input files are closed once the cover has been rendered, even if
+    # rendering fails.
+    with Image.open(thumbnail_path) as thumbnail:
+        if avatar_path:
+            with Image.open(avatar_path) as avatar:
+                cvr = _create_cover_image(thumbnail, avatar, title, channel)
+        else:
+            cvr = _create_cover_image(thumbnail, None, title, channel)
+
+    # The cover is a new in-memory image, so it is still valid after the inputs were closed
+    cvr.save(cover_path)
--- a/ucast/types.py
+++ b/ucast/types.py
@ -0,0 +1,6 @@
+# coding=utf-8
+from os import PathLike
+from typing import Tuple, Union
+
+# Color given as a tuple of three integer channel values
+Color = Tuple[int, int, int]
+# File system path given as text, bytes or an os.PathLike object
+Path = Union[str, bytes, PathLike]
--- a/ucast/util.py
+++ b/ucast/util.py
@ -0,0 +1,9 @@
+# coding=utf-8
+import requests
+
+from ucast import types
+
+
+def download_file(url: str, download_path: types.Path):
+    """Download `url` and store the response body at `download_path`.
+
+    :param url: Address of the resource; redirects are followed
+    :param download_path: Destination file, opened for binary writing
+    :raises requests.HTTPError: If the server responds with an error status
+    """
+    r = requests.get(url, allow_redirects=True)
+    # Fail instead of storing the body of an error response as if it were the file
+    r.raise_for_status()
+    # The context manager closes (and thereby flushes) the file deterministically
+    with open(download_path, 'wb') as f:
+        f.write(r.content)
--- a/ucast/youtube.py
+++ b/ucast/youtube.py
@ -0,0 +1,79 @@
+# coding=utf-8
+from operator import itemgetter
+import json
+from dataclasses import dataclass
+
+from yt_dlp import YoutubeDL
+from scrapetube import scrapetube
+import requests
+
+
+def get_thumbnail_url(vinfo):
+    """Get the best quality thumbnail
+
+    :param vinfo: Video info dict with a non-empty ``thumbnails`` list
+    :return: URL of the thumbnail with the highest ``preference``; the first
+        one wins if several share the highest value
+    :raises ValueError: If the ``thumbnails`` list is empty
+    """
+    def preference(thumbnail):
+        # `preference` is an optional field; rank entries without it lowest
+        # instead of failing with a KeyError
+        value = thumbnail.get('preference')
+        return -1 if value is None else value
+
+    return max(vinfo['thumbnails'], key=preference)['url']
+
+
+def get_video_info(video_id):
+    """Return the info yt-dlp extracts for `video_id`, without downloading the video."""
+    downloader = YoutubeDL()
+    with downloader:
+        info = downloader.extract_info(video_id, download=False)
+    return info
+
+
+def download_video(video_id, download_path, sponsorblock=False):
+    """Download the best audio stream of a video and extract it as mp3.
+
+    :param video_id: Video to download
+    :param download_path: Passed to yt-dlp as output template (``outtmpl``)
+    :param sponsorblock: Additionally run the SponsorBlock and ModifyChapters
+        postprocessors for the ``sponsor`` category
+    :return: Info returned by yt-dlp's ``extract_info``
+    """
+    postprocessors = [
+        {'key': 'FFmpegExtractAudio', 'preferredcodec': 'mp3'},
+    ]
+
+    if sponsorblock:
+        postprocessors += [
+            {'key': 'SponsorBlock', 'categories': ['sponsor'], 'when': 'after_filter'},
+            {'key': 'ModifyChapters', 'remove_sponsor_segments': ['sponsor']},
+        ]
+
+    options = {
+        'format': 'bestaudio',
+        'postprocessors': postprocessors,
+        'outtmpl': download_path,
+    }
+
+    # extract_info downloads the video and returns its metadata
+    with YoutubeDL(options) as downloader:
+        return downloader.extract_info(video_id)
+
+
+@dataclass
+class ChannelMetadata:
+    """Metadata of a channel as collected by `get_channel_metadata`."""
+    id: str  # taken from the channel's `externalId`
+    name: str  # channel title
+    description: str  # channel description text
+    avatar_url: str  # URL of the first listed avatar thumbnail
+
+
+def get_channel_metadata(channel_url):
+    """Read ID, name, description and avatar URL from a channel's videos page.
+
+    The page is requested with a browser user agent and the values are taken from
+    the `channelMetadataRenderer` object of the embedded `ytInitialData` JSON.
+
+    :param channel_url: URL of the channel (without trailing slash)
+    :return: ChannelMetadata of the channel
+    """
+    session = requests.Session()
+    session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"
+
+    html = scrapetube.get_initial_data(session, f"{channel_url}/videos?view=0&flow=grid")
+
+    # The extracted text is cut before the end marker "};", so the closing brace is added again
+    raw_json = scrapetube.get_json_from_html(html, "var ytInitialData = ", 0, "};") + "}"
+    renderer = json.loads(raw_json)['metadata']['channelMetadataRenderer']
+
+    return ChannelMetadata(
+        renderer['externalId'],
+        renderer['title'],
+        renderer['description'],
+        renderer['avatar']['thumbnails'][0]['url'],
+    )
Author	SHA1	Message	Date
Theta-Dev	c6c3849a82	Add thumbnail extraction, cover conversion	2022-04-15 22:52:50 +02:00
Theta-Dev	1047c8abc3	setup project structure	2022-04-12 17:05:47 +02:00