Compare commits

...

2 commits

Author SHA1 Message Date
c6c3849a82 Add thumbnail extraction, cover conversion 2022-04-15 22:52:50 +02:00
1047c8abc3 setup project structure 2022-04-12 17:05:47 +02:00
26 changed files with 676 additions and 1140 deletions

4
.gitignore vendored
View file

@ -5,6 +5,7 @@ venv
.tox .tox
__pycache__ __pycache__
*.egg-info *.egg-info
.pytest_cache
# Jupyter # Jupyter
.ipynb_checkpoints .ipynb_checkpoints
@ -13,3 +14,6 @@ __pycache__
*.webm *.webm
*.mp4 *.mp4
*.mp3 *.mp3
# Application data
/_run

View file

@ -0,0 +1,7 @@
version: "3"
services:
redis:
container_name: ucast-redis
image: redis:alpine
ports:
- "127.0.0.1:6379:6379"

17
notes/Coverbilder.md Normal file
View file

@ -0,0 +1,17 @@
# Coverbilder
Podcast-Cover sind quadratisch.
- Durchschnittliche Farbe der oberen und unteren 20% des Bilds berechnen
- Farbverlauf zwischen diesen Farben als Hintergrund verwenden
- Das Thumbnail findet in der Mitte Platz
- Im oberen Bereich wird das Profilbild und der Kanalname eingefügt
- Im unteren Bereich wird der Videotitel eingefügt
- Der Text ist entweder weiß oder schwarz, je nachdem, welche Farbe den höheren Kontrast hat.
- Textgröße: 50px, max 2 Zeilen, Overflow mit ... abschneiden.
### Verwendete Python-Libraries
- Pillow
- colorthief
- wcag-contrast-ratio

56
notes/Speicher.md Normal file
View file

@ -0,0 +1,56 @@
# Datenspeicherung
## Verzeichnisstruktur
```txt
_ config
|_ config.toml
_ data
|_ LinusTechTips
|_ .ucast
|_ videos.json # IDs und Metadaten aller heruntergeladenen Videos
|_ options.json # Kanalspezifische Optionen (ID, LastScan)
|_ avatar.png # Profilbild des Kanals
|_ feed.xml # RSS-Feed
|_ covers # Cover-Bilder
|_ 220409_Building a _1_000_000 Computer.png
|_ 220410_Apple makes GREAT Gaming Computers.png
|_ 220409_Building a _1_000_000 Computer.mp3
|_ 220410_Apple makes GREAT Gaming Computers.mp3
|_ Andreas Spiess
|_ ...
```
## Datenmodelle
### LastScan
- LastScan: datetime
### ChannelOptions
- ID: str
- Active: bool = True
- LastScan: datetime
- SkipLivestreams: bool = True
- SkipShorts: bool = True
- KeepVideos: int = -1
### Videos
- Videos: dict[id: str -> Video]
### Video
- Title: str
- Slug: str (YYMMDD_Title, used as filename)
- Published: datetime
- Description: str
### Config
- RedisURL: str
- ScanInterval: 1h
- DefaultChannelOptions: ChannelOptions
- AppriseUrl: str (für Benachrichtigungen, https://github.com/caronc/apprise/wiki)

View file

@ -2,7 +2,11 @@
"cells": [ "cells": [
{ {
"cell_type": "markdown", "cell_type": "markdown",
"metadata": {}, "metadata": {
"pycharm": {
"name": "#%% md\n"
}
},
"source": [ "source": [
"### Get all videos of a channel" "### Get all videos of a channel"
] ]
@ -102,7 +106,7 @@
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 14, "execution_count": 1,
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",
@ -111,7 +115,7 @@
"Kanal-ID: UCGiJh0NZ52wRhYKYnuZI08Q\n", "Kanal-ID: UCGiJh0NZ52wRhYKYnuZI08Q\n",
"Name: ThetaDev\n", "Name: ThetaDev\n",
"Description: I'm ThetaDev. I love creating cool projects using electronics, 3D printers and other awesome tech-based stuff.\n", "Description: I'm ThetaDev. I love creating cool projects using electronics, 3D printers and other awesome tech-based stuff.\n",
"Avatar: https://yt3.ggpht.com/ytc/AKedOLSnFfmpibLLoqyaYdsF6bJ-zaLPzomII__FrJve1w=s900-c-k-c0x00ffffff-no-rj\n" "Avatar: https://yt3.ggpht.com/ytc/AKedOLSnFfmpibLLoqyaYdsF6bJ-zaLPzomII__FrJve1w=s900-c-k-c0x00ffffff-no-rj"
] ]
} }
], ],
@ -121,7 +125,7 @@
"import json\n", "import json\n",
"\n", "\n",
"channel_url = 'https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q'\n", "channel_url = 'https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q'\n",
"channel_url2 = 'https://www.youtube.com/c/LinusTechTips'\n", "channel_url2 = 'https://www.youtube.com/c/MrBeast6000'\n",
"\n", "\n",
"session = requests.Session()\n", "session = requests.Session()\n",
"session.headers[\n", "session.headers[\n",
@ -170,7 +174,11 @@
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": 4, "execution_count": 4,
"metadata": {}, "metadata": {
"pycharm": {
"name": "#%%\n"
}
},
"outputs": [ "outputs": [
{ {
"name": "stdout", "name": "stdout",

1336
poetry.lock generated

File diff suppressed because it is too large Load diff

View file

@ -11,9 +11,15 @@ yt-dlp = "^2022.3.8"
scrapetube = "^2.2.2" scrapetube = "^2.2.2"
rfeed = "^1.1.1" rfeed = "^1.1.1"
feedparser = "^6.0.8" feedparser = "^6.0.8"
Pillow = "^9.1.0"
colorthief = "^0.2.1"
wcag-contrast-ratio = "^0.9"
font-source-sans-pro = "^0.0.1"
fonts = "^0.0.3"
[tool.poetry.dev-dependencies] [tool.poetry.dev-dependencies]
jupyter = "^1.0.0" pytest = "^7.1.1"
pytest-cov = "^3.0.0"
[build-system] [build-system]
requires = ["poetry-core>=1.0.0"] requires = ["poetry-core>=1.0.0"]

4
tests/__init__.py Normal file
View file

@ -0,0 +1,4 @@
# coding=utf-8
from importlib.resources import files
# Root of the bundled test fixtures (the `tests.testfiles` package).
# Sub-paths are built by joining with `/`.
DIR_TESTFILES = files('tests.testfiles')

24
tests/test_cover.py Normal file
View file

@ -0,0 +1,24 @@
# coding=utf-8
from typing import List
import pytest
from PIL import ImageFont
from fonts.ttf import SourceSansPro
import tests
from ucast import cover
# Each case: (box height, box width, input text, expected wrapped lines)
_SPLIT_TEXT_CASES = [
    (40, 300, 'Hello', ['Hello']),
    (40, 300, 'Hello World, this is me', ['Hello World,…']),
    (90, 300, 'Hello World, this is me', ['Hello World, this', 'is me']),
    (90, 300, 'Rindfleischettikettierungsüberwachungsaufgabenübertragungsgesetz', ['Rindfleischettik…']),
    (
        1000,
        300,
        'Ha! du wärst Obrigkeit von Gott? Gott spendet Segen aus; du raubst! Du nicht von Gott, Tyrann!',
        [
            'Ha! du wärst',
            'Obrigkeit von',
            'Gott? Gott',
            'spendet Segen',
            'aus; du raubst!',
            'Du nicht von Gott,',
            'Tyrann!',
        ],
    ),
]


@pytest.mark.parametrize('height,width,text,expect', _SPLIT_TEXT_CASES)
def test_split_text(height: int, width: int, text: str, expect: List[str]):
    """Wrap ``text`` with 40px Source Sans Pro and 8px line spacing and compare the resulting lines."""
    source_sans = ImageFont.truetype(SourceSansPro, 40)
    assert cover._split_text(height, width, text, source_sans, 8) == expect

Binary file not shown.

After

Width:  |  Height:  |  Size: 186 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 32 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 53 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 234 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 229 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 173 KiB

View file

@ -0,0 +1,36 @@
# coding=utf-8
import sys
import os
from ucast import youtube, util, cover
import tests
# This script generates cover images for testing purposes.
# Usage: python tests/testfiles/get_cover.py <Video-ID>
if __name__ == '__main__':
    if len(sys.argv) < 2:
        print('No video id given')
        sys.exit(1)

    # Fetch the video metadata and derive everything the cover needs from it
    info = youtube.get_video_info(sys.argv[1])
    video_title = info['fulltitle']
    uploader = info['uploader']
    thumb_url = youtube.get_thumbnail_url(info)
    channel = youtube.get_channel_metadata(info['channel_url'])

    # Use the first index for which no cover file exists yet
    testfiles = tests.DIR_TESTFILES
    index = 1
    while True:
        cover_file = testfiles / 'cover' / f'c{index}.png'
        if not os.path.exists(cover_file):
            break
        index += 1

    thumb_file = testfiles / 'thumbnail' / f't{index}.webp'
    avatar_file = testfiles / 'avatar' / f'a{index}.jpg'

    # Download the source images, then render the cover from them
    util.download_file(thumb_url, thumb_file)
    util.download_file(channel.avatar_url, avatar_file)
    cover.create_cover_file(thumb_file, avatar_file, video_title, uploader, cover_file)

View file

@ -0,0 +1,5 @@
### Quellen der Thumbnails/Avatarbilder zum Testen
- a1/t1: [ThetaDev](https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q) (CC-BY)
- a2/t2: [Blender](https://www.youtube.com/c/BlenderFoundation) (CC-BY)
- a3/t3: [media.ccc.de](https://www.youtube.com/channel/UC2TXq_t06Hjdr2g_KdKpHQg) (CC-BY)

Binary file not shown.

After

Width:  |  Height:  |  Size: 92 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 101 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 28 KiB

36
ucast/__init__.py Normal file
View file

@ -0,0 +1,36 @@
import os
from flask import Flask
def create_app(test_config=None):
    """Application factory: build, configure and return the Flask app.

    :param test_config: optional mapping of settings; when given it is applied
        instead of loading ``config.py`` from the instance folder
    :return: the configured Flask application
    """
    app = Flask(__name__, instance_relative_config=True)

    # Built-in defaults; the config sources below may override them
    defaults = {
        'SECRET_KEY': 'dev',
        'DATABASE': os.path.join(app.instance_path, 'flaskr.sqlite'),
    }
    app.config.from_mapping(defaults)

    if test_config is not None:
        # Explicit (test) configuration takes the place of the instance config
        app.config.from_mapping(test_config)
    else:
        # A missing instance config file is not an error (silent=True)
        app.config.from_pyfile('config.py', silent=True)

    # Make sure the instance folder exists; any OSError (e.g. it is already there) is ignored
    try:
        os.makedirs(app.instance_path)
    except OSError:
        pass

    @app.route('/')
    def hello():
        # Simple page that says hello
        return 'Hello, World!'

    @app.route('/err')
    def errtest():
        # Route that always raises
        raise Exception('I f*cked up')

    return app

171
ucast/cover.py Normal file
View file

@ -0,0 +1,171 @@
# coding=utf-8
import math
from typing import Tuple, List, Optional
from PIL import Image, ImageDraw, ImageFont
from colorthief import ColorThief
import wcag_contrast_ratio
from fonts.ttf import SourceSansPro
from ucast import types
# Marker appended to text that had to be truncated (U+2026 HORIZONTAL ELLIPSIS).
# Written as an escape sequence so the character cannot get lost to encoding problems.
CHAR_ELLIPSIS = '\u2026'
# Edge length in pixels of the (square) cover image.
COVER_WIDTH = 500
def _split_text(height: int, width: int, text: str, font: ImageFont.FreeTypeFont, line_spacing=0) -> List[str]:
    """Wrap ``text`` into lines that fit into a box of ``width`` x ``height`` pixels.

    The text is split at single spaces. A word that is wider than the box on
    its own is cut off and terminated with ``CHAR_ELLIPSIS``. If the text does
    not fit into the available number of lines, the last line is terminated
    with ``CHAR_ELLIPSIS`` as well.

    :param height: box height in pixels
    :param width: box width in pixels
    :param text: text to wrap
    :param font: font used for measuring (``size`` and ``getsize`` are used)
    :param line_spacing: extra pixels between two lines (may be negative)
    :return: the wrapped lines; empty if not even a single line fits
    """
    # NOTE(review): relies on ``font.getsize``, which newer Pillow releases
    # deprecate - confirm before upgrading beyond the pinned Pillow 9.x.
    if height < font.size:
        return []

    # The first line needs font.size, every further line font.size + line_spacing
    max_lines = math.floor((height - font.size) / (font.size + line_spacing)) + 1

    lines = []
    line = ''
    for word in text.split(' '):
        if len(lines) >= max_lines:
            # The box is full; a non-empty `line` signals leftover text below
            line = word
            break

        nline = word if line == '' else line + ' ' + word

        if font.getsize(nline)[0] <= width:
            line = nline
        elif line != '':
            # The word does not fit any more: finish the current line
            lines.append(line)
            line = word
        else:
            # A single word is wider than the box: trim it until it fits
            while nline:
                nline = nline[:-1]
                nline_e = nline + CHAR_ELLIPSIS
                if font.getsize(nline_e)[0] <= width:
                    lines.append(nline_e)
                    break

    if line != '':
        if len(lines) >= max_lines:
            # Text is left over: mark the last line as truncated
            last = lines[-1]
            if font.getsize(last + CHAR_ELLIPSIS)[0] <= width:
                lines[-1] = last + CHAR_ELLIPSIS
            else:
                i_last_space = last.rfind(' ')
                if i_last_space != -1:
                    # Make room for the ellipsis by dropping the last word
                    lines[-1] = last[:i_last_space] + CHAR_ELLIPSIS
                else:
                    # Single-word line: rfind() returned -1, so there is no
                    # word to drop. Trim characters until the ellipsis fits
                    # (at least one character is always removed here).
                    while last and font.getsize(last + CHAR_ELLIPSIS)[0] > width:
                        last = last[:-1]
                    lines[-1] = last + CHAR_ELLIPSIS
        else:
            lines.append(line)
    return lines
def _draw_text_box(draw: ImageDraw.ImageDraw, box: Tuple[int, int, int, int], text: str, font: ImageFont.FreeTypeFont,
                   color: types.Color = (0, 0, 0), line_spacing=0, vertical_center=True):
    """Draw ``text`` wrapped into the rectangle ``box``.

    :param draw: drawing context to render into
    :param box: rectangle as (left, top, right, bottom)
    :param text: text to wrap and draw
    :param font: font used for measuring and drawing
    :param color: text color
    :param line_spacing: extra pixels between two lines
    :param vertical_center: center the block of lines vertically inside the
        box instead of starting at its top edge
    """
    left, top, right, bottom = box
    box_height = bottom - top
    wrapped = _split_text(box_height, right - left, text, font, line_spacing)

    # Vertical distance between the tops of two consecutive lines
    line_pitch = font.size + line_spacing

    first_y = top
    if vertical_center:
        # No spacing is needed after the last line
        block_height = len(wrapped) * line_pitch - line_spacing
        first_y += int((box_height - block_height) / 2)

    for idx, text_line in enumerate(wrapped):
        draw.text((left, first_y + idx * line_pitch), text_line, color, font)
def _get_dominant_color(img: Image.Image):
    """Return the dominant color of an in-memory image as reported by ColorThief.

    The ColorThief object is created through ``__new__`` so that its
    ``__init__`` is skipped and the given image can be assigned directly.
    """
    extractor = ColorThief.__new__(ColorThief)
    extractor.image = img
    return extractor.get_color()
def _interpolate_color(color_from: types.Color, color_to: types.Color, interval: int):
    """Yield ``interval`` colors forming a linear gradient starting at ``color_from``.

    Each yielded color is a list of rounded channel values. Step ``i`` lies
    ``i / interval`` of the way towards ``color_to``, so ``color_to`` itself is
    not part of the sequence.
    """
    # Per-channel change between two consecutive steps
    channel_deltas = [(end - start) / interval for start, end in zip(color_from, color_to)]
    for step in range(interval):
        yield [round(start + delta * step) for start, delta in zip(color_from, channel_deltas)]
def _get_text_color(bg_color) -> types.Color:
    """Pick white or black text, whichever has the higher WCAG contrast on ``bg_color``.

    :param bg_color: background color with channel values on a 0-255 scale
    :return: ``(255, 255, 255)`` if white contrasts more, otherwise ``(0, 0, 0)``
        (black also wins a tie)
    """
    # wcag_contrast_ratio is fed channel values scaled to 0..1
    bg_unit = tuple(channel / 255 for channel in bg_color)
    contrast_black = wcag_contrast_ratio.rgb((0, 0, 0), bg_unit)
    contrast_white = wcag_contrast_ratio.rgb((1, 1, 1), bg_unit)
    return (255, 255, 255) if contrast_white > contrast_black else (0, 0, 0)
def _create_cover_image(thumbnail: Image.Image, avatar: Optional[Image.Image], title: str, channel: str) -> Image.Image:
    """Compose a square cover image (COVER_WIDTH x COVER_WIDTH) from a video thumbnail.

    The thumbnail is scaled to the cover width and pasted in the vertical
    middle of a gradient background. The area above it receives the (circular)
    channel avatar and the channel name, the area below it the video title.

    :param thumbnail: video thumbnail image
    :param avatar: channel avatar image, or None to draw no avatar
    :param title: video title, drawn below the thumbnail
    :param channel: channel name, drawn above the thumbnail
    :return: the newly created RGB cover image
    """
    # Scale the thumbnail image down to cover size
    tn_height = int(COVER_WIDTH / thumbnail.width * thumbnail.height)
    tn = thumbnail.resize((COVER_WIDTH, tn_height), Image.Resampling.LANCZOS)

    # Get dominant colors from the top and bottom 20% of the thumbnail image
    top_part = tn.crop((0, 0, COVER_WIDTH, int(tn_height * 0.2)))
    bottom_part = tn.crop((0, int(tn_height * 0.8), COVER_WIDTH, tn_height))
    top_color = _get_dominant_color(top_part)
    bottom_color = _get_dominant_color(bottom_part)

    # Create new cover image
    cover = Image.new('RGB', (COVER_WIDTH, COVER_WIDTH))
    cover_draw = ImageDraw.Draw(cover)

    # Draw background gradient, one horizontal line per pixel row
    for i, color in enumerate(_interpolate_color(top_color, bottom_color, cover.height)):
        cover_draw.line(((0, i), (cover.width, i)), tuple(color), 1)

    # Insert thumbnail image in the middle; tn_margin is the height of the
    # free area above (and below) the thumbnail
    tn_margin = int((COVER_WIDTH - tn_height) / 2)
    cover.paste(tn, (0, tn_margin))

    # Add channel avatar (both values stay 0 when there is no avatar, so the
    # channel name then starts at the left text margin)
    avt_margin = 0
    avt_size = 0
    if avatar:
        # The avatar fills the top area minus a 5% margin on each side
        avt_margin = int(tn_margin * 0.05)
        avt_size = tn_margin - 2 * avt_margin
        avt = avatar.resize((avt_size, avt_size), Image.Resampling.LANCZOS)
        # Paste through a filled ellipse mask so the avatar appears round
        circle_mask = Image.new('L', (avt_size, avt_size))
        circle_mask_draw = ImageDraw.Draw(circle_mask)
        circle_mask_draw.ellipse((0, 0, avt_size, avt_size), 255)
        cover.paste(avt, (avt_margin, avt_margin), circle_mask)

    # Add text
    text_margin_x = 16
    # Left edge of the channel name: right of the avatar plus the text margin
    text_margin_topleft = avt_margin + avt_size + text_margin_x
    # NOTE(review): the negative offset/spacing look like visual tuning for
    # the 50px font (glyphs sit lower than the line top) - confirm
    text_vertical_offset = -17
    text_line_space = -4
    fnt = ImageFont.truetype(SourceSansPro, 50)
    # Text colors are chosen per area, based on the gradient's end colors
    top_text_color = _get_text_color(top_color)
    bottom_text_color = _get_text_color(bottom_color)
    # Channel name in the area above the thumbnail
    _draw_text_box(cover_draw, (text_margin_topleft, text_vertical_offset, COVER_WIDTH - text_margin_x, tn_margin),
                   channel,
                   fnt, top_text_color, text_line_space)
    # Video title in the area below the thumbnail
    _draw_text_box(cover_draw,
                   (text_margin_x, COVER_WIDTH - tn_margin + text_vertical_offset,
                    COVER_WIDTH - text_margin_x, COVER_WIDTH), title, fnt, bottom_text_color, text_line_space)
    return cover
def create_cover_file(thumbnail_path: types.Path, avatar_path: Optional[types.Path], title: str, channel: str,
                      cover_path: types.Path):
    """Render a cover image from image files and save it to ``cover_path``.

    :param thumbnail_path: path of the video thumbnail image
    :param avatar_path: path of the channel avatar image; a falsy value
        (e.g. None) creates the cover without avatar
    :param title: video title to print on the cover
    :param channel: channel name to print on the cover
    :param cover_path: path the finished cover is written to
    """
    # Open the source images in context managers so their file handles are
    # released even if rendering fails. The cover is a separate, newly created
    # image and stays usable after the sources are closed.
    with Image.open(thumbnail_path) as thumbnail:
        if avatar_path:
            with Image.open(avatar_path) as avatar:
                cvr = _create_cover_image(thumbnail, avatar, title, channel)
        else:
            cvr = _create_cover_image(thumbnail, None, title, channel)
    cvr.save(cover_path)

6
ucast/types.py Normal file
View file

@ -0,0 +1,6 @@
# coding=utf-8
from os import PathLike
from typing import Tuple, Union
# Color as a tuple of three integer channel values
Color = Tuple[int, int, int]
# File system path given as text, bytes or an os.PathLike object
Path = Union[str, bytes, PathLike]

9
ucast/util.py Normal file
View file

@ -0,0 +1,9 @@
# coding=utf-8
import requests
from ucast import types
def download_file(url: str, download_path: types.Path, timeout: float = 30.0):
    """Download ``url`` and store the response body at ``download_path``.

    :param url: address of the file to download (redirects are followed)
    :param download_path: target file; it is created or overwritten
    :param timeout: seconds to wait for the server before giving up
    :raises requests.HTTPError: if the server answers with an error status
    """
    r = requests.get(url, allow_redirects=True, timeout=timeout)
    # Fail loudly instead of saving an error page under the target file name
    r.raise_for_status()
    # The context manager closes the file even if writing fails
    with open(download_path, 'wb') as f:
        f.write(r.content)

79
ucast/youtube.py Normal file
View file

@ -0,0 +1,79 @@
# coding=utf-8
from operator import itemgetter
import json
from dataclasses import dataclass
from yt_dlp import YoutubeDL
from scrapetube import scrapetube
import requests
def get_thumbnail_url(vinfo):
    """Return the URL of the thumbnail with the highest ``preference`` value.

    :param vinfo: video info dict with a ``thumbnails`` list whose entries
        carry ``preference`` and ``url`` keys
    :return: URL of the preferred thumbnail; among entries with equal
        preference the first one wins
    """
    best = max(vinfo['thumbnails'], key=lambda thumb: thumb['preference'])
    return best['url']
def get_video_info(video_id):
    """Fetch the metadata of a video through yt-dlp without downloading it.

    :param video_id: value handed to ``YoutubeDL.extract_info``
    :return: the result of ``extract_info``
    """
    with YoutubeDL() as downloader:
        info = downloader.extract_info(video_id, download=False)
    return info
def download_video(video_id, download_path, sponsorblock=False):
    """Download the audio of a video as mp3 using yt-dlp.

    :param video_id: value handed to ``YoutubeDL.extract_info``
    :param download_path: passed to yt-dlp as its ``outtmpl`` option
    :param sponsorblock: additionally run the SponsorBlock and ModifyChapters
        postprocessors for the ``sponsor`` category
    :return: the result of ``extract_info``
    """
    # Audio extraction always runs first
    postprocessors = [
        {
            'key': 'FFmpegExtractAudio',
            'preferredcodec': 'mp3'
        },
    ]
    if sponsorblock:
        postprocessors += [
            {
                'key': 'SponsorBlock',
                'categories': ['sponsor'],
                'when': 'after_filter'
            },
            {
                'key': 'ModifyChapters',
                'remove_sponsor_segments': ['sponsor']
            },
        ]

    options = {
        'format': 'bestaudio',
        'postprocessors': postprocessors,
        'outtmpl': download_path,
    }
    with YoutubeDL(options) as downloader:
        # extract_info downloads the video and returns its metadata
        return downloader.extract_info(video_id)
@dataclass
class ChannelMetadata:
    """Basic details of a channel, as returned by ``get_channel_metadata``."""
    id: str  # channel ID (``externalId`` of the channel metadata)
    name: str  # channel title
    description: str  # channel description text
    avatar_url: str  # URL of the first avatar thumbnail
def get_channel_metadata(channel_url):
    """Scrape basic channel details from the channel's videos page.

    :param channel_url: URL of the channel, without a trailing slash
    :return: ChannelMetadata holding ID, name, description and avatar URL
    """
    session = requests.Session()
    session.headers["User-Agent"] = "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.101 Safari/537.36"

    videos_url = f"{channel_url}/videos?view=0&flow=grid"
    html = scrapetube.get_initial_data(session, videos_url)

    # NOTE(review): the "}" is appended because the end marker "};" presumably
    # is not part of the extracted text - confirm against scrapetube
    raw_json = scrapetube.get_json_from_html(html, "var ytInitialData = ", 0, "};")
    data = json.loads(raw_json + "}")

    renderer = data['metadata']['channelMetadataRenderer']
    return ChannelMetadata(
        renderer['externalId'],
        renderer['title'],
        renderer['description'],
        renderer['avatar']['thumbnails'][0]['url'],
    )