ucast/ucast/tests/service/test_youtube.py

import datetime
import re
import subprocess
import tempfile
from pathlib import Path

import pytest
from PIL import Image, ImageChops

from ucast import tests
from ucast.service import youtube

# YouTube video IDs used as fixtures by the tests in this module.
# VIDEO_ID_THETADEV: regular video checked by test_get_video_details
# VIDEO_ID_SHORT: video that test_get_video_details_short expects to be a short
# VIDEO_ID_PERSUASION: audio track downloaded by test_download_audio
VIDEO_ID_THETADEV = "ZPxEr4YdWt8"
VIDEO_ID_SHORT = "lcQZ6YwQHiw"
VIDEO_ID_PERSUASION = "DWjFW7Yq1fA"

# YouTube channels used by the channel metadata / video listing tests.
CHANNEL_ID_THETADEV = "UCGiJh0NZ52wRhYKYnuZI08Q"
CHANNEL_ID_BLENDER = "UCSMOQeBJ2RAnuFungnQOxLg"
# Custom ("/c/") URL; test_channel_metadata expects it to resolve to
# CHANNEL_ID_BLENDER.
CHANNEL_URL_BLENDER = "https://www.youtube.com/c/BlenderFoundation"


@pytest.fixture(scope="module")
def video_details() -> youtube.VideoDetails:
    """Provide the details of the ThetaDev reference video.

    Module-scoped, so the lookup is performed once and shared by every test
    in this module that requests the fixture.
    """
    details = youtube.get_video_details(VIDEO_ID_THETADEV)
    return details


def test_download_thumbnail(video_details):
    """Download the reference video's thumbnail and compare it with the
    expected image stored in the test files directory."""
    expected_tn_file = tests.DIR_TESTFILES / "thumbnail" / "t1.webp"

    # Context managers make sure the temporary directory is removed and both
    # image files are closed, even when the download or an assertion fails.
    with tempfile.TemporaryDirectory() as tmpdir_name:
        tn_file = Path(tmpdir_name) / "thumbnail.webp"

        youtube.download_thumbnail(video_details, tn_file)

        with Image.open(tn_file) as tn, Image.open(expected_tn_file) as expected_tn:
            diff = ImageChops.difference(tn, expected_tn)
            # getbbox() is None only if the difference image has no
            # non-zero pixel, i.e. both thumbnails are identical.
            assert diff.getbbox() is None


def test_get_video_details(video_details):
    """Check the metadata returned for the ThetaDev reference video."""
    assert video_details.id == VIDEO_ID_THETADEV
    assert video_details.title == "ThetaDev @ Embedded World 2019"
    # Use the shared constant instead of repeating the channel ID literal
    assert video_details.channel_id == CHANNEL_ID_THETADEV
    assert (
        video_details.description
        == """This february I spent one day at the Embedded World in Nuremberg. They showed tons of interesting electronics stuff, so I had to take some pictures and videos for you to see ;-)

Sorry for the late upload, I just didn't have time to edit my footage.

Embedded World: https://www.embedded-world.de/

My website: https://thdev.org
Twitter: https://twitter.com/Theta_Dev"""
    )
    # Duration is compared against the integer 267
    # NOTE(review): presumably seconds -- confirm against youtube.VideoDetails
    assert video_details.duration == 267
    assert not video_details.is_currently_live
    assert not video_details.is_livestream
    assert not video_details.is_short
    # The expected publishing timestamp is midnight UTC of the upload day
    assert video_details.published == datetime.datetime(
        2019, 6, 2, tzinfo=datetime.timezone.utc
    )


def test_get_video_details_short():
    """Check that the details of the short reference video are flagged as a
    short and not as a (current) livestream."""
    expected_title = (
        "Small pink flowers | #shorts | Free Stock Video | "
        "creative commons short videos | creative #short"
    )

    details = youtube.get_video_details(VIDEO_ID_SHORT)

    assert details.id == VIDEO_ID_SHORT
    assert details.title == expected_title
    assert not details.is_currently_live
    assert not details.is_livestream
    assert details.is_short


def test_download_audio():
    """Download the audio of a reference video as mp3 and validate both the
    returned metadata and the produced file (using ffmpeg)."""
    # The context manager removes the temporary directory (and the download)
    # even when the download or an assertion fails.
    with tempfile.TemporaryDirectory() as tmpdir_name:
        download_file = Path(tmpdir_name) / "download.mp3"

        vinfo = youtube.download_audio(VIDEO_ID_PERSUASION, download_file)
        assert vinfo.id == VIDEO_ID_PERSUASION
        assert vinfo.title == "Persuasion (Instrumental) – RYYZN (No Copyright Music)"
        assert vinfo.duration == 100

        # Check with ffmpeg if the audio file is valid.
        # The return code is deliberately not checked; only the stream info
        # written to stderr is inspected.
        # NOTE(review): ffmpeg is called without an output file and presumably
        # exits non-zero for that reason -- confirm before adding check=True.
        res = subprocess.run(
            ["ffmpeg", "-i", str(download_file)],
            capture_output=True,
            text=True,
        )

    assert "Stream #0:0: Audio: mp3" in res.stderr

    match = re.search(r"Duration: (\d{2}:\d{2}:\d{2})", res.stderr)
    # Fail with a readable message instead of a TypeError from `None[1]`
    assert match is not None, "no duration found in ffmpeg output"
    assert match[1] == "00:01:40"


@pytest.mark.parametrize(
    ("channel_str", "channel_url"),
    [
        # Full channel URL
        (
            "https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q",
            "https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q",
        ),
        # Custom ("/c/") URL
        (
            "https://www.youtube.com/c/MrBeast6000",
            "https://www.youtube.com/c/MrBeast6000",
        ),
        # User URL
        (
            "https://www.youtube.com/user/LinusTechTips",
            "https://www.youtube.com/user/LinusTechTips",
        ),
        # Bare channel ID
        (
            "UCGiJh0NZ52wRhYKYnuZI08Q",
            "https://www.youtube.com/channel/UCGiJh0NZ52wRhYKYnuZI08Q",
        ),
        # URL on a different host is expected to map to youtube.com
        (
            "https://piped.mha.fi/user/LinusTechTips",
            "https://www.youtube.com/user/LinusTechTips",
        ),
    ],
)
def test_channel_url_from_str(channel_str: str, channel_url: str):
    """Check that each supported channel string yields the expected
    YouTube channel URL."""
    assert youtube.channel_url_from_str(channel_str) == channel_url


@pytest.mark.parametrize(
    ("channel_url", "channel_id", "name", "avatar_url"),
    [
        (
            youtube.channel_url_from_id(CHANNEL_ID_THETADEV),
            CHANNEL_ID_THETADEV,
            "ThetaDev",
            "https://yt3.ggpht.com/ytc/AKedOLSnFfmpibLLoqyaYdsF6bJ-zaLPzomII__FrJve1w=s900-c-k-c0x00ffffff-no-rj",
        ),
        (
            CHANNEL_URL_BLENDER,
            CHANNEL_ID_BLENDER,
            "Blender",
            "https://yt3.ggpht.com/ytc/AKedOLT_31fFSD3FWEBnHZnyZeJx-GPHJwYCQKcEpaq8NQ=s900-c-k-c0x00ffffff-no-rj",
        ),
    ],
)
def test_channel_metadata(
    channel_url: str, channel_id: str, name: str, avatar_url: str
):
    """Fetch the metadata of a channel and compare ID, name and avatar URL
    with the expected values; the description only has to be non-empty."""
    meta = youtube.get_channel_metadata(channel_url)

    fetched = (meta.id, meta.name, meta.avatar_url)
    wanted = (channel_id, name, avatar_url)
    assert fetched == wanted
    assert meta.description
    assert metadata.description


def test_get_channel_videos_from_feed():
    """Check that the channel feed yields videos with an 11-character ID and
    a UTC publishing timestamp that includes a time of day."""
    feed_videos = youtube.get_channel_videos_from_feed(CHANNEL_ID_THETADEV)
    assert feed_videos

    first = feed_videos[0]
    assert len(first.id) == 11

    published = first.published
    assert published.tzinfo == datetime.timezone.utc
    # At least one time component has to be set, i.e. the timestamp must not
    # be exactly midnight.
    assert any(
        part > 0 for part in (published.second, published.minute, published.hour)
    )


def test_get_channel_videos_from_scraper():
    """Check that the channel scraper yields videos with an 11-character ID
    and no publishing date."""
    videos = youtube.get_channel_videos_from_scraper(CHANNEL_ID_THETADEV)
    # NOTE(review): if `videos` is a plain generator this check is always
    # true; the emptiness check below is the one that matters -- confirm.
    assert videos

    # Use the next() builtin instead of calling the dunder method directly;
    # the default turns an empty iterator into a readable assertion failure
    # instead of a bare StopIteration.
    v1 = next(videos, None)
    assert v1 is not None, "scraper returned no videos"
    assert len(v1.id) == 11
    assert v1.published is None