diff --git a/cli/Cargo.toml b/cli/Cargo.toml index e07a928..6f68f47 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -41,7 +41,7 @@ rustls-tls-native-roots = [ ] [dependencies] -rustypipe.workspace = true +rustypipe = { workspace = true, features = ["rss"] } rustypipe-downloader.workspace = true reqwest.workspace = true tokio = { workspace = true, features = ["macros", "rt-multi-thread"] } diff --git a/cli/src/main.rs b/cli/src/main.rs index 6a954a9..9458e81 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -1,7 +1,12 @@ #![doc = include_str!("../README.md")] #![warn(clippy::todo, clippy::dbg_macro)] -use std::{path::PathBuf, str::FromStr, time::Duration}; +use std::{ + path::PathBuf, + str::FromStr, + sync::{atomic::AtomicUsize, Arc}, + time::Duration, +}; use clap::{Parser, Subcommand, ValueEnum}; use futures::stream::{self, StreamExt}; @@ -89,20 +94,23 @@ enum Commands { #[clap(short, long)] resolution: Option, /// Download only the audio track - #[clap(long)] + #[clap(short, long)] audio: bool, /// Number of videos downloaded in parallel #[clap(short, long, default_value_t = 8)] parallel: usize, /// Use YouTube Music for downloading playlists - #[clap(long)] + #[clap(short, long)] music: bool, /// Limit the number of videos to download - #[clap(long, default_value_t = 1000)] + #[clap(short, long, default_value_t = 1000)] limit: usize, /// YT Client used to fetch player data + #[clap(short, long)] + client_type: Option>, + /// `pot` token to circumvent bot detection #[clap(long)] - client_type: Option, + pot: Option, }, /// Extract video, playlist, album or channel data Get { @@ -115,17 +123,20 @@ enum Commands { #[clap(long)] pretty: bool, /// Output as text - #[clap(long)] + #[clap(short, long)] txt: bool, /// Limit the number of items to fetch - #[clap(long, default_value_t = 20)] + #[clap(short, long, default_value_t = 20)] limit: usize, /// Channel tab #[clap(long, default_value = "videos")] tab: ChannelTab, /// Use YouTube Music - #[clap(long)] + 
#[clap(short, long)] music: bool, + /// Use the RSS feed of a channel + #[clap(long)] + rss: bool, /// Get comments #[clap(long)] comments: Option, @@ -136,8 +147,8 @@ enum Commands { #[clap(long)] player: bool, /// YT Client used to fetch player data - #[clap(long)] - client_type: Option, + #[clap(short, long)] + client_type: Option, }, /// Search YouTube Search { @@ -150,10 +161,10 @@ enum Commands { #[clap(long)] pretty: bool, /// Output as text - #[clap(long)] + #[clap(short, long)] txt: bool, /// Limit the number of items to fetch - #[clap(long, default_value_t = 20)] + #[clap(short, long, default_value_t = 20)] limit: usize, /// Filter results by item type #[clap(long)] @@ -171,9 +182,10 @@ enum Commands { #[clap(long)] channel: Option, /// YouTube Music search filter - #[clap(long)] + #[clap(short, long)] music: Option, }, + /// Get a YouTube visitor data cookie Vdata, } @@ -251,7 +263,7 @@ enum MusicSearchCategory { } #[derive(Copy, Clone, PartialEq, Eq, ValueEnum)] -enum PlayerType { +enum ClientTypeArg { Desktop, Tv, TvEmbed, @@ -301,14 +313,14 @@ impl From for search_filter::Order { } } -impl From for ClientType { - fn from(value: PlayerType) -> Self { +impl From for ClientType { + fn from(value: ClientTypeArg) -> Self { match value { - PlayerType::Desktop => Self::Desktop, - PlayerType::TvEmbed => Self::TvHtml5Embed, - PlayerType::Tv => Self::Tv, - PlayerType::Android => Self::Android, - PlayerType::Ios => Self::Ios, + ClientTypeArg::Desktop => Self::Desktop, + ClientTypeArg::TvEmbed => Self::TvHtml5Embed, + ClientTypeArg::Tv => Self::Tv, + ClientTypeArg::Android => Self::Android, + ClientTypeArg::Ios => Self::Ios, } } } @@ -415,11 +427,11 @@ async fn download_video( dl: &Downloader, id: &str, target: &DownloadTarget, - client_type: Option, + client_types: Option<&[ClientType]>, ) { let mut q = target.apply(dl.id(id)); - if let Some(client_type) = client_type { - q = q.client_type(client_type.into()); + if let Some(client_types) = client_types { + q = 
q.client_types(client_types); } let res = q.download().await; if let Err(e) = res { @@ -432,9 +444,9 @@ async fn download_videos( videos: Vec, target: &DownloadTarget, parallel: usize, - client_type: Option, + client_types: Option<&[ClientType]>, multi: MultiProgress, -) { +) -> anyhow::Result<()> { // Indicatif setup let main = multi.add(ProgressBar::new( videos.len().try_into().unwrap_or_default(), @@ -448,27 +460,38 @@ async fn download_videos( ); main.tick(); + let n_failed = Arc::new(AtomicUsize::default()); + stream::iter(videos) .for_each_concurrent(parallel, |video| { let dl = dl.clone(); let main = main.clone(); let id = video.id().to_owned(); + let n_failed = n_failed.clone(); let mut q = target.apply(dl.video(video)); - if let Some(client_type) = client_type { - q = q.client_type(client_type.into()); + if let Some(client_types) = client_types { + q = q.client_types(client_types); } async move { if let Err(e) = q.download().await { if !matches!(e, DownloadError::Exists(_)) { tracing::error!("[{id}]: {e}"); + n_failed.fetch_add(1, std::sync::atomic::Ordering::Relaxed); } + } else { + main.inc(1); } - main.inc(1); } }) .await; + + let n_failed = n_failed.load(std::sync::atomic::Ordering::Relaxed); + if n_failed > 0 { + anyhow::bail!("{n_failed} downloads failed"); + } + Ok(()) } /// Stderr writer that suspends the progress bars before printing logs @@ -495,6 +518,14 @@ impl std::io::Write for ProgWriter { #[tokio::main] async fn main() { + if let Err(e) = run().await { + println!("{}", "Error:".red().bold()); + println!("{}", e); + std::process::exit(1); + } +} + +async fn run() -> anyhow::Result<()> { let cli = Cli::parse(); let multi = MultiProgress::new(); @@ -524,7 +555,7 @@ async fn main() { if let Some(country) = cli.country { rp = rp.country(Country::from_str(&country.to_ascii_uppercase()).expect("invalid country")); } - let rp = rp.build().unwrap(); + let rp = rp.build()?; match cli.command { Commands::Download { @@ -536,8 +567,9 @@ async fn main() 
{ music, limit, client_type, + pot, } => { - let url_target = rp.query().resolve_string(&id, false).await.unwrap(); + let url_target = rp.query().resolve_string(&id, false).await?; let mut filter = StreamFilter::new(); if let Some(res) = resolution { @@ -555,20 +587,21 @@ async fn main() { dl = dl.audio_tag().crop_cover(); filter = filter.no_video(); } + if let Some(pot) = pot { + dl = dl.pot(pot); + } let dl = dl.stream_filter(filter).build(); + let cts = client_type.map(|c| c.into_iter().map(ClientType::from).collect::>()); + match url_target { UrlTarget::Video { id, .. } => { - download_video(&dl, &id, &target, client_type).await; + download_video(&dl, &id, &target, cts.as_deref()).await; } UrlTarget::Channel { id } => { target.assert_dir(); - let mut channel = rp.query().channel_videos(id).await.unwrap(); - channel - .content - .extend_limit(&rp.query(), limit) - .await - .unwrap(); + let mut channel = rp.query().channel_videos(id).await?; + channel.content.extend_limit(&rp.query(), limit).await?; let videos = channel .content .items @@ -576,17 +609,13 @@ async fn main() { .take(limit) .map(|v| DownloadVideo::from_entity(&v)) .collect(); - download_videos(&dl, videos, &target, parallel, client_type, multi).await; + download_videos(&dl, videos, &target, parallel, cts.as_deref(), multi).await?; } UrlTarget::Playlist { id } => { target.assert_dir(); let videos = if music { - let mut playlist = rp.query().music_playlist(id).await.unwrap(); - playlist - .tracks - .extend_limit(&rp.query(), limit) - .await - .unwrap(); + let mut playlist = rp.query().music_playlist(id).await?; + playlist.tracks.extend_limit(&rp.query(), limit).await?; playlist .tracks .items @@ -595,12 +624,8 @@ async fn main() { .map(|v| DownloadVideo::from_track(&v)) .collect() } else { - let mut playlist = rp.query().playlist(id).await.unwrap(); - playlist - .videos - .extend_limit(&rp.query(), limit) - .await - .unwrap(); + let mut playlist = rp.query().playlist(id).await?; + 
playlist.videos.extend_limit(&rp.query(), limit).await?; playlist .videos .items @@ -609,18 +634,18 @@ async fn main() { .map(|v| DownloadVideo::from_entity(&v)) .collect() }; - download_videos(&dl, videos, &target, parallel, client_type, multi).await; + download_videos(&dl, videos, &target, parallel, cts.as_deref(), multi).await?; } UrlTarget::Album { id } => { target.assert_dir(); - let album = rp.query().music_album(id).await.unwrap(); + let album = rp.query().music_album(id).await?; let videos = album .tracks .into_iter() .take(limit) .map(|v| DownloadVideo::from_track(&v)) .collect(); - download_videos(&dl, videos, &target, parallel, client_type, multi).await; + download_videos(&dl, videos, &target, parallel, cts.as_deref(), multi).await?; } } } @@ -632,22 +657,23 @@ async fn main() { limit, tab, music, + rss, comments, lyrics, player, client_type, } => { - let target = rp.query().resolve_string(&id, false).await.unwrap(); + let target = rp.query().resolve_string(&id, false).await?; match target { UrlTarget::Video { id, .. 
} => { if lyrics { - let details = rp.query().music_details(&id).await.unwrap(); + let details = rp.query().music_details(&id).await?; match details.lyrics_id { Some(lyrics_id) => { - let lyrics = rp.query().music_lyrics(lyrics_id).await.unwrap(); + let lyrics = rp.query().music_lyrics(lyrics_id).await?; if txt { - println!("{}\n\n{}", lyrics.body, lyrics.footer); + println!("{}\n\n{}", lyrics.body, lyrics.footer.blue()); } else { print_data(&lyrics, format, pretty); } @@ -655,21 +681,26 @@ async fn main() { None => eprintln!("no lyrics found"), } } else if music { - let details = rp.query().music_details(&id).await.unwrap(); + let details = rp.query().music_details(&id).await?; if txt { if details.track.is_video { - println!("[MV]"); + anstream::println!("{}", "[MV]".on_green().black()); } else { - println!("[Track]"); + anstream::println!("{}", "[Track]".on_green().black()); } - print!("{} [{}]", details.track.name, details.track.id); + anstream::print!( + "{} [{}]", + details.track.name.green().bold(), + details.track.id + ); print_duration(details.track.duration); println!(); print_artists(&details.track.artists); println!(); if !details.track.is_video { - println!( - "Album: {}", + anstream::println!( + "{} {}", + "Album:".blue(), details .track .album @@ -679,7 +710,7 @@ async fn main() { ) } if let Some(view_count) = details.track.view_count { - println!("Views: {view_count}"); + anstream::println!("{} {}", "Views:".blue(), view_count); } } else { print_data(&details, format, pretty); @@ -689,26 +720,20 @@ async fn main() { rp.query().player_from_client(&id, client_type.into()).await } else { rp.query().player(&id).await - } - .unwrap(); + }?; print_data(&player, format, pretty); } else { - let mut details = rp.query().video_details(&id).await.unwrap(); + let mut details = rp.query().video_details(&id).await?; match comments { Some(CommentsOrder::Top) => { - details - .top_comments - .extend_limit(rp.query(), limit) - .await - .unwrap(); + 
details.top_comments.extend_limit(rp.query(), limit).await?; } Some(CommentsOrder::Latest) => { details .latest_comments .extend_limit(rp.query(), limit) - .await - .unwrap(); + .await?; } None => {} } @@ -786,7 +811,7 @@ async fn main() { } UrlTarget::Channel { id } => { if music { - let artist = rp.query().music_artist(&id, true).await.unwrap(); + let artist = rp.query().music_artist(&id, true).await?; if txt { anstream::println!( "{}\n{} [{}]", @@ -836,6 +861,31 @@ async fn main() { } else { print_data(&artist, format, pretty); } + } else if rss { + let rss = rp.query().channel_rss(&id).await?; + + if txt { + anstream::println!( + "{}\n{} [{}]\n{} {}", + "[Channel RSS]".on_green().black(), + rss.name.green().bold(), + rss.id, + "Created on:".blue(), + rss.create_date, + ); + if let Some(v) = rss.videos.first() { + anstream::println!( + "{} {} [{}]", + "Latest video:".blue(), + v.publish_date, + v.id + ); + } + println!(); + print_entities(&rss.videos); + } else { + print_data(&rss, format, pretty); + } } else { match tab { ChannelTab::Videos | ChannelTab::Shorts | ChannelTab::Live => { @@ -846,13 +896,9 @@ async fn main() { _ => unreachable!(), }; let mut channel = - rp.query().channel_videos_tab(&id, video_tab).await.unwrap(); + rp.query().channel_videos_tab(&id, video_tab).await?; - channel - .content - .extend_limit(rp.query(), limit) - .await - .unwrap(); + channel.content.extend_limit(rp.query(), limit).await?; if txt { anstream::print!( @@ -874,7 +920,7 @@ async fn main() { } } ChannelTab::Playlists => { - let channel = rp.query().channel_playlists(&id).await.unwrap(); + let channel = rp.query().channel_playlists(&id).await?; if txt { anstream::println!( @@ -894,7 +940,7 @@ async fn main() { } } ChannelTab::Info => { - let info = rp.query().channel_info(&id).await.unwrap(); + let info = rp.query().channel_info(&id).await?; if txt { anstream::println!( @@ -930,12 +976,8 @@ async fn main() { } UrlTarget::Playlist { id } => { if music { - let mut playlist = 
rp.query().music_playlist(&id).await.unwrap(); - playlist - .tracks - .extend_limit(rp.query(), limit) - .await - .unwrap(); + let mut playlist = rp.query().music_playlist(&id).await?; + playlist.tracks.extend_limit(rp.query(), limit).await?; if txt { anstream::println!( "{}\n{} [{}]\n{} {}", @@ -959,12 +1001,8 @@ async fn main() { print_data(&playlist, format, pretty); } } else { - let mut playlist = rp.query().playlist(&id).await.unwrap(); - playlist - .videos - .extend_limit(rp.query(), limit) - .await - .unwrap(); + let mut playlist = rp.query().playlist(&id).await?; + playlist.videos.extend_limit(rp.query(), limit).await?; if txt { anstream::println!( "{}\n{} [{}]\n{} {}", @@ -993,7 +1031,7 @@ async fn main() { } } UrlTarget::Album { id } => { - let album = rp.query().music_album(&id).await.unwrap(); + let album = rp.query().music_album(&id).await?; if txt { anstream::print!( "{}\n{} [{}] ({:?}", @@ -1036,8 +1074,8 @@ async fn main() { } => match music { None => match channel { Some(channel) => { - rustypipe::validate::channel_id(&channel).unwrap(); - let res = rp.query().channel_search(&channel, &query).await.unwrap(); + rustypipe::validate::channel_id(&channel)?; + let res = rp.query().channel_search(&channel, &query).await?; print_data(&res, format, pretty); } None => { @@ -1049,9 +1087,8 @@ async fn main() { let mut res = rp .query() .search_filter::(&query, &filter) - .await - .unwrap(); - res.items.extend_limit(rp.query(), limit).await.unwrap(); + .await?; + res.items.extend_limit(rp.query(), limit).await?; if txt { if let Some(corr) = res.corrected_query { @@ -1064,27 +1101,27 @@ async fn main() { } }, Some(MusicSearchCategory::All) => { - let res = rp.query().music_search_main(&query).await.unwrap(); + let res = rp.query().music_search_main(&query).await?; print_music_search(&res, format, pretty, txt); } Some(MusicSearchCategory::Tracks) => { - let mut res = rp.query().music_search_tracks(&query).await.unwrap(); - res.items.extend_limit(rp.query(), 
limit).await.unwrap(); + let mut res = rp.query().music_search_tracks(&query).await?; + res.items.extend_limit(rp.query(), limit).await?; print_music_search(&res, format, pretty, txt); } Some(MusicSearchCategory::Videos) => { - let mut res = rp.query().music_search_videos(&query).await.unwrap(); - res.items.extend_limit(rp.query(), limit).await.unwrap(); + let mut res = rp.query().music_search_videos(&query).await?; + res.items.extend_limit(rp.query(), limit).await?; print_music_search(&res, format, pretty, txt); } Some(MusicSearchCategory::Artists) => { - let mut res = rp.query().music_search_artists(&query).await.unwrap(); - res.items.extend_limit(rp.query(), limit).await.unwrap(); + let mut res = rp.query().music_search_artists(&query).await?; + res.items.extend_limit(rp.query(), limit).await?; print_music_search(&res, format, pretty, txt); } Some(MusicSearchCategory::Albums) => { - let mut res = rp.query().music_search_albums(&query).await.unwrap(); - res.items.extend_limit(rp.query(), limit).await.unwrap(); + let mut res = rp.query().music_search_albums(&query).await?; + res.items.extend_limit(rp.query(), limit).await?; print_music_search(&res, format, pretty, txt); } Some(MusicSearchCategory::PlaylistsYtm | MusicSearchCategory::PlaylistsCommunity) => { @@ -1094,15 +1131,15 @@ async fn main() { &query, music == Some(MusicSearchCategory::PlaylistsCommunity), ) - .await - .unwrap(); - res.items.extend_limit(rp.query(), limit).await.unwrap(); + .await?; + res.items.extend_limit(rp.query(), limit).await?; print_music_search(&res, format, pretty, txt); } }, Commands::Vdata => { - let vd = rp.query().get_visitor_data().await.unwrap(); + let vd = rp.query().get_visitor_data().await?; println!("{vd}"); } }; + Ok(()) } diff --git a/downloader/src/lib.rs b/downloader/src/lib.rs index 40c2583..53c301c 100644 --- a/downloader/src/lib.rs +++ b/downloader/src/lib.rs @@ -17,9 +17,9 @@ use futures::stream::{self, StreamExt}; use once_cell::sync::Lazy; use rand::Rng; use 
regex::Regex; -use reqwest::{header, Client, StatusCode}; +use reqwest::{header, Client, StatusCode, Url}; use rustypipe::{ - client::{ClientType, RustyPipe}, + client::{ClientType, RustyPipe, DEFAULT_PLAYER_CLIENT_ORDER}, model::{ traits::{FileFormat, YtEntity}, AudioCodec, TrackItem, VideoCodec, VideoPlayer, @@ -74,6 +74,8 @@ pub struct DownloaderBuilder { audio_tag: bool, #[cfg(feature = "audiotag")] crop_cover: bool, + client_types: Option>, + pot: Option, } struct DownloaderInner { @@ -103,6 +105,10 @@ struct DownloaderInner { /// Crop YT thumbnails to ensure square album covers #[cfg(feature = "audiotag")] crop_cover: bool, + /// Client types for fetching videos + client_types: Option>, + /// Pot token to circumvent bot detection + pot: Option, } /// Download query @@ -120,8 +126,10 @@ pub struct DownloadQuery { filter: Option, /// Target video format video_format: Option, - /// ClientType type for fetching videos - client_type: Option, + /// Client types for fetching videos + client_types: Option>, + /// Pot token to circumvent bot detection + pot: Option, } /// Video to be downloaded @@ -287,6 +295,8 @@ impl Default for DownloaderBuilder { audio_tag: false, #[cfg(feature = "audiotag")] crop_cover: false, + client_types: None, + pot: None, } } } @@ -384,6 +394,38 @@ impl DownloaderBuilder { self } + /// Set the [`ClientType`] used to fetch the YT player + #[must_use] + pub fn client_type(mut self, client_type: ClientType) -> Self { + self.client_types = Some(vec![client_type]); + self + } + + /// Set a list of client types used to fetch the YT player + /// + /// The clients are used in the given order. If a client cannot fetch the requested video, + /// an attempt is made with the next one. 
+ #[must_use] + pub fn client_types>>(mut self, client_types: T) -> Self { + self.client_types = Some(client_types.into()); + self + } + + /// Set the `pot` token to circumvent bot detection + /// + /// YouTube has implemented the token to prevent other clients from downloading YouTube videos. + /// The token is generated using YouTube's botguard. Therefore you need a full browser environment + /// to obtain one. + /// + /// The Invidious project has created a script to extract this token: + /// + /// The `pot` token is only used for the [`ClientType::Desktop`] and [`ClientType::DesktopMusic`] clients. + #[must_use] + pub fn pot>(mut self, pot: S) -> Self { + self.pot = Some(pot.into()); + self + } + /// Create a new, configured [`Downloader`] instance pub fn build(self) -> Downloader { self.build_with_client( @@ -417,6 +459,8 @@ impl DownloaderBuilder { audio_tag: self.audio_tag, #[cfg(feature = "audiotag")] crop_cover: self.crop_cover, + client_types: self.client_types, + pot: self.pot, }), } } @@ -450,7 +494,8 @@ impl Downloader { progress: None, filter: None, video_format: None, - client_type: None, + client_types: None, + pot: None, } } @@ -586,7 +631,32 @@ impl DownloadQuery { /// Set the [`ClientType`] used to fetch the YT player #[must_use] pub fn client_type(mut self, client_type: ClientType) -> Self { - self.client_type = Some(client_type); + self.client_types = Some(vec![client_type]); + self + } + + /// Set a list of client types used to fetch the YT player + /// + /// The clients are used in the given order. If a client cannot fetch the requested video, + /// an attempt is made with the next one. + #[must_use] + pub fn client_types>>(mut self, client_types: T) -> Self { + self.client_types = Some(client_types.into()); + self + } + + /// Set the `pot` token to circumvent bot detection + /// + /// YouTube has implemented the token to prevent other clients from downloading YouTube videos. + /// The token is generated using YouTube's botguard. 
Therefore you need a full browser environment + /// to obtain one. + /// + /// The Invidious project has created a script to extract this token: + /// + /// The `pot` token is only used for the [`ClientType::Desktop`] and [`ClientType::DesktopMusic`] clients. + #[must_use] + pub fn pot>(mut self, pot: S) -> Self { + self.pot = Some(pot.into()); self } @@ -594,9 +664,10 @@ impl DownloadQuery { /// /// If no download path is set, the video is downloaded to the current directory /// with a filename created by this template: `{track} {title} [{id}]`. - #[tracing::instrument(skip(self), fields(id = self.video.id))] + #[tracing::instrument(skip(self), level="error", fields(id = self.video.id))] pub async fn download(&self) -> Result { let mut last_err = None; + let mut failed_client = None; // Progress bar #[cfg(feature = "indicatif")] @@ -613,14 +684,19 @@ impl DownloadQuery { let err = match self .download_attempt( n, + failed_client, #[cfg(feature = "indicatif")] &pb, ) .await { Ok(res) => return Ok(res), + Err(DownloadError::Forbidden(c)) => { + failed_client = Some(c); + DownloadError::Forbidden(c) + } Err(DownloadError::Http(e)) => { - if !e.is_timeout() && e.status() != Some(StatusCode::FORBIDDEN) { + if !e.is_timeout() { return Err(DownloadError::Http(e)); } DownloadError::Http(e) @@ -640,6 +716,7 @@ impl DownloadQuery { async fn download_attempt( &self, #[allow(unused_variables)] n: u32, + failed_client: Option, #[cfg(feature = "indicatif")] pb: &Option, ) -> Result { let filter = self.filter.as_ref().unwrap_or(&self.dl.i.filter); @@ -672,19 +749,45 @@ impl DownloadQuery { }; #[cfg(feature = "indicatif")] if let Some(pb) = pb { - pb.set_message(format!( - "Fetching player data for {}{}", - self.video.name.as_deref().unwrap_or_default(), - attempt_suffix - )) + if let Some(n) = &self.video.name { + pb.set_message(format!("Fetching player data for {n}{attempt_suffix}")); + } else { + pb.set_message(format!("Fetching player data{attempt_suffix}")); + } } let q = 
self.dl.i.rp.query(); - let player_data = match self.client_type { - Some(client_type) => q.player_from_client(&self.video.id, client_type).await?, - None => q.player(&self.video.id).await?, - }; + + let mut client_types = Cow::Borrowed( + self.client_types + .as_ref() + .or(self.dl.i.client_types.as_ref()) + .map(Vec::as_slice) + .unwrap_or(DEFAULT_PLAYER_CLIENT_ORDER), + ); + + // If the last download failed, try another client if possible + if let Some(failed_client) = failed_client { + if let Some(pos) = client_types.iter().position(|c| c == &failed_client) { + let p2 = pos + 1; + if p2 < client_types.len() { + let mut v = client_types[p2..].to_vec(); + v.extend(&client_types[..p2]); + client_types = v.into(); + } + } + } + + let player_data = q.player_from_clients(&self.video.id, &client_types).await?; let user_agent = q.user_agent(player_data.client_type); + let pot = if matches!( + player_data.client_type, + ClientType::Desktop | ClientType::DesktopMusic + ) { + self.pot.as_deref().or(self.dl.i.pot.as_deref()) + } else { + None + }; // Select streams to download let (video, audio) = player_data.select_video_audio_stream(filter); @@ -762,10 +865,19 @@ impl DownloadQuery { &downloads, &self.dl.i.http, &user_agent, + pot, #[cfg(feature = "indicatif")] pb.clone(), ) - .await?; + .await + .map_err(|e| { + if let DownloadError::Http(e) = &e { + if e.status() == Some(StatusCode::FORBIDDEN) { + return DownloadError::Forbidden(player_data.client_type); + } + } + e + })?; #[cfg(feature = "indicatif")] if let Some(pb) = &pb { @@ -1006,6 +1118,7 @@ async fn download_single_file( output: &Path, http: &Client, user_agent: &str, + pot: Option<&str>, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { // Check if file is already downloaded @@ -1102,6 +1215,7 @@ async fn download_single_file( size.unwrap(), offset, user_agent, + pot, #[cfg(feature = "indicatif")] pb, ) @@ -1209,6 +1323,7 @@ async fn download_chunks_by_header( // Use the `range` url parameter to 
download a stream in chunks. // This ist used by YouTube's web player. The file size // must be known beforehand (it is included in the stream url). +#[allow(clippy::too_many_arguments)] async fn download_chunks_by_param( http: &Client, file: &mut File, @@ -1216,6 +1331,7 @@ async fn download_chunks_by_param( size: u64, offset: u64, user_agent: &str, + pot: Option<&str>, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { let mut offset = offset; @@ -1228,8 +1344,15 @@ async fn download_chunks_by_param( let range = get_download_range(offset, Some(size)); tracing::debug!("Fetching range {}-{}", range.start, range.end); + let mut urlp = + Url::parse_with_params(url, [("range", &format!("{}-{}", range.start, range.end))]) + .map_err(|e| DownloadError::Progressive(format!("url parsing: {e}").into()))?; + if let Some(pot) = pot { + urlp.query_pairs_mut().append_pair("pot", pot); + } + let res = http - .get(format!("{}&range={}-{}", url, range.start, range.end)) + .get(urlp) .header(header::USER_AGENT, user_agent) .header(header::ORIGIN, "https://www.youtube.com") .header(header::REFERER, "https://www.youtube.com/") @@ -1277,6 +1400,7 @@ async fn download_streams( downloads: &Vec, http: &Client, user_agent: &str, + pot: Option<&str>, #[cfg(feature = "indicatif")] pb: Option, ) -> Result<()> { let n = downloads.len(); @@ -1288,6 +1412,7 @@ async fn download_streams( &d.file, http, user_agent, + pot, #[cfg(feature = "indicatif")] pb.clone(), ) diff --git a/downloader/src/util.rs b/downloader/src/util.rs index 3934e2f..5069c96 100644 --- a/downloader/src/util.rs +++ b/downloader/src/util.rs @@ -1,6 +1,7 @@ use std::{borrow::Cow, collections::BTreeMap, path::PathBuf}; use reqwest::Url; +use rustypipe::client::ClientType; /// Error from the video downloader #[derive(thiserror::Error, Debug)] @@ -12,6 +13,9 @@ pub enum DownloadError { /// Error from the HTTP client #[error("http error: {0}")] Http(#[from] reqwest::Error), + /// 403 error trying to download video + 
#[error("YouTube returned 403 error")] + Forbidden(ClientType), /// File IO error #[error(transparent)] Io(#[from] std::io::Error), diff --git a/notes/po_token.md b/notes/po_token.md new file mode 100644 index 0000000..26064e6 --- /dev/null +++ b/notes/po_token.md @@ -0,0 +1,30 @@ +# About the new `pot` token + +YouTube has implemented a new method to prevent downloaders and alternative clients from accessing +their videos. Requests to YouTube's video servers now require a `pot` URL parameter. + +It is currently only required in the web player. The YTM and embedded players send the token, too, but do not require it (this may change in the future). + +The TV player does not use the token at all and is currently the best workaround. The only downside +is that the TV player does not return any video metadata like title and description text. + +The first part of a video file (range: 0-1007959 bytes) can be downloaded without the token. +Requesting more of the file requires the pot token to be set; otherwise YouTube responds with a 403 +error. + +The pot token is base64-encoded and usually starts with an M: + +`MnToZ2brHmyo0ehfKtK_EWUq60dPYDXksNX_UsaniM_Uj6zbtiIZujCHY02hr7opxB_n3XHetJQCBV9cnNHovuhvDqrjfxsKR-sjn-eIxqv3qOZKphvyDpQzlYBnT2AXK41R-ti6iPonrvlvKIASNmYX2lhsEg==` + +The token is generated by YouTube's BotGuard script. The token is bound to the visitor data cookie +used to fetch the player data. + +This feature was A/B-tested for a few weeks. During that time, refetching the player in case +of a 403 download error often made things work again. As of 08.08.2024 this new feature seems to be +stabilized and retrying requests does not work any more. + +## Getting a `pot` token + +You need a real browser environment to run YouTube's BotGuard and obtain a pot token. The Invidious project has created a script to +extract this token (https://github.com/iv-org/youtube-trusted-session-generator).
+The script opens YouTube's embedded video player, starts playback and extracts the visitor data diff --git a/src/client/channel.rs b/src/client/channel.rs index 254a39f..a688ee3 100644 --- a/src/client/channel.rs +++ b/src/client/channel.rs @@ -82,7 +82,7 @@ impl RustyPipeQuery { } /// Get the videos from a YouTube channel - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_videos + Debug>( &self, channel_id: S, @@ -94,7 +94,7 @@ impl RustyPipeQuery { /// Get a ordered list of videos from a YouTube channel /// /// This function does not return channel metadata. - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_videos_order + Debug>( &self, channel_id: S, @@ -105,7 +105,7 @@ impl RustyPipeQuery { } /// Get the videos of the given tab (Shorts, Livestreams) from a YouTube channel - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_videos_tab + Debug>( &self, channel_id: S, @@ -118,7 +118,7 @@ impl RustyPipeQuery { /// Get a ordered list of videos from the given tab (Shorts, Livestreams) of a YouTube channel /// /// This function does not return channel metadata. 
- #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_videos_tab_order + Debug>( &self, channel_id: S, @@ -136,7 +136,7 @@ impl RustyPipeQuery { } /// Search the videos of a channel - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_search + Debug, S2: AsRef + Debug>( &self, channel_id: S, @@ -152,7 +152,7 @@ impl RustyPipeQuery { } /// Get the playlists of a channel - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_playlists + Debug>( &self, channel_id: S, @@ -177,7 +177,7 @@ impl RustyPipeQuery { } /// Get additional metadata from the *About* tab of a channel - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_info + Debug>( &self, channel_id: S, diff --git a/src/client/channel_rss.rs b/src/client/channel_rss.rs index a2f0db3..b28a802 100644 --- a/src/client/channel_rss.rs +++ b/src/client/channel_rss.rs @@ -18,7 +18,7 @@ impl RustyPipeQuery { /// for checking a lot of channels or implementing a subscription feed. /// /// The downside of using the RSS feed is that it does not provide video durations. - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn channel_rss + Debug>( &self, channel_id: S, diff --git a/src/client/mod.rs b/src/client/mod.rs index a8bd5d4..3bbb724 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -216,6 +216,17 @@ static CLIENT_VERSION_REGEX: Lazy = static VISITOR_DATA_REGEX: Lazy = Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap()); +/// Default order of client types when fetching player data +/// +/// The order may change in the future in case YouTube applies changes to their +/// platform that disable a client or make it less reliable. 
+pub const DEFAULT_PLAYER_CLIENT_ORDER: &[ClientType] = &[ + ClientType::Tv, + ClientType::TvHtml5Embed, + ClientType::Android, + ClientType::Ios, +]; + /// The RustyPipe client used to access YouTube's API /// /// RustyPipe uses an [`Arc`] internally, so if you are using the client diff --git a/src/client/music_charts.rs b/src/client/music_charts.rs index 1075913..7ad6149 100644 --- a/src/client/music_charts.rs +++ b/src/client/music_charts.rs @@ -32,7 +32,7 @@ struct FormData { impl RustyPipeQuery { /// Get the YouTube Music charts for a given country - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_charts(&self, country: Option) -> Result { let context = self.get_context(ClientType::DesktopMusic, true, None).await; let request_body = QCharts { diff --git a/src/client/music_details.rs b/src/client/music_details.rs index 919b07f..e5ff5f5 100644 --- a/src/client/music_details.rs +++ b/src/client/music_details.rs @@ -40,7 +40,7 @@ struct QRadio<'a> { impl RustyPipeQuery { /// Get the metadata of a YouTube music track - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_details + Debug>( &self, video_id: S, @@ -68,7 +68,7 @@ impl RustyPipeQuery { /// Get the lyrics of a YouTube music track /// /// The `lyrics_id` has to be obtained using [`RustyPipeQuery::music_details`]. - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_lyrics + Debug>(&self, lyrics_id: S) -> Result { let lyrics_id = lyrics_id.as_ref(); let context = self.get_context(ClientType::DesktopMusic, true, None).await; @@ -90,7 +90,7 @@ impl RustyPipeQuery { /// Get related items (tracks, playlists, artists) to a YouTube Music track /// /// The `related_id` has to be obtained using [`RustyPipeQuery::music_details`]. 
- #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_related + Debug>( &self, related_id: S, @@ -115,7 +115,7 @@ impl RustyPipeQuery { /// Get a YouTube Music radio (a dynamically generated playlist) /// /// The `radio_id` can be obtained using [`RustyPipeQuery::music_artist`] to get an artist's radio. - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_radio + Debug>( &self, radio_id: S, @@ -146,7 +146,7 @@ impl RustyPipeQuery { } /// Get a YouTube Music radio (a dynamically generated playlist) for a track - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_radio_track + Debug>( &self, video_id: S, @@ -156,7 +156,7 @@ impl RustyPipeQuery { } /// Get a YouTube Music radio (a dynamically generated playlist) for a playlist - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_radio_playlist + Debug>( &self, playlist_id: S, diff --git a/src/client/music_genres.rs b/src/client/music_genres.rs index 16f3b53..627b93b 100644 --- a/src/client/music_genres.rs +++ b/src/client/music_genres.rs @@ -13,7 +13,7 @@ use super::{ impl RustyPipeQuery { /// Get a list of moods and genres from YouTube Music - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_genres(&self) -> Result, Error> { let context = self.get_context(ClientType::DesktopMusic, true, None).await; let request_body = QBrowse { @@ -32,7 +32,7 @@ impl RustyPipeQuery { } /// Get the playlists from a YouTube Music genre - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_genre + Debug>( &self, genre_id: S, diff --git a/src/client/music_new.rs b/src/client/music_new.rs index 68251e6..cfc5b6d 100644 --- a/src/client/music_new.rs +++ b/src/client/music_new.rs @@ -11,7 +11,7 @@ use 
super::{response, ClientType, MapRespCtx, MapResponse, QBrowse, RustyPipeQue impl RustyPipeQuery { /// Get the new albums that were released on YouTube Music - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_new_albums(&self) -> Result, Error> { let context = self.get_context(ClientType::DesktopMusic, true, None).await; let request_body = QBrowse { @@ -30,7 +30,7 @@ impl RustyPipeQuery { } /// Get the new music videos that were released on YouTube Music - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_new_videos(&self) -> Result, Error> { let context = self.get_context(ClientType::DesktopMusic, true, None).await; let request_body = QBrowse { diff --git a/src/client/music_playlist.rs b/src/client/music_playlist.rs index 8261b2b..0d1ec26 100644 --- a/src/client/music_playlist.rs +++ b/src/client/music_playlist.rs @@ -22,7 +22,7 @@ use super::{ impl RustyPipeQuery { /// Get a playlist from YouTube Music - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_playlist + Debug>( &self, playlist_id: S, @@ -54,7 +54,7 @@ impl RustyPipeQuery { } /// Get an album from YouTube Music - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_album + Debug>( &self, album_id: S, diff --git a/src/client/music_search.rs b/src/client/music_search.rs index f443229..ea197c7 100644 --- a/src/client/music_search.rs +++ b/src/client/music_search.rs @@ -126,7 +126,7 @@ impl RustyPipeQuery { } /// Get YouTube Music search suggestions - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn music_search_suggestion + Debug>( &self, query: S, diff --git a/src/client/pagination.rs b/src/client/pagination.rs index 94c44e8..1251574 100644 --- a/src/client/pagination.rs +++ b/src/client/pagination.rs @@ -14,7 +14,7 @@ use 
super::{response, ClientType, MapRespCtx, MapResponse, QContinuation, RustyP impl RustyPipeQuery { /// Get more YouTube items from the given continuation token and endpoint - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn continuation + Debug>( &self, ctoken: S, diff --git a/src/client/player.rs b/src/client/player.rs index b763bb5..b78a15c 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -25,6 +25,7 @@ use super::{ player::{self, Format}, }, ClientType, MapRespCtx, MapResponse, MapResult, RustyPipeQuery, YTContext, + DEFAULT_PLAYER_CLIENT_ORDER, }; #[derive(Debug, Serialize)] @@ -65,7 +66,7 @@ struct QContentPlaybackContext<'a> { impl RustyPipeQuery { /// Get YouTube player data (video/audio streams + basic metadata) pub async fn player + Debug>(&self, video_id: S) -> Result { - self.player_from_clients(video_id, &[ClientType::Desktop, ClientType::TvHtml5Embed]) + self.player_from_clients(video_id, DEFAULT_PLAYER_CLIENT_ORDER) .await } @@ -113,7 +114,7 @@ impl RustyPipeQuery { } /// Get YouTube player data (video/audio streams + basic metadata) using the specified client - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn player_from_client + Debug>( &self, video_id: S, diff --git a/src/client/playlist.rs b/src/client/playlist.rs index ecbf205..1ca0ab3 100644 --- a/src/client/playlist.rs +++ b/src/client/playlist.rs @@ -17,7 +17,7 @@ use super::{response, ClientType, MapRespCtx, MapResponse, MapResult, QBrowse, R impl RustyPipeQuery { /// Get a YouTube playlist - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn playlist + Debug>(&self, playlist_id: S) -> Result { let playlist_id = playlist_id.as_ref(); // YTM playlists require visitor data for continuations to work diff --git a/src/client/search.rs b/src/client/search.rs index 03529f4..50f3b33 100644 --- a/src/client/search.rs +++ 
b/src/client/search.rs @@ -24,7 +24,7 @@ struct QSearch<'a> { impl RustyPipeQuery { /// Search YouTube - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn search + Debug>( &self, query: S, @@ -48,7 +48,7 @@ impl RustyPipeQuery { } /// Search YouTube using the given [`SearchFilter`] - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn search_filter + Debug>( &self, query: S, @@ -73,7 +73,7 @@ impl RustyPipeQuery { } /// Get YouTube search suggestions - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn search_suggestion + Debug>( &self, query: S, diff --git a/src/client/trends.rs b/src/client/trends.rs index 0a46bc5..a445c91 100644 --- a/src/client/trends.rs +++ b/src/client/trends.rs @@ -16,7 +16,7 @@ use super::{ impl RustyPipeQuery { /// Get the videos from the YouTube startpage - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn startpage(&self) -> Result, Error> { let context = self.get_context(ClientType::Desktop, true, None).await; let request_body = QBrowse { @@ -35,7 +35,7 @@ impl RustyPipeQuery { } /// Get the videos from the YouTube trending page - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn trending(&self) -> Result, Error> { let context = self.get_context(ClientType::Desktop, true, None).await; let request_body = QBrowseParams { diff --git a/src/client/url_resolver.rs b/src/client/url_resolver.rs index ab69903..e771d6e 100644 --- a/src/client/url_resolver.rs +++ b/src/client/url_resolver.rs @@ -58,7 +58,7 @@ impl RustyPipeQuery { /// ); /// # }); /// ``` - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn resolve_url + Debug>( self, url: S, @@ -236,7 +236,7 @@ impl RustyPipeQuery { /// ); /// # }); /// ``` - #[tracing::instrument(skip(self))] 
+ #[tracing::instrument(skip(self), level = "error")] pub async fn resolve_string + Debug>( self, s: S, diff --git a/src/client/video_details.rs b/src/client/video_details.rs index 1d8a9db..a1fb93e 100644 --- a/src/client/video_details.rs +++ b/src/client/video_details.rs @@ -31,7 +31,7 @@ struct QVideo<'a> { impl RustyPipeQuery { /// Get the metadata for a video - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn video_details + Debug>( &self, video_id: S, @@ -56,7 +56,7 @@ impl RustyPipeQuery { } /// Get the comments for a video using the continuation token obtained from `rusty_pipe_query.video_details()` - #[tracing::instrument(skip(self))] + #[tracing::instrument(skip(self), level = "error")] pub async fn video_comments + Debug>( &self, ctoken: S, diff --git a/tests/youtube.rs b/tests/youtube.rs index d1e97f6..b6a0577 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -137,8 +137,11 @@ async fn get_player_from_client(#[case] client_type: ClientType, rp: RustyPipe) assert_eq!(audio.format, AudioFormat::Webm); assert_eq!(audio.codec, AudioCodec::Opus); - check_video_stream(video).await; - check_video_stream(audio).await; + // Desktop client now requires pot token so the streams cannot be tested here + if client_type != ClientType::Desktop { + check_video_stream(video).await; + check_video_stream(audio).await; + } } assert!(player_data.expires_in_seconds > 10000); @@ -246,19 +249,25 @@ async fn get_player( let details = player_data.details; assert_eq!(details.id, id); - assert_eq!(details.name.expect("name"), name); - let desc = details.description.expect("description"); - assert!(desc.contains(description), "description: {desc}"); + if let Some(n) = &details.name { + assert_eq!(n, name); + } + if let Some(desc) = &details.description { + assert!(desc.contains(description), "description: {desc}"); + } assert_eq!(details.duration, duration); assert_eq!(details.channel_id, channel_id); - 
assert_eq!(details.channel_name.expect("channel name"), channel_name); - assert_gte(details.view_count.expect("view count"), views, "views"); + if let Some(cn) = &details.channel_name { + assert_eq!(cn, channel_name); + } + if let Some(vc) = details.view_count { + assert_gte(vc, views, "views"); + } assert_eq!(details.is_live, is_live); assert_eq!(details.is_live_content, is_live_content); if is_live { - assert!(player_data.hls_manifest_url.is_some()); - assert!(player_data.dash_manifest_url.is_some()); + assert!(player_data.hls_manifest_url.is_some() || player_data.dash_manifest_url.is_some()); } else { assert!(!player_data.video_only_streams.is_empty()); assert!(!player_data.audio_streams.is_empty());