Compare commits

...

8 commits

22 changed files with 396 additions and 179 deletions

View file

@ -41,7 +41,7 @@ rustls-tls-native-roots = [
]
[dependencies]
rustypipe.workspace = true
rustypipe = { workspace = true, features = ["rss"] }
rustypipe-downloader.workspace = true
reqwest.workspace = true
tokio = { workspace = true, features = ["macros", "rt-multi-thread"] }

View file

@ -1,7 +1,12 @@
#![doc = include_str!("../README.md")]
#![warn(clippy::todo, clippy::dbg_macro)]
use std::{path::PathBuf, str::FromStr, time::Duration};
use std::{
path::PathBuf,
str::FromStr,
sync::{atomic::AtomicUsize, Arc},
time::Duration,
};
use clap::{Parser, Subcommand, ValueEnum};
use futures::stream::{self, StreamExt};
@ -89,20 +94,23 @@ enum Commands {
#[clap(short, long)]
resolution: Option<u32>,
/// Download only the audio track
#[clap(long)]
#[clap(short, long)]
audio: bool,
/// Number of videos downloaded in parallel
#[clap(short, long, default_value_t = 8)]
parallel: usize,
/// Use YouTube Music for downloading playlists
#[clap(long)]
#[clap(short, long)]
music: bool,
/// Limit the number of videos to download
#[clap(long, default_value_t = 1000)]
#[clap(short, long, default_value_t = 1000)]
limit: usize,
/// YT Client used to fetch player data
#[clap(short, long)]
client_type: Option<Vec<ClientTypeArg>>,
/// `pot` token to circumvent bot detection
#[clap(long)]
client_type: Option<PlayerType>,
pot: Option<String>,
},
/// Extract video, playlist, album or channel data
Get {
@ -115,17 +123,20 @@ enum Commands {
#[clap(long)]
pretty: bool,
/// Output as text
#[clap(long)]
#[clap(short, long)]
txt: bool,
/// Limit the number of items to fetch
#[clap(long, default_value_t = 20)]
#[clap(short, long, default_value_t = 20)]
limit: usize,
/// Channel tab
#[clap(long, default_value = "videos")]
tab: ChannelTab,
/// Use YouTube Music
#[clap(long)]
#[clap(short, long)]
music: bool,
/// Use the RSS feed of a channel
#[clap(long)]
rss: bool,
/// Get comments
#[clap(long)]
comments: Option<CommentsOrder>,
@ -136,8 +147,8 @@ enum Commands {
#[clap(long)]
player: bool,
/// YT Client used to fetch player data
#[clap(long)]
client_type: Option<PlayerType>,
#[clap(short, long)]
client_type: Option<ClientTypeArg>,
},
/// Search YouTube
Search {
@ -150,10 +161,10 @@ enum Commands {
#[clap(long)]
pretty: bool,
/// Output as text
#[clap(long)]
#[clap(short, long)]
txt: bool,
/// Limit the number of items to fetch
#[clap(long, default_value_t = 20)]
#[clap(short, long, default_value_t = 20)]
limit: usize,
/// Filter results by item type
#[clap(long)]
@ -171,9 +182,10 @@ enum Commands {
#[clap(long)]
channel: Option<String>,
/// YouTube Music search filter
#[clap(long)]
#[clap(short, long)]
music: Option<MusicSearchCategory>,
},
/// Get a YouTube visitor data cookie
Vdata,
}
@ -251,7 +263,7 @@ enum MusicSearchCategory {
}
#[derive(Copy, Clone, PartialEq, Eq, ValueEnum)]
enum PlayerType {
enum ClientTypeArg {
Desktop,
Tv,
TvEmbed,
@ -301,14 +313,14 @@ impl From<SearchOrder> for search_filter::Order {
}
}
impl From<PlayerType> for ClientType {
fn from(value: PlayerType) -> Self {
impl From<ClientTypeArg> for ClientType {
fn from(value: ClientTypeArg) -> Self {
match value {
PlayerType::Desktop => Self::Desktop,
PlayerType::TvEmbed => Self::TvHtml5Embed,
PlayerType::Tv => Self::Tv,
PlayerType::Android => Self::Android,
PlayerType::Ios => Self::Ios,
ClientTypeArg::Desktop => Self::Desktop,
ClientTypeArg::TvEmbed => Self::TvHtml5Embed,
ClientTypeArg::Tv => Self::Tv,
ClientTypeArg::Android => Self::Android,
ClientTypeArg::Ios => Self::Ios,
}
}
}
@ -415,11 +427,11 @@ async fn download_video(
dl: &Downloader,
id: &str,
target: &DownloadTarget,
client_type: Option<PlayerType>,
client_types: Option<&[ClientType]>,
) {
let mut q = target.apply(dl.id(id));
if let Some(client_type) = client_type {
q = q.client_type(client_type.into());
if let Some(client_types) = client_types {
q = q.client_types(client_types);
}
let res = q.download().await;
if let Err(e) = res {
@ -432,9 +444,9 @@ async fn download_videos(
videos: Vec<DownloadVideo>,
target: &DownloadTarget,
parallel: usize,
client_type: Option<PlayerType>,
client_types: Option<&[ClientType]>,
multi: MultiProgress,
) {
) -> anyhow::Result<()> {
// Indicatif setup
let main = multi.add(ProgressBar::new(
videos.len().try_into().unwrap_or_default(),
@ -448,27 +460,38 @@ async fn download_videos(
);
main.tick();
let n_failed = Arc::new(AtomicUsize::default());
stream::iter(videos)
.for_each_concurrent(parallel, |video| {
let dl = dl.clone();
let main = main.clone();
let id = video.id().to_owned();
let n_failed = n_failed.clone();
let mut q = target.apply(dl.video(video));
if let Some(client_type) = client_type {
q = q.client_type(client_type.into());
if let Some(client_types) = client_types {
q = q.client_types(client_types);
}
async move {
if let Err(e) = q.download().await {
if !matches!(e, DownloadError::Exists(_)) {
tracing::error!("[{id}]: {e}");
n_failed.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
}
} else {
main.inc(1);
}
main.inc(1);
}
})
.await;
let n_failed = n_failed.load(std::sync::atomic::Ordering::Relaxed);
if n_failed > 0 {
anyhow::bail!("{n_failed} downloads failed");
}
Ok(())
}
/// Stderr writer that suspends the progress bars before printing logs
@ -495,6 +518,14 @@ impl std::io::Write for ProgWriter {
#[tokio::main]
async fn main() {
if let Err(e) = run().await {
println!("{}", "Error:".red().bold());
println!("{}", e);
std::process::exit(1);
}
}
async fn run() -> anyhow::Result<()> {
let cli = Cli::parse();
let multi = MultiProgress::new();
@ -524,7 +555,7 @@ async fn main() {
if let Some(country) = cli.country {
rp = rp.country(Country::from_str(&country.to_ascii_uppercase()).expect("invalid country"));
}
let rp = rp.build().unwrap();
let rp = rp.build()?;
match cli.command {
Commands::Download {
@ -536,8 +567,9 @@ async fn main() {
music,
limit,
client_type,
pot,
} => {
let url_target = rp.query().resolve_string(&id, false).await.unwrap();
let url_target = rp.query().resolve_string(&id, false).await?;
let mut filter = StreamFilter::new();
if let Some(res) = resolution {
@ -555,20 +587,21 @@ async fn main() {
dl = dl.audio_tag().crop_cover();
filter = filter.no_video();
}
if let Some(pot) = pot {
dl = dl.pot(pot);
}
let dl = dl.stream_filter(filter).build();
let cts = client_type.map(|c| c.into_iter().map(ClientType::from).collect::<Vec<_>>());
match url_target {
UrlTarget::Video { id, .. } => {
download_video(&dl, &id, &target, client_type).await;
download_video(&dl, &id, &target, cts.as_deref()).await;
}
UrlTarget::Channel { id } => {
target.assert_dir();
let mut channel = rp.query().channel_videos(id).await.unwrap();
channel
.content
.extend_limit(&rp.query(), limit)
.await
.unwrap();
let mut channel = rp.query().channel_videos(id).await?;
channel.content.extend_limit(&rp.query(), limit).await?;
let videos = channel
.content
.items
@ -576,17 +609,13 @@ async fn main() {
.take(limit)
.map(|v| DownloadVideo::from_entity(&v))
.collect();
download_videos(&dl, videos, &target, parallel, client_type, multi).await;
download_videos(&dl, videos, &target, parallel, cts.as_deref(), multi).await?;
}
UrlTarget::Playlist { id } => {
target.assert_dir();
let videos = if music {
let mut playlist = rp.query().music_playlist(id).await.unwrap();
playlist
.tracks
.extend_limit(&rp.query(), limit)
.await
.unwrap();
let mut playlist = rp.query().music_playlist(id).await?;
playlist.tracks.extend_limit(&rp.query(), limit).await?;
playlist
.tracks
.items
@ -595,12 +624,8 @@ async fn main() {
.map(|v| DownloadVideo::from_track(&v))
.collect()
} else {
let mut playlist = rp.query().playlist(id).await.unwrap();
playlist
.videos
.extend_limit(&rp.query(), limit)
.await
.unwrap();
let mut playlist = rp.query().playlist(id).await?;
playlist.videos.extend_limit(&rp.query(), limit).await?;
playlist
.videos
.items
@ -609,18 +634,18 @@ async fn main() {
.map(|v| DownloadVideo::from_entity(&v))
.collect()
};
download_videos(&dl, videos, &target, parallel, client_type, multi).await;
download_videos(&dl, videos, &target, parallel, cts.as_deref(), multi).await?;
}
UrlTarget::Album { id } => {
target.assert_dir();
let album = rp.query().music_album(id).await.unwrap();
let album = rp.query().music_album(id).await?;
let videos = album
.tracks
.into_iter()
.take(limit)
.map(|v| DownloadVideo::from_track(&v))
.collect();
download_videos(&dl, videos, &target, parallel, client_type, multi).await;
download_videos(&dl, videos, &target, parallel, cts.as_deref(), multi).await?;
}
}
}
@ -632,22 +657,23 @@ async fn main() {
limit,
tab,
music,
rss,
comments,
lyrics,
player,
client_type,
} => {
let target = rp.query().resolve_string(&id, false).await.unwrap();
let target = rp.query().resolve_string(&id, false).await?;
match target {
UrlTarget::Video { id, .. } => {
if lyrics {
let details = rp.query().music_details(&id).await.unwrap();
let details = rp.query().music_details(&id).await?;
match details.lyrics_id {
Some(lyrics_id) => {
let lyrics = rp.query().music_lyrics(lyrics_id).await.unwrap();
let lyrics = rp.query().music_lyrics(lyrics_id).await?;
if txt {
println!("{}\n\n{}", lyrics.body, lyrics.footer);
println!("{}\n\n{}", lyrics.body, lyrics.footer.blue());
} else {
print_data(&lyrics, format, pretty);
}
@ -655,21 +681,26 @@ async fn main() {
None => eprintln!("no lyrics found"),
}
} else if music {
let details = rp.query().music_details(&id).await.unwrap();
let details = rp.query().music_details(&id).await?;
if txt {
if details.track.is_video {
println!("[MV]");
anstream::println!("{}", "[MV]".on_green().black());
} else {
println!("[Track]");
anstream::println!("{}", "[Track]".on_green().black());
}
print!("{} [{}]", details.track.name, details.track.id);
anstream::print!(
"{} [{}]",
details.track.name.green().bold(),
details.track.id
);
print_duration(details.track.duration);
println!();
print_artists(&details.track.artists);
println!();
if !details.track.is_video {
println!(
"Album: {}",
anstream::println!(
"{} {}",
"Album:".blue(),
details
.track
.album
@ -679,7 +710,7 @@ async fn main() {
)
}
if let Some(view_count) = details.track.view_count {
println!("Views: {view_count}");
anstream::println!("{} {}", "Views:".blue(), view_count);
}
} else {
print_data(&details, format, pretty);
@ -689,26 +720,20 @@ async fn main() {
rp.query().player_from_client(&id, client_type.into()).await
} else {
rp.query().player(&id).await
}
.unwrap();
}?;
print_data(&player, format, pretty);
} else {
let mut details = rp.query().video_details(&id).await.unwrap();
let mut details = rp.query().video_details(&id).await?;
match comments {
Some(CommentsOrder::Top) => {
details
.top_comments
.extend_limit(rp.query(), limit)
.await
.unwrap();
details.top_comments.extend_limit(rp.query(), limit).await?;
}
Some(CommentsOrder::Latest) => {
details
.latest_comments
.extend_limit(rp.query(), limit)
.await
.unwrap();
.await?;
}
None => {}
}
@ -786,7 +811,7 @@ async fn main() {
}
UrlTarget::Channel { id } => {
if music {
let artist = rp.query().music_artist(&id, true).await.unwrap();
let artist = rp.query().music_artist(&id, true).await?;
if txt {
anstream::println!(
"{}\n{} [{}]",
@ -836,6 +861,31 @@ async fn main() {
} else {
print_data(&artist, format, pretty);
}
} else if rss {
let rss = rp.query().channel_rss(&id).await?;
if txt {
anstream::println!(
"{}\n{} [{}]\n{} {}",
"[Channel RSS]".on_green().black(),
rss.name.green().bold(),
rss.id,
"Created on:".blue(),
rss.create_date,
);
if let Some(v) = rss.videos.first() {
anstream::println!(
"{} {} [{}]",
"Latest video:".blue(),
v.publish_date,
v.id
);
}
println!();
print_entities(&rss.videos);
} else {
print_data(&rss, format, pretty);
}
} else {
match tab {
ChannelTab::Videos | ChannelTab::Shorts | ChannelTab::Live => {
@ -846,13 +896,9 @@ async fn main() {
_ => unreachable!(),
};
let mut channel =
rp.query().channel_videos_tab(&id, video_tab).await.unwrap();
rp.query().channel_videos_tab(&id, video_tab).await?;
channel
.content
.extend_limit(rp.query(), limit)
.await
.unwrap();
channel.content.extend_limit(rp.query(), limit).await?;
if txt {
anstream::print!(
@ -874,7 +920,7 @@ async fn main() {
}
}
ChannelTab::Playlists => {
let channel = rp.query().channel_playlists(&id).await.unwrap();
let channel = rp.query().channel_playlists(&id).await?;
if txt {
anstream::println!(
@ -894,7 +940,7 @@ async fn main() {
}
}
ChannelTab::Info => {
let info = rp.query().channel_info(&id).await.unwrap();
let info = rp.query().channel_info(&id).await?;
if txt {
anstream::println!(
@ -930,12 +976,8 @@ async fn main() {
}
UrlTarget::Playlist { id } => {
if music {
let mut playlist = rp.query().music_playlist(&id).await.unwrap();
playlist
.tracks
.extend_limit(rp.query(), limit)
.await
.unwrap();
let mut playlist = rp.query().music_playlist(&id).await?;
playlist.tracks.extend_limit(rp.query(), limit).await?;
if txt {
anstream::println!(
"{}\n{} [{}]\n{} {}",
@ -959,12 +1001,8 @@ async fn main() {
print_data(&playlist, format, pretty);
}
} else {
let mut playlist = rp.query().playlist(&id).await.unwrap();
playlist
.videos
.extend_limit(rp.query(), limit)
.await
.unwrap();
let mut playlist = rp.query().playlist(&id).await?;
playlist.videos.extend_limit(rp.query(), limit).await?;
if txt {
anstream::println!(
"{}\n{} [{}]\n{} {}",
@ -993,7 +1031,7 @@ async fn main() {
}
}
UrlTarget::Album { id } => {
let album = rp.query().music_album(&id).await.unwrap();
let album = rp.query().music_album(&id).await?;
if txt {
anstream::print!(
"{}\n{} [{}] ({:?}",
@ -1036,8 +1074,8 @@ async fn main() {
} => match music {
None => match channel {
Some(channel) => {
rustypipe::validate::channel_id(&channel).unwrap();
let res = rp.query().channel_search(&channel, &query).await.unwrap();
rustypipe::validate::channel_id(&channel)?;
let res = rp.query().channel_search(&channel, &query).await?;
print_data(&res, format, pretty);
}
None => {
@ -1049,9 +1087,8 @@ async fn main() {
let mut res = rp
.query()
.search_filter::<YouTubeItem, _>(&query, &filter)
.await
.unwrap();
res.items.extend_limit(rp.query(), limit).await.unwrap();
.await?;
res.items.extend_limit(rp.query(), limit).await?;
if txt {
if let Some(corr) = res.corrected_query {
@ -1064,27 +1101,27 @@ async fn main() {
}
},
Some(MusicSearchCategory::All) => {
let res = rp.query().music_search_main(&query).await.unwrap();
let res = rp.query().music_search_main(&query).await?;
print_music_search(&res, format, pretty, txt);
}
Some(MusicSearchCategory::Tracks) => {
let mut res = rp.query().music_search_tracks(&query).await.unwrap();
res.items.extend_limit(rp.query(), limit).await.unwrap();
let mut res = rp.query().music_search_tracks(&query).await?;
res.items.extend_limit(rp.query(), limit).await?;
print_music_search(&res, format, pretty, txt);
}
Some(MusicSearchCategory::Videos) => {
let mut res = rp.query().music_search_videos(&query).await.unwrap();
res.items.extend_limit(rp.query(), limit).await.unwrap();
let mut res = rp.query().music_search_videos(&query).await?;
res.items.extend_limit(rp.query(), limit).await?;
print_music_search(&res, format, pretty, txt);
}
Some(MusicSearchCategory::Artists) => {
let mut res = rp.query().music_search_artists(&query).await.unwrap();
res.items.extend_limit(rp.query(), limit).await.unwrap();
let mut res = rp.query().music_search_artists(&query).await?;
res.items.extend_limit(rp.query(), limit).await?;
print_music_search(&res, format, pretty, txt);
}
Some(MusicSearchCategory::Albums) => {
let mut res = rp.query().music_search_albums(&query).await.unwrap();
res.items.extend_limit(rp.query(), limit).await.unwrap();
let mut res = rp.query().music_search_albums(&query).await?;
res.items.extend_limit(rp.query(), limit).await?;
print_music_search(&res, format, pretty, txt);
}
Some(MusicSearchCategory::PlaylistsYtm | MusicSearchCategory::PlaylistsCommunity) => {
@ -1094,15 +1131,15 @@ async fn main() {
&query,
music == Some(MusicSearchCategory::PlaylistsCommunity),
)
.await
.unwrap();
res.items.extend_limit(rp.query(), limit).await.unwrap();
.await?;
res.items.extend_limit(rp.query(), limit).await?;
print_music_search(&res, format, pretty, txt);
}
},
Commands::Vdata => {
let vd = rp.query().get_visitor_data().await.unwrap();
let vd = rp.query().get_visitor_data().await?;
println!("{vd}");
}
};
Ok(())
}

View file

@ -17,9 +17,9 @@ use futures::stream::{self, StreamExt};
use once_cell::sync::Lazy;
use rand::Rng;
use regex::Regex;
use reqwest::{header, Client, StatusCode};
use reqwest::{header, Client, StatusCode, Url};
use rustypipe::{
client::{ClientType, RustyPipe},
client::{ClientType, RustyPipe, DEFAULT_PLAYER_CLIENT_ORDER},
model::{
traits::{FileFormat, YtEntity},
AudioCodec, TrackItem, VideoCodec, VideoPlayer,
@ -74,6 +74,8 @@ pub struct DownloaderBuilder {
audio_tag: bool,
#[cfg(feature = "audiotag")]
crop_cover: bool,
client_types: Option<Vec<ClientType>>,
pot: Option<String>,
}
struct DownloaderInner {
@ -103,6 +105,10 @@ struct DownloaderInner {
/// Crop YT thumbnails to ensure square album covers
#[cfg(feature = "audiotag")]
crop_cover: bool,
/// Client types for fetching videos
client_types: Option<Vec<ClientType>>,
/// Pot token to circumvent bot detection
pot: Option<String>,
}
/// Download query
@ -120,8 +126,10 @@ pub struct DownloadQuery {
filter: Option<StreamFilter>,
/// Target video format
video_format: Option<DownloadVideoFormat>,
/// ClientType type for fetching videos
client_type: Option<ClientType>,
/// Client types for fetching videos
client_types: Option<Vec<ClientType>>,
/// Pot token to circumvent bot detection
pot: Option<String>,
}
/// Video to be downloaded
@ -287,6 +295,8 @@ impl Default for DownloaderBuilder {
audio_tag: false,
#[cfg(feature = "audiotag")]
crop_cover: false,
client_types: None,
pot: None,
}
}
}
@ -384,6 +394,38 @@ impl DownloaderBuilder {
self
}
/// Use a single [`ClientType`] to fetch the YT player
///
/// This stores a one-element client list, replacing any list configured before.
#[must_use]
pub fn client_type(self, client_type: ClientType) -> Self {
    // A single client is just a list with one entry
    self.client_types([client_type])
}
/// Configure the client types used to fetch the YT player
///
/// The list is ordered: the first client is tried first. If a client cannot fetch
/// the requested video, an attempt is made with the next one.
#[must_use]
pub fn client_types<T: Into<Vec<ClientType>>>(mut self, client_types: T) -> Self {
    let ordered: Vec<ClientType> = client_types.into();
    self.client_types = Some(ordered);
    self
}
/// Configure the `pot` token used to circumvent bot detection
///
/// YouTube introduced this token to keep other clients from downloading YouTube videos.
/// It is generated by YouTube's botguard, so a full browser environment is needed
/// to obtain one.
///
/// The Invidious project has created a script to extract this token: <https://github.com/iv-org/youtube-trusted-session-generator>
///
/// Only the [`ClientType::Desktop`] and [`ClientType::DesktopMusic`] clients make use of the `pot` token.
#[must_use]
pub fn pot<S: Into<String>>(mut self, pot: S) -> Self {
    let token: String = pot.into();
    self.pot = Some(token);
    self
}
/// Create a new, configured [`Downloader`] instance
pub fn build(self) -> Downloader {
self.build_with_client(
@ -417,6 +459,8 @@ impl DownloaderBuilder {
audio_tag: self.audio_tag,
#[cfg(feature = "audiotag")]
crop_cover: self.crop_cover,
client_types: self.client_types,
pot: self.pot,
}),
}
}
@ -450,7 +494,8 @@ impl Downloader {
progress: None,
filter: None,
video_format: None,
client_type: None,
client_types: None,
pot: None,
}
}
@ -586,7 +631,32 @@ impl DownloadQuery {
/// Set the [`ClientType`] used to fetch the YT player
#[must_use]
pub fn client_type(mut self, client_type: ClientType) -> Self {
self.client_type = Some(client_type);
self.client_types = Some(vec![client_type]);
self
}
/// Configure the client types used to fetch the YT player for this query
///
/// The list is ordered: the first client is tried first. If a client cannot fetch
/// the requested video, an attempt is made with the next one.
#[must_use]
pub fn client_types<T: Into<Vec<ClientType>>>(mut self, client_types: T) -> Self {
    let ordered: Vec<ClientType> = client_types.into();
    self.client_types = Some(ordered);
    self
}
/// Configure the `pot` token used to circumvent bot detection for this query
///
/// YouTube introduced this token to keep other clients from downloading YouTube videos.
/// It is generated by YouTube's botguard, so a full browser environment is needed
/// to obtain one.
///
/// The Invidious project has created a script to extract this token: <https://github.com/iv-org/youtube-trusted-session-generator>
///
/// Only the [`ClientType::Desktop`] and [`ClientType::DesktopMusic`] clients make use of the `pot` token.
#[must_use]
pub fn pot<S: Into<String>>(mut self, pot: S) -> Self {
    let token: String = pot.into();
    self.pot = Some(token);
    self
}
@ -594,9 +664,10 @@ impl DownloadQuery {
///
/// If no download path is set, the video is downloaded to the current directory
/// with a filename created by this template: `{track} {title} [{id}]`.
#[tracing::instrument(skip(self), fields(id = self.video.id))]
#[tracing::instrument(skip(self), level="error", fields(id = self.video.id))]
pub async fn download(&self) -> Result<DownloadResult> {
let mut last_err = None;
let mut failed_client = None;
// Progress bar
#[cfg(feature = "indicatif")]
@ -613,14 +684,19 @@ impl DownloadQuery {
let err = match self
.download_attempt(
n,
failed_client,
#[cfg(feature = "indicatif")]
&pb,
)
.await
{
Ok(res) => return Ok(res),
Err(DownloadError::Forbidden(c)) => {
failed_client = Some(c);
DownloadError::Forbidden(c)
}
Err(DownloadError::Http(e)) => {
if !e.is_timeout() && e.status() != Some(StatusCode::FORBIDDEN) {
if !e.is_timeout() {
return Err(DownloadError::Http(e));
}
DownloadError::Http(e)
@ -640,6 +716,7 @@ impl DownloadQuery {
async fn download_attempt(
&self,
#[allow(unused_variables)] n: u32,
failed_client: Option<ClientType>,
#[cfg(feature = "indicatif")] pb: &Option<ProgressBar>,
) -> Result<DownloadResult> {
let filter = self.filter.as_ref().unwrap_or(&self.dl.i.filter);
@ -672,19 +749,45 @@ impl DownloadQuery {
};
#[cfg(feature = "indicatif")]
if let Some(pb) = pb {
pb.set_message(format!(
"Fetching player data for {}{}",
self.video.name.as_deref().unwrap_or_default(),
attempt_suffix
))
if let Some(n) = &self.video.name {
pb.set_message(format!("Fetching player data for {n}{attempt_suffix}"));
} else {
pb.set_message(format!("Fetching player data{attempt_suffix}"));
}
}
let q = self.dl.i.rp.query();
let player_data = match self.client_type {
Some(client_type) => q.player_from_client(&self.video.id, client_type).await?,
None => q.player(&self.video.id).await?,
};
let mut client_types = Cow::Borrowed(
self.client_types
.as_ref()
.or(self.dl.i.client_types.as_ref())
.map(Vec::as_slice)
.unwrap_or(DEFAULT_PLAYER_CLIENT_ORDER),
);
// If the last download failed, try another client if possible
if let Some(failed_client) = failed_client {
if let Some(pos) = client_types.iter().position(|c| c == &failed_client) {
let p2 = pos + 1;
if p2 < client_types.len() {
let mut v = client_types[p2..].to_vec();
v.extend(&client_types[..p2]);
client_types = v.into();
}
}
}
let player_data = q.player_from_clients(&self.video.id, &client_types).await?;
let user_agent = q.user_agent(player_data.client_type);
let pot = if matches!(
player_data.client_type,
ClientType::Desktop | ClientType::DesktopMusic
) {
self.pot.as_deref().or(self.dl.i.pot.as_deref())
} else {
None
};
// Select streams to download
let (video, audio) = player_data.select_video_audio_stream(filter);
@ -762,10 +865,19 @@ impl DownloadQuery {
&downloads,
&self.dl.i.http,
&user_agent,
pot,
#[cfg(feature = "indicatif")]
pb.clone(),
)
.await?;
.await
.map_err(|e| {
if let DownloadError::Http(e) = &e {
if e.status() == Some(StatusCode::FORBIDDEN) {
return DownloadError::Forbidden(player_data.client_type);
}
}
e
})?;
#[cfg(feature = "indicatif")]
if let Some(pb) = &pb {
@ -1006,6 +1118,7 @@ async fn download_single_file(
output: &Path,
http: &Client,
user_agent: &str,
pot: Option<&str>,
#[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
// Check if file is already downloaded
@ -1102,6 +1215,7 @@ async fn download_single_file(
size.unwrap(),
offset,
user_agent,
pot,
#[cfg(feature = "indicatif")]
pb,
)
@ -1209,6 +1323,7 @@ async fn download_chunks_by_header(
// Use the `range` url parameter to download a stream in chunks.
// This is used by YouTube's web player. The file size
// must be known beforehand (it is included in the stream url).
#[allow(clippy::too_many_arguments)]
async fn download_chunks_by_param(
http: &Client,
file: &mut File,
@ -1216,6 +1331,7 @@ async fn download_chunks_by_param(
size: u64,
offset: u64,
user_agent: &str,
pot: Option<&str>,
#[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
let mut offset = offset;
@ -1228,8 +1344,15 @@ async fn download_chunks_by_param(
let range = get_download_range(offset, Some(size));
tracing::debug!("Fetching range {}-{}", range.start, range.end);
let mut urlp =
Url::parse_with_params(url, [("range", &format!("{}-{}", range.start, range.end))])
.map_err(|e| DownloadError::Progressive(format!("url parsing: {e}").into()))?;
if let Some(pot) = pot {
urlp.query_pairs_mut().append_pair("pot", pot);
}
let res = http
.get(format!("{}&range={}-{}", url, range.start, range.end))
.get(urlp)
.header(header::USER_AGENT, user_agent)
.header(header::ORIGIN, "https://www.youtube.com")
.header(header::REFERER, "https://www.youtube.com/")
@ -1277,6 +1400,7 @@ async fn download_streams(
downloads: &Vec<StreamDownload>,
http: &Client,
user_agent: &str,
pot: Option<&str>,
#[cfg(feature = "indicatif")] pb: Option<ProgressBar>,
) -> Result<()> {
let n = downloads.len();
@ -1288,6 +1412,7 @@ async fn download_streams(
&d.file,
http,
user_agent,
pot,
#[cfg(feature = "indicatif")]
pb.clone(),
)

View file

@ -1,6 +1,7 @@
use std::{borrow::Cow, collections::BTreeMap, path::PathBuf};
use reqwest::Url;
use rustypipe::client::ClientType;
/// Error from the video downloader
#[derive(thiserror::Error, Debug)]
@ -12,6 +13,9 @@ pub enum DownloadError {
/// Error from the HTTP client
#[error("http error: {0}")]
Http(#[from] reqwest::Error),
/// 403 error trying to download video
#[error("YouTube returned 403 error")]
Forbidden(ClientType),
/// File IO error
#[error(transparent)]
Io(#[from] std::io::Error),

30
notes/po_token.md Normal file
View file

@ -0,0 +1,30 @@
# About the new `pot` token
YouTube has implemented a new method to prevent downloaders and alternative clients from accessing
their videos. Now requests to YouTube's video servers require a `pot` URL parameter.
It is currently only required in the web player. The YTM and embedded players send the token, too, but do not require it (this may change in the future).
The TV player does not use the token at all and is currently the best workaround. The only downside
is that the TV player does not return any video metadata like title and description text.
The first part of a video file (range: 0-1007959 bytes) can be downloaded without the token.
Requesting more of the file requires the pot token to be set, otherwise YouTube responds with a 403
error.
The pot token is base64-encoded and usually starts with an `M`:
`MnToZ2brHmyo0ehfKtK_EWUq60dPYDXksNX_UsaniM_Uj6zbtiIZujCHY02hr7opxB_n3XHetJQCBV9cnNHovuhvDqrjfxsKR-sjn-eIxqv3qOZKphvyDpQzlYBnT2AXK41R-ti6iPonrvlvKIASNmYX2lhsEg==`
The token is generated by YouTube's Botguard script. The token is bound to the visitor data cookie
used to fetch the player data.
This feature has been A/B-tested for a few weeks. During that time, refetching the player in case
of a 403 download error often made things work again. As of 08.08.2024 this new feature seems to be
stabilized and retrying requests does not work any more.
## Getting a `pot` token
You need a real browser environment to run YouTube's botguard and obtain a pot token. The Invidious project has created a script to extract this token:
<https://github.com/iv-org/youtube-trusted-session-generator/tree/master>.
The script opens YouTube's embedded video player, starts playback and extracts the visitor data

View file

@ -82,7 +82,7 @@ impl RustyPipeQuery {
}
/// Get the videos from a YouTube channel
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_videos<S: AsRef<str> + Debug>(
&self,
channel_id: S,
@ -94,7 +94,7 @@ impl RustyPipeQuery {
/// Get an ordered list of videos from a YouTube channel
///
/// This function does not return channel metadata.
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_videos_order<S: AsRef<str> + Debug>(
&self,
channel_id: S,
@ -105,7 +105,7 @@ impl RustyPipeQuery {
}
/// Get the videos of the given tab (Shorts, Livestreams) from a YouTube channel
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_videos_tab<S: AsRef<str> + Debug>(
&self,
channel_id: S,
@ -118,7 +118,7 @@ impl RustyPipeQuery {
/// Get an ordered list of videos from the given tab (Shorts, Livestreams) of a YouTube channel
///
/// This function does not return channel metadata.
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_videos_tab_order<S: AsRef<str> + Debug>(
&self,
channel_id: S,
@ -136,7 +136,7 @@ impl RustyPipeQuery {
}
/// Search the videos of a channel
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_search<S: AsRef<str> + Debug, S2: AsRef<str> + Debug>(
&self,
channel_id: S,
@ -152,7 +152,7 @@ impl RustyPipeQuery {
}
/// Get the playlists of a channel
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_playlists<S: AsRef<str> + Debug>(
&self,
channel_id: S,
@ -177,7 +177,7 @@ impl RustyPipeQuery {
}
/// Get additional metadata from the *About* tab of a channel
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_info<S: AsRef<str> + Debug>(
&self,
channel_id: S,

View file

@ -18,7 +18,7 @@ impl RustyPipeQuery {
/// for checking a lot of channels or implementing a subscription feed.
///
/// The downside of using the RSS feed is that it does not provide video durations.
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn channel_rss<S: AsRef<str> + Debug>(
&self,
channel_id: S,

View file

@ -216,6 +216,17 @@ static CLIENT_VERSION_REGEX: Lazy<Regex> =
/// Matches a `"visitorData":"…"` key/value pair and captures the value in group 1.
///
/// The captured value is a non-empty run of word characters (letters, digits, `_`),
/// `-` and `%`, matched lazily up to the closing quote.
static VISITOR_DATA_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#""visitorData":"([\w\d_\-%]+?)""#).unwrap());
/// Default order of client types when fetching player data
///
/// The entries are listed by priority, the first one being the preferred client.
///
/// The order may change in the future in case YouTube applies changes to their
/// platform that disable a client or make it less reliable.
pub const DEFAULT_PLAYER_CLIENT_ORDER: &[ClientType] = &[
ClientType::Tv,
ClientType::TvHtml5Embed,
ClientType::Android,
ClientType::Ios,
];
/// The RustyPipe client used to access YouTube's API
///
/// RustyPipe uses an [`Arc`] internally, so if you are using the client

View file

@ -32,7 +32,7 @@ struct FormData {
impl RustyPipeQuery {
/// Get the YouTube Music charts for a given country
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_charts(&self, country: Option<Country>) -> Result<MusicCharts, Error> {
let context = self.get_context(ClientType::DesktopMusic, true, None).await;
let request_body = QCharts {

View file

@ -40,7 +40,7 @@ struct QRadio<'a> {
impl RustyPipeQuery {
/// Get the metadata of a YouTube music track
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_details<S: AsRef<str> + Debug>(
&self,
video_id: S,
@ -68,7 +68,7 @@ impl RustyPipeQuery {
/// Get the lyrics of a YouTube music track
///
/// The `lyrics_id` has to be obtained using [`RustyPipeQuery::music_details`].
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_lyrics<S: AsRef<str> + Debug>(&self, lyrics_id: S) -> Result<Lyrics, Error> {
let lyrics_id = lyrics_id.as_ref();
let context = self.get_context(ClientType::DesktopMusic, true, None).await;
@ -90,7 +90,7 @@ impl RustyPipeQuery {
/// Get related items (tracks, playlists, artists) to a YouTube Music track
///
/// The `related_id` has to be obtained using [`RustyPipeQuery::music_details`].
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_related<S: AsRef<str> + Debug>(
&self,
related_id: S,
@ -115,7 +115,7 @@ impl RustyPipeQuery {
/// Get a YouTube Music radio (a dynamically generated playlist)
///
/// The `radio_id` can be obtained using [`RustyPipeQuery::music_artist`] to get an artist's radio.
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_radio<S: AsRef<str> + Debug>(
&self,
radio_id: S,
@ -146,7 +146,7 @@ impl RustyPipeQuery {
}
/// Get a YouTube Music radio (a dynamically generated playlist) for a track
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_radio_track<S: AsRef<str> + Debug>(
&self,
video_id: S,
@ -156,7 +156,7 @@ impl RustyPipeQuery {
}
/// Get a YouTube Music radio (a dynamically generated playlist) for a playlist
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_radio_playlist<S: AsRef<str> + Debug>(
&self,
playlist_id: S,

View file

@ -13,7 +13,7 @@ use super::{
impl RustyPipeQuery {
/// Get a list of moods and genres from YouTube Music
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_genres(&self) -> Result<Vec<MusicGenreItem>, Error> {
let context = self.get_context(ClientType::DesktopMusic, true, None).await;
let request_body = QBrowse {
@ -32,7 +32,7 @@ impl RustyPipeQuery {
}
/// Get the playlists from a YouTube Music genre
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_genre<S: AsRef<str> + Debug>(
&self,
genre_id: S,

View file

@ -11,7 +11,7 @@ use super::{response, ClientType, MapRespCtx, MapResponse, QBrowse, RustyPipeQue
impl RustyPipeQuery {
/// Get the new albums that were released on YouTube Music
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_new_albums(&self) -> Result<Vec<AlbumItem>, Error> {
let context = self.get_context(ClientType::DesktopMusic, true, None).await;
let request_body = QBrowse {
@ -30,7 +30,7 @@ impl RustyPipeQuery {
}
/// Get the new music videos that were released on YouTube Music
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_new_videos(&self) -> Result<Vec<TrackItem>, Error> {
let context = self.get_context(ClientType::DesktopMusic, true, None).await;
let request_body = QBrowse {

View file

@ -22,7 +22,7 @@ use super::{
impl RustyPipeQuery {
/// Get a playlist from YouTube Music
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_playlist<S: AsRef<str> + Debug>(
&self,
playlist_id: S,
@ -54,7 +54,7 @@ impl RustyPipeQuery {
}
/// Get an album from YouTube Music
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_album<S: AsRef<str> + Debug>(
&self,
album_id: S,

View file

@ -126,7 +126,7 @@ impl RustyPipeQuery {
}
/// Get YouTube Music search suggestions
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn music_search_suggestion<S: AsRef<str> + Debug>(
&self,
query: S,

View file

@ -14,7 +14,7 @@ use super::{response, ClientType, MapRespCtx, MapResponse, QContinuation, RustyP
impl RustyPipeQuery {
/// Get more YouTube items from the given continuation token and endpoint
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn continuation<T: FromYtItem, S: AsRef<str> + Debug>(
&self,
ctoken: S,

View file

@ -25,6 +25,7 @@ use super::{
player::{self, Format},
},
ClientType, MapRespCtx, MapResponse, MapResult, RustyPipeQuery, YTContext,
DEFAULT_PLAYER_CLIENT_ORDER,
};
#[derive(Debug, Serialize)]
@ -65,7 +66,7 @@ struct QContentPlaybackContext<'a> {
impl RustyPipeQuery {
/// Get YouTube player data (video/audio streams + basic metadata)
pub async fn player<S: AsRef<str> + Debug>(&self, video_id: S) -> Result<VideoPlayer, Error> {
self.player_from_clients(video_id, &[ClientType::Desktop, ClientType::TvHtml5Embed])
self.player_from_clients(video_id, DEFAULT_PLAYER_CLIENT_ORDER)
.await
}
@ -113,7 +114,7 @@ impl RustyPipeQuery {
}
/// Get YouTube player data (video/audio streams + basic metadata) using the specified client
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn player_from_client<S: AsRef<str> + Debug>(
&self,
video_id: S,

View file

@ -17,7 +17,7 @@ use super::{response, ClientType, MapRespCtx, MapResponse, MapResult, QBrowse, R
impl RustyPipeQuery {
/// Get a YouTube playlist
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn playlist<S: AsRef<str> + Debug>(&self, playlist_id: S) -> Result<Playlist, Error> {
let playlist_id = playlist_id.as_ref();
// YTM playlists require visitor data for continuations to work

View file

@ -24,7 +24,7 @@ struct QSearch<'a> {
impl RustyPipeQuery {
/// Search YouTube
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn search<T: FromYtItem, S: AsRef<str> + Debug>(
&self,
query: S,
@ -48,7 +48,7 @@ impl RustyPipeQuery {
}
/// Search YouTube using the given [`SearchFilter`]
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn search_filter<T: FromYtItem, S: AsRef<str> + Debug>(
&self,
query: S,
@ -73,7 +73,7 @@ impl RustyPipeQuery {
}
/// Get YouTube search suggestions
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn search_suggestion<S: AsRef<str> + Debug>(
&self,
query: S,

View file

@ -16,7 +16,7 @@ use super::{
impl RustyPipeQuery {
/// Get the videos from the YouTube startpage
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn startpage(&self) -> Result<Paginator<VideoItem>, Error> {
let context = self.get_context(ClientType::Desktop, true, None).await;
let request_body = QBrowse {
@ -35,7 +35,7 @@ impl RustyPipeQuery {
}
/// Get the videos from the YouTube trending page
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn trending(&self) -> Result<Vec<VideoItem>, Error> {
let context = self.get_context(ClientType::Desktop, true, None).await;
let request_body = QBrowseParams {

View file

@ -58,7 +58,7 @@ impl RustyPipeQuery {
/// );
/// # });
/// ```
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn resolve_url<S: AsRef<str> + Debug>(
self,
url: S,
@ -236,7 +236,7 @@ impl RustyPipeQuery {
/// );
/// # });
/// ```
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn resolve_string<S: AsRef<str> + Debug>(
self,
s: S,

View file

@ -31,7 +31,7 @@ struct QVideo<'a> {
impl RustyPipeQuery {
/// Get the metadata for a video
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn video_details<S: AsRef<str> + Debug>(
&self,
video_id: S,
@ -56,7 +56,7 @@ impl RustyPipeQuery {
}
/// Get the comments for a video using the continuation token obtained from `rusty_pipe_query.video_details()`
#[tracing::instrument(skip(self))]
#[tracing::instrument(skip(self), level = "error")]
pub async fn video_comments<S: AsRef<str> + Debug>(
&self,
ctoken: S,

View file

@ -137,8 +137,11 @@ async fn get_player_from_client(#[case] client_type: ClientType, rp: RustyPipe)
assert_eq!(audio.format, AudioFormat::Webm);
assert_eq!(audio.codec, AudioCodec::Opus);
check_video_stream(video).await;
check_video_stream(audio).await;
// The Desktop client now requires a pot token, so its streams cannot be tested here
if client_type != ClientType::Desktop {
check_video_stream(video).await;
check_video_stream(audio).await;
}
}
assert!(player_data.expires_in_seconds > 10000);
@ -246,19 +249,25 @@ async fn get_player(
let details = player_data.details;
assert_eq!(details.id, id);
assert_eq!(details.name.expect("name"), name);
let desc = details.description.expect("description");
assert!(desc.contains(description), "description: {desc}");
if let Some(n) = &details.name {
assert_eq!(n, name);
}
if let Some(desc) = &details.description {
assert!(desc.contains(description), "description: {desc}");
}
assert_eq!(details.duration, duration);
assert_eq!(details.channel_id, channel_id);
assert_eq!(details.channel_name.expect("channel name"), channel_name);
assert_gte(details.view_count.expect("view count"), views, "views");
if let Some(cn) = &details.channel_name {
assert_eq!(cn, channel_name);
}
if let Some(vc) = details.view_count {
assert_gte(vc, views, "views");
}
assert_eq!(details.is_live, is_live);
assert_eq!(details.is_live_content, is_live_content);
if is_live {
assert!(player_data.hls_manifest_url.is_some());
assert!(player_data.dash_manifest_url.is_some());
assert!(player_data.hls_manifest_url.is_some() || player_data.dash_manifest_url.is_some());
} else {
assert!(!player_data.video_only_streams.is_empty());
assert!(!player_data.audio_streams.is_empty());