From c263ba23458dc187be1cca7d9bedd3dab20cb420 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Mon, 7 Nov 2022 23:01:10 +0100 Subject: [PATCH 1/2] feat: resolve music album urls --- src/client/response/url_endpoint.rs | 2 +- src/client/url_resolver.rs | 97 +++++++++++++++++++++-------- src/model/mod.rs | 12 ++++ src/util/mod.rs | 5 +- tests/youtube.rs | 9 ++- 5 files changed, 94 insertions(+), 31 deletions(-) diff --git a/src/client/response/url_endpoint.rs b/src/client/response/url_endpoint.rs index bae1add..0627324 100644 --- a/src/client/response/url_endpoint.rs +++ b/src/client/response/url_endpoint.rs @@ -139,7 +139,7 @@ impl PageType { pub(crate) fn to_url_target(self, id: String) -> UrlTarget { match self { PageType::Artist => UrlTarget::Channel { id }, - PageType::Album => UrlTarget::Playlist { id }, + PageType::Album => UrlTarget::Album { id }, PageType::Channel => UrlTarget::Channel { id }, PageType::Playlist => UrlTarget::Playlist { id }, } diff --git a/src/client/url_resolver.rs b/src/client/url_resolver.rs index c4f5a8e..9ced71a 100644 --- a/src/client/url_resolver.rs +++ b/src/client/url_resolver.rs @@ -20,7 +20,7 @@ struct QResolveUrl<'a> { } impl RustyPipeQuery { - pub async fn resolve_url(self, url: &str) -> Result { + pub async fn resolve_url(self, url: &str, resolve_albums: bool) -> Result { let (url, params) = util::url_to_params(url)?; let mut is_shortlink = url.domain().and_then(|d| match d { @@ -61,7 +61,16 @@ impl RustyPipeQuery { .ok_or(Error::Other(Cow::Borrowed("invalid url: no playlist id")))? .to_string(); - Ok(UrlTarget::Playlist { id }) + // YouTube Music album has to be resolved by the YTM API + if resolve_albums && id.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) { + self._navigation_resolve_url( + &format!("/playlist?list={}", id), + ClientType::DesktopMusic, + ) + .await + } else { + Ok(UrlTarget::Playlist { id }) + } } // Channel vanity URL or youtu.be shortlink Some(mut id) => { @@ -86,7 +95,8 @@ impl RustyPipeQuery { } Some(false) => { // Vanity URL (e.g. youtube.com/LinusTechTips) has to be resolved by the Innertube API - self._navigation_resolve_url(url.path()).await + self._navigation_resolve_url(url.path(), ClientType::Desktop) + .await } None => { // We dont have the original YT domain, so this can be both @@ -98,7 +108,10 @@ impl RustyPipeQuery { .is_match(url.path()) .unwrap_or_default() { - match self._navigation_resolve_url(url.path()).await { + match self + ._navigation_resolve_url(url.path(), ClientType::Desktop) + .await + { Ok(target) => Ok(target), Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => { match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { @@ -131,14 +144,19 @@ impl RustyPipeQuery { Ok(target) } - pub async fn resolve_string(self, string: &str) -> Result { + pub async fn resolve_string( + self, + string: &str, + resolve_albums: bool, + ) -> Result { // URL with protocol if string.starts_with("http://") || string.starts_with("https://") { - self.resolve_url(string).await + self.resolve_url(string, resolve_albums).await } // URL without protocol else if string.contains('/') && string.contains('.') { - self.resolve_url(&format!("https://{}", string)).await + self.resolve_url(&format!("https://{}", string), resolve_albums) + .await } // ID only else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() { @@ -151,28 +169,54 @@ impl RustyPipeQuery { id: string.to_owned(), }) } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() { - Ok(UrlTarget::Playlist { + if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) { + self._navigation_resolve_url( + &format!("/playlist?list={}", string), + ClientType::DesktopMusic, + ) + .await + } else { + Ok(UrlTarget::Playlist { + id: string.to_owned(), + }) + } + } else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() { + Ok(UrlTarget::Album { id: string.to_owned(), }) } // Channel name only else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() { - self._navigation_resolve_url(&format!("/{}", string.trim_start_matches('/'))) - .await + self._navigation_resolve_url( + &format!("/{}", string.trim_start_matches('/')), + ClientType::Desktop, + ) + .await } else { Err(Error::Other("invalid input string".into())) } } - async fn _navigation_resolve_url(&self, url_path: &str) -> Result { - let context = self.get_context(ClientType::Desktop, true, None).await; + async fn _navigation_resolve_url( + &self, + url_path: &str, + ctype: ClientType, + ) -> Result { + let context = self.get_context(ctype, true, None).await; let request_body = QResolveUrl { context, - url: format!("https://www.youtube.com{}", url_path), + url: format!( + "https://{}.youtube.com{}", + match ctype { + ClientType::DesktopMusic => "music", + _ => "www", + }, + url_path + ), }; self.execute_request::( - ClientType::Desktop, + ctype, "channel_id", &request_body.url, "navigation/resolve_url", @@ -189,23 +233,24 @@ impl MapResponse for response::ResolvedUrl { _lang: Language, _deobf: Option<&crate::deobfuscate::Deobfuscator>, ) -> Result, ExtractionError> { + let browse_endpoint = self + .endpoint + .browse_endpoint + .ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))?; + let page_type = self .endpoint .command_metadata - .ok_or(ExtractionError::InvalidData(Cow::Borrowed( - "No command metadata", - )))? - .web_command_metadata - .web_page_type; - - let id = self - .endpoint - .browse_endpoint - .ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))? - .browse_id; + .map(|c| c.web_command_metadata.web_page_type) + .or_else(|| { + browse_endpoint + .browse_endpoint_context_supported_configs + .map(|c| c.browse_endpoint_context_music_config.page_type) + }) + .ok_or(ExtractionError::InvalidData(Cow::Borrowed("No page type")))?; Ok(MapResult { - c: page_type.to_url_target(id), + c: page_type.to_url_target(browse_endpoint.browse_id), warnings: Vec::new(), }) } diff --git a/src/model/mod.rs b/src/model/mod.rs index 1810d67..57d0de5 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -41,6 +41,7 @@ pub enum UrlTarget { Video { id: String, start_time: u32 }, Channel { id: String }, Playlist { id: String }, + Album { id: String }, } impl ToString for UrlTarget { @@ -66,6 +67,11 @@ impl UrlTarget { UrlTarget::Playlist { id } => { format!("{}/playlist?list={}", yt_host, id) } + UrlTarget::Album { id } => { + // The official album URLs use the playlist ID + // This looks weird, but it works + format!("{}/channel/{}", yt_host, id) + } } } @@ -89,6 +95,12 @@ impl UrlTarget { false => Err(Error::Other("invalid playlist id".into())), } } + UrlTarget::Album { id } => { + match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() { + true => Ok(()), + false => Err(Error::Other("invalid album id".into())), + } + } } } } diff --git a/src/util/mod.rs b/src/util/mod.rs index 8ea6877..eff901a 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -23,7 +23,9 @@ pub static VIDEO_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_- pub static CHANNEL_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); pub static PLAYLIST_ID_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^(?:PL|RD)[A-Za-z0-9_-]{30,}$").unwrap()); + Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap()); +pub static ALBUM_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap()); pub static VANITY_PATH_REGEX: Lazy = Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap()); @@ -32,6 +34,7 @@ pub const DOT_SEPARATOR: &str = " • "; /// YouTube Music name (author of official playlists) pub const YT_MUSIC_NAME: &str = "YouTube Music"; pub const VARIOUS_ARTISTS: &str = "Various Artists"; +pub const PLAYLIST_ID_ALBUM_PREFIX: &str = "OLAK"; const CONTENT_PLAYBACK_NONCE_ALPHABET: &[u8; 64] = b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; diff --git a/tests/youtube.rs b/tests/youtube.rs index dd1601a..9892fcf 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -1165,10 +1165,11 @@ async fn search_suggestion_empty() { #[case("https://piped.mha.fi/dQw4w9WgXcQ", UrlTarget::Channel {id: "UCoG6BrhgmivrkcbEHcYtK4Q".to_owned()})] // Both a video ID and a channel name + video time param => returns video #[case("https://piped.mha.fi/dQw4w9WgXcQ?t=0", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] +#[case("https://music.youtube.com/playlist?list=OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})] #[tokio::test] async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) { let rp = RustyPipe::builder().strict().build(); - let target = rp.query().resolve_url(url).await.unwrap(); + let target = rp.query().resolve_url(url, true).await.unwrap(); assert_eq!(target, expect); } @@ -1182,10 +1183,12 @@ async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) { #[case("dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})] #[case("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI", UrlTarget::Playlist {id: "PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI".to_owned()})] #[case("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk", UrlTarget::Playlist {id: "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk".to_owned()})] +#[case("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})] +#[case("MPREb_GyH43gCvdM5", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})] #[tokio::test] async fn resolve_string(#[case] string: &str, #[case] expect: UrlTarget) { let rp = RustyPipe::builder().strict().build(); - let target = rp.query().resolve_string(string).await.unwrap(); + let target = rp.query().resolve_string(string, true).await.unwrap(); assert_eq!(target, expect); } @@ -1194,7 +1197,7 @@ async fn resolve_channel_not_found() { let rp = RustyPipe::builder().strict().build(); let err = rp .query() - .resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3") + .resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3", true) .await .unwrap_err(); From 6d0302d3bf8e060a5d50afe3b9cef2486d7b8b4c Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Mon, 7 Nov 2022 23:03:46 +0100 Subject: [PATCH 2/2] fix: ignore channels in YTM search --- src/client/response/music_item.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/client/response/music_item.rs b/src/client/response/music_item.rs index 4c3ab6c..5757964 100644 --- a/src/client/response/music_item.rs +++ b/src/client/response/music_item.rs @@ -414,7 +414,8 @@ impl MusicListMapper { Ok(MusicEntityType::Playlist) } PageType::Channel => { - Err(format!("channel items unsupported. id: {}", id)) + // There may be broken YT channels from the artist search. They can be skipped. + Ok(MusicEntityType::Artist) } } }