fix: ignore channels in YTM search

feat: resolve music album urls
2022-11-07 23:03:46 +01:00 · 2022-11-07 23:01:10 +01:00
6 changed files with 96 additions and 32 deletions
--- a/src/client/response/music_item.rs
+++ b/src/client/response/music_item.rs
@ -414,7 +414,8 @@ impl MusicListMapper {
                                Ok(MusicEntityType::Playlist)
                            }
                            PageType::Channel => {
-                                Err(format!("channel items unsupported. id: {}", id))
+                                // There may be broken YT channels from the artist search. They can be skipped.
                                Ok(MusicEntityType::Artist)
                            }
                        }
                    }
--- a/src/client/response/url_endpoint.rs
+++ b/src/client/response/url_endpoint.rs
@ -139,7 +139,7 @@ impl PageType {
    pub(crate) fn to_url_target(self, id: String) -> UrlTarget {
        match self {
            PageType::Artist => UrlTarget::Channel { id },
-            PageType::Album => UrlTarget::Playlist { id },
+            PageType::Album => UrlTarget::Album { id },
            PageType::Channel => UrlTarget::Channel { id },
            PageType::Playlist => UrlTarget::Playlist { id },
        }
--- a/src/client/url_resolver.rs
+++ b/src/client/url_resolver.rs
@ -20,7 +20,7 @@ struct QResolveUrl<'a> {
 }
 impl RustyPipeQuery {
-    pub async fn resolve_url(self, url: &str) -> Result<UrlTarget, Error> {
+    pub async fn resolve_url(self, url: &str, resolve_albums: bool) -> Result<UrlTarget, Error> {
        let (url, params) = util::url_to_params(url)?;
        let mut is_shortlink = url.domain().and_then(|d| match d {
@ -61,8 +61,17 @@ impl RustyPipeQuery {
                    .ok_or(Error::Other(Cow::Borrowed("invalid url: no playlist id")))?
                    .to_string();
                // A YouTube Music album has to be resolved by the YTM API
                if resolve_albums && id.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) {
                    self._navigation_resolve_url(
                        &format!("/playlist?list={}", id),
                        ClientType::DesktopMusic,
                    )
                    .await
                } else {
                    Ok(UrlTarget::Playlist { id })
                }
            }
            // Channel vanity URL or youtu.be shortlink
            Some(mut id) => {
                if id == "c" || id == "user" {
@ -86,7 +95,8 @@ impl RustyPipeQuery {
                    }
                    Some(false) => {
                        // Vanity URL (e.g. youtube.com/LinusTechTips) has to be resolved by the Innertube API
-                        self._navigation_resolve_url(url.path()).await
+                        self._navigation_resolve_url(url.path(), ClientType::Desktop)
                            .await
                    }
                    None => {
                        // We don't have the original YT domain, so this can be either
@ -98,7 +108,10 @@ impl RustyPipeQuery {
                                .is_match(url.path())
                                .unwrap_or_default()
                        {
-                            match self._navigation_resolve_url(url.path()).await {
+                            match self
                                ._navigation_resolve_url(url.path(), ClientType::Desktop)
                                .await
                            {
                                Ok(target) => Ok(target),
                                Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => {
                                    match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
@ -131,14 +144,19 @@ impl RustyPipeQuery {
        Ok(target)
    }
-    pub async fn resolve_string(self, string: &str) -> Result<UrlTarget, Error> {
+    pub async fn resolve_string(
        self,
        string: &str,
        resolve_albums: bool,
    ) -> Result<UrlTarget, Error> {
        // URL with protocol
        if string.starts_with("http://") || string.starts_with("https://") {
-            self.resolve_url(string).await
+            self.resolve_url(string, resolve_albums).await
        }
        // URL without protocol
        else if string.contains('/') && string.contains('.') {
-            self.resolve_url(&format!("https://{}", string)).await
+            self.resolve_url(&format!("https://{}", string), resolve_albums)
                .await
        }
        // ID only
        else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() {
@ -151,28 +169,54 @@ impl RustyPipeQuery {
                id: string.to_owned(),
            })
        } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() {
            if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) {
                self._navigation_resolve_url(
                    &format!("/playlist?list={}", string),
                    ClientType::DesktopMusic,
                )
                .await
            } else {
                Ok(UrlTarget::Playlist {
                    id: string.to_owned(),
                })
            }
        } else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() {
            Ok(UrlTarget::Album {
                id: string.to_owned(),
            })
        }
        // Channel name only
        else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() {
-            self._navigation_resolve_url(&format!("/{}", string.trim_start_matches('/')))
+            self._navigation_resolve_url(
                &format!("/{}", string.trim_start_matches('/')),
                ClientType::Desktop,
            )
            .await
        } else {
            Err(Error::Other("invalid input string".into()))
        }
    }
-    async fn _navigation_resolve_url(&self, url_path: &str) -> Result<UrlTarget, Error> {
+    async fn _navigation_resolve_url(
-        let context = self.get_context(ClientType::Desktop, true, None).await;
+        &self,
        url_path: &str,
        ctype: ClientType,
    ) -> Result<UrlTarget, Error> {
        let context = self.get_context(ctype, true, None).await;
        let request_body = QResolveUrl {
            context,
-            url: format!("https://www.youtube.com{}", url_path),
+            url: format!(
                "https://{}.youtube.com{}",
                match ctype {
                    ClientType::DesktopMusic => "music",
                    _ => "www",
                },
                url_path
            ),
        };
        self.execute_request::<response::ResolvedUrl, _, _>(
-            ClientType::Desktop,
+            ctype,
            "channel_id",
            &request_body.url,
            "navigation/resolve_url",
@ -189,23 +233,24 @@ impl MapResponse<UrlTarget> for response::ResolvedUrl {
        _lang: Language,
        _deobf: Option<&crate::deobfuscate::Deobfuscator>,
    ) -> Result<MapResult<UrlTarget>, ExtractionError> {
        let browse_endpoint = self
            .endpoint
            .browse_endpoint
            .ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))?;
        let page_type = self
            .endpoint
            .command_metadata
-            .ok_or(ExtractionError::InvalidData(Cow::Borrowed(
+            .map(|c| c.web_command_metadata.web_page_type)
-                "No command metadata",
+            .or_else(|| {
-            )))?
+                browse_endpoint
-            .web_command_metadata
+                    .browse_endpoint_context_supported_configs
-            .web_page_type;
+                    .map(|c| c.browse_endpoint_context_music_config.page_type)
-
+            })
-        let id = self
+            .ok_or(ExtractionError::InvalidData(Cow::Borrowed("No page type")))?;
            .endpoint
            .browse_endpoint
            .ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))?
            .browse_id;
        Ok(MapResult {
-            c: page_type.to_url_target(id),
+            c: page_type.to_url_target(browse_endpoint.browse_id),
            warnings: Vec::new(),
        })
    }
--- a/src/model/mod.rs
+++ b/src/model/mod.rs
@ -41,6 +41,7 @@ pub enum UrlTarget {
    Video { id: String, start_time: u32 },
    Channel { id: String },
    Playlist { id: String },
    Album { id: String },
 }
 impl ToString for UrlTarget {
@ -66,6 +67,11 @@ impl UrlTarget {
            UrlTarget::Playlist { id } => {
                format!("{}/playlist?list={}", yt_host, id)
            }
            UrlTarget::Album { id } => {
                // The official album URLs use the playlist ID
                // This looks weird, but it works
                format!("{}/channel/{}", yt_host, id)
            }
        }
    }
@ -89,6 +95,12 @@ impl UrlTarget {
                    false => Err(Error::Other("invalid playlist id".into())),
                }
            }
            UrlTarget::Album { id } => {
                match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() {
                    true => Ok(()),
                    false => Err(Error::Other("invalid album id".into())),
                }
            }
        }
    }
 }
--- a/src/util/mod.rs
+++ b/src/util/mod.rs
@ -23,7 +23,9 @@ pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-
 pub static CHANNEL_ID_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
 pub static PLAYLIST_ID_REGEX: Lazy<Regex> =
-    Lazy::new(|| Regex::new(r"^(?:PL|RD)[A-Za-z0-9_-]{30,}$").unwrap());
+    Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap());
 pub static ALBUM_ID_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
 pub static VANITY_PATH_REGEX: Lazy<Regex> =
    Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap());
@ -32,6 +34,7 @@ pub const DOT_SEPARATOR: &str = " • ";
 /// YouTube Music name (author of official playlists)
 pub const YT_MUSIC_NAME: &str = "YouTube Music";
 pub const VARIOUS_ARTISTS: &str = "Various Artists";
 pub const PLAYLIST_ID_ALBUM_PREFIX: &str = "OLAK";
 const CONTENT_PLAYBACK_NONCE_ALPHABET: &[u8; 64] =
    b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
--- a/tests/youtube.rs
+++ b/tests/youtube.rs
@ -1165,10 +1165,11 @@ async fn search_suggestion_empty() {
 #[case("https://piped.mha.fi/dQw4w9WgXcQ", UrlTarget::Channel {id: "UCoG6BrhgmivrkcbEHcYtK4Q".to_owned()})]
 // Both a video ID and a channel name + video time param => returns video
 #[case("https://piped.mha.fi/dQw4w9WgXcQ?t=0", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})]
 #[case("https://music.youtube.com/playlist?list=OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})]
 #[tokio::test]
 async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) {
    let rp = RustyPipe::builder().strict().build();
-    let target = rp.query().resolve_url(url).await.unwrap();
+    let target = rp.query().resolve_url(url, true).await.unwrap();
    assert_eq!(target, expect);
 }
@ -1182,10 +1183,12 @@ async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) {
 #[case("dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})]
 #[case("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI", UrlTarget::Playlist {id: "PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI".to_owned()})]
 #[case("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk", UrlTarget::Playlist {id: "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk".to_owned()})]
 #[case("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})]
 #[case("MPREb_GyH43gCvdM5", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})]
 #[tokio::test]
 async fn resolve_string(#[case] string: &str, #[case] expect: UrlTarget) {
    let rp = RustyPipe::builder().strict().build();
-    let target = rp.query().resolve_string(string).await.unwrap();
+    let target = rp.query().resolve_string(string, true).await.unwrap();
    assert_eq!(target, expect);
 }
@ -1194,7 +1197,7 @@ async fn resolve_channel_not_found() {
    let rp = RustyPipe::builder().strict().build();
    let err = rp
        .query()
-        .resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3")
+        .resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3", true)
        .await
        .unwrap_err();
Author	SHA1	Message	Date
ThetaDev	6d0302d3bf	fix: ignore channels in YTM search	2022-11-07 23:03:46 +01:00
ThetaDev	c263ba2345	feat: resolve music album urls	2022-11-07 23:01:10 +01:00