Compare commits

...

2 commits

Author SHA1 Message Date
6d0302d3bf fix: ignore channels in YTM search 2022-11-07 23:03:46 +01:00
c263ba2345 feat: resolve music album urls 2022-11-07 23:01:10 +01:00
6 changed files with 96 additions and 32 deletions

View file

@ -414,7 +414,8 @@ impl MusicListMapper {
Ok(MusicEntityType::Playlist)
}
PageType::Channel => {
Err(format!("channel items unsupported. id: {}", id))
// There may be broken YT channels from the artist search. They can be skipped.
Ok(MusicEntityType::Artist)
}
}
}

View file

@ -139,7 +139,7 @@ impl PageType {
pub(crate) fn to_url_target(self, id: String) -> UrlTarget {
match self {
PageType::Artist => UrlTarget::Channel { id },
PageType::Album => UrlTarget::Playlist { id },
PageType::Album => UrlTarget::Album { id },
PageType::Channel => UrlTarget::Channel { id },
PageType::Playlist => UrlTarget::Playlist { id },
}

View file

@ -20,7 +20,7 @@ struct QResolveUrl<'a> {
}
impl RustyPipeQuery {
pub async fn resolve_url(self, url: &str) -> Result<UrlTarget, Error> {
pub async fn resolve_url(self, url: &str, resolve_albums: bool) -> Result<UrlTarget, Error> {
let (url, params) = util::url_to_params(url)?;
let mut is_shortlink = url.domain().and_then(|d| match d {
@ -61,7 +61,16 @@ impl RustyPipeQuery {
.ok_or(Error::Other(Cow::Borrowed("invalid url: no playlist id")))?
.to_string();
Ok(UrlTarget::Playlist { id })
// YouTube Music album has to be resolved by the YTM API
if resolve_albums && id.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) {
self._navigation_resolve_url(
&format!("/playlist?list={}", id),
ClientType::DesktopMusic,
)
.await
} else {
Ok(UrlTarget::Playlist { id })
}
}
// Channel vanity URL or youtu.be shortlink
Some(mut id) => {
@ -86,7 +95,8 @@ impl RustyPipeQuery {
}
Some(false) => {
// Vanity URL (e.g. youtube.com/LinusTechTips) has to be resolved by the Innertube API
self._navigation_resolve_url(url.path()).await
self._navigation_resolve_url(url.path(), ClientType::Desktop)
.await
}
None => {
// We don't have the original YT domain, so this can be both
@ -98,7 +108,10 @@ impl RustyPipeQuery {
.is_match(url.path())
.unwrap_or_default()
{
match self._navigation_resolve_url(url.path()).await {
match self
._navigation_resolve_url(url.path(), ClientType::Desktop)
.await
{
Ok(target) => Ok(target),
Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => {
match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
@ -131,14 +144,19 @@ impl RustyPipeQuery {
Ok(target)
}
pub async fn resolve_string(self, string: &str) -> Result<UrlTarget, Error> {
pub async fn resolve_string(
self,
string: &str,
resolve_albums: bool,
) -> Result<UrlTarget, Error> {
// URL with protocol
if string.starts_with("http://") || string.starts_with("https://") {
self.resolve_url(string).await
self.resolve_url(string, resolve_albums).await
}
// URL without protocol
else if string.contains('/') && string.contains('.') {
self.resolve_url(&format!("https://{}", string)).await
self.resolve_url(&format!("https://{}", string), resolve_albums)
.await
}
// ID only
else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() {
@ -151,28 +169,54 @@ impl RustyPipeQuery {
id: string.to_owned(),
})
} else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() {
Ok(UrlTarget::Playlist {
if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) {
self._navigation_resolve_url(
&format!("/playlist?list={}", string),
ClientType::DesktopMusic,
)
.await
} else {
Ok(UrlTarget::Playlist {
id: string.to_owned(),
})
}
} else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() {
Ok(UrlTarget::Album {
id: string.to_owned(),
})
}
// Channel name only
else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() {
self._navigation_resolve_url(&format!("/{}", string.trim_start_matches('/')))
.await
self._navigation_resolve_url(
&format!("/{}", string.trim_start_matches('/')),
ClientType::Desktop,
)
.await
} else {
Err(Error::Other("invalid input string".into()))
}
}
async fn _navigation_resolve_url(&self, url_path: &str) -> Result<UrlTarget, Error> {
let context = self.get_context(ClientType::Desktop, true, None).await;
async fn _navigation_resolve_url(
&self,
url_path: &str,
ctype: ClientType,
) -> Result<UrlTarget, Error> {
let context = self.get_context(ctype, true, None).await;
let request_body = QResolveUrl {
context,
url: format!("https://www.youtube.com{}", url_path),
url: format!(
"https://{}.youtube.com{}",
match ctype {
ClientType::DesktopMusic => "music",
_ => "www",
},
url_path
),
};
self.execute_request::<response::ResolvedUrl, _, _>(
ClientType::Desktop,
ctype,
"channel_id",
&request_body.url,
"navigation/resolve_url",
@ -189,23 +233,24 @@ impl MapResponse<UrlTarget> for response::ResolvedUrl {
_lang: Language,
_deobf: Option<&crate::deobfuscate::Deobfuscator>,
) -> Result<MapResult<UrlTarget>, ExtractionError> {
let browse_endpoint = self
.endpoint
.browse_endpoint
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))?;
let page_type = self
.endpoint
.command_metadata
.ok_or(ExtractionError::InvalidData(Cow::Borrowed(
"No command metadata",
)))?
.web_command_metadata
.web_page_type;
let id = self
.endpoint
.browse_endpoint
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("No browse ID")))?
.browse_id;
.map(|c| c.web_command_metadata.web_page_type)
.or_else(|| {
browse_endpoint
.browse_endpoint_context_supported_configs
.map(|c| c.browse_endpoint_context_music_config.page_type)
})
.ok_or(ExtractionError::InvalidData(Cow::Borrowed("No page type")))?;
Ok(MapResult {
c: page_type.to_url_target(id),
c: page_type.to_url_target(browse_endpoint.browse_id),
warnings: Vec::new(),
})
}

View file

@ -41,6 +41,7 @@ pub enum UrlTarget {
Video { id: String, start_time: u32 },
Channel { id: String },
Playlist { id: String },
Album { id: String },
}
impl ToString for UrlTarget {
@ -66,6 +67,11 @@ impl UrlTarget {
UrlTarget::Playlist { id } => {
format!("{}/playlist?list={}", yt_host, id)
}
UrlTarget::Album { id } => {
// The official album URLs use the playlist ID, which is not stored in this target,
// so the album ID is put into a channel URL instead.
// This looks weird, but it works
format!("{}/channel/{}", yt_host, id)
}
}
}
@ -89,6 +95,12 @@ impl UrlTarget {
false => Err(Error::Other("invalid playlist id".into())),
}
}
UrlTarget::Album { id } => {
match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() {
true => Ok(()),
false => Err(Error::Other("invalid album id".into())),
}
}
}
}
}

View file

@ -23,7 +23,9 @@ pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-
/// Matches a channel ID: the literal `UC` followed by exactly 22 characters
/// from `A-Za-z0-9_-`, anchored at both ends.
pub static CHANNEL_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
pub static PLAYLIST_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(?:PL|RD)[A-Za-z0-9_-]{30,}$").unwrap());
Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap());
/// Matches an album ID: the literal `MPREb_` followed by exactly 11 characters
/// from `A-Za-z0-9_-`, anchored at both ends.
pub static ALBUM_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
/// Matches channel vanity paths. Two alternatives:
/// - anchored at the start: an optional `/`, an optional `c/` or `user/` prefix, then a name
/// - anchored at the end: an `@` handle
///
/// NOTE(review): the top-level `|` splits the anchors, so `^` applies only to the
/// name alternative and `$` only to the `@` handle alternative. The name alternative
/// therefore also accepts inputs with trailing text, and the handle alternative accepts
/// inputs with leading text. In addition, the range `A-z` spans the ASCII punctuation
/// between `Z` and `a` (`[`, `\`, `]`, `^`, `_` and the backtick).
/// Confirm whether callers rely on this looseness before tightening the pattern.
pub static VANITY_PATH_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap());
@ -32,6 +34,7 @@ pub const DOT_SEPARATOR: &str = " • ";
/// YouTube Music name (author of official playlists)
pub const YT_MUSIC_NAME: &str = "YouTube Music";
/// The literal artist name "Various Artists".
/// NOTE(review): presumably the placeholder artist of compilation albums — confirm against callers.
pub const VARIOUS_ARTISTS: &str = "Various Artists";
/// Playlist ID prefix that marks a YouTube Music album playlist.
pub const PLAYLIST_ID_ALBUM_PREFIX: &str = "OLAK";
/// 64-symbol alphabet: `A-Z`, `a-z`, `0-9`, `-` and `_`.
/// NOTE(review): presumably the character set for generated content playback nonces — usage is not visible here.
const CONTENT_PLAYBACK_NONCE_ALPHABET: &[u8; 64] =
b"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";

View file

@ -1165,10 +1165,11 @@ async fn search_suggestion_empty() {
#[case("https://piped.mha.fi/dQw4w9WgXcQ", UrlTarget::Channel {id: "UCoG6BrhgmivrkcbEHcYtK4Q".to_owned()})]
// Both a video ID and a channel name + video time param => returns video
#[case("https://piped.mha.fi/dQw4w9WgXcQ?t=0", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})]
#[case("https://music.youtube.com/playlist?list=OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})]
#[tokio::test]
async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) {
let rp = RustyPipe::builder().strict().build();
let target = rp.query().resolve_url(url).await.unwrap();
let target = rp.query().resolve_url(url, true).await.unwrap();
assert_eq!(target, expect);
}
@ -1182,10 +1183,12 @@ async fn resolve_url(#[case] url: &str, #[case] expect: UrlTarget) {
#[case("dQw4w9WgXcQ", UrlTarget::Video {id: "dQw4w9WgXcQ".to_owned(), start_time: 0})]
#[case("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI", UrlTarget::Playlist {id: "PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI".to_owned()})]
#[case("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk", UrlTarget::Playlist {id: "RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk".to_owned()})]
#[case("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})]
#[case("MPREb_GyH43gCvdM5", UrlTarget::Album {id: "MPREb_GyH43gCvdM5".to_owned()})]
#[tokio::test]
async fn resolve_string(#[case] string: &str, #[case] expect: UrlTarget) {
let rp = RustyPipe::builder().strict().build();
let target = rp.query().resolve_string(string).await.unwrap();
let target = rp.query().resolve_string(string, true).await.unwrap();
assert_eq!(target, expect);
}
@ -1194,7 +1197,7 @@ async fn resolve_channel_not_found() {
let rp = RustyPipe::builder().strict().build();
let err = rp
.query()
.resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3")
.resolve_url("https://www.youtube.com/feeqegnhq3rkwghjq43ruih43io3", true)
.await
.unwrap_err();