diff --git a/Cargo.toml b/Cargo.toml index 9e3e1b7..e429901 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,6 +26,7 @@ rustls-tls-native-roots = ["reqwest/rustls-tls-native-roots"] # quick-js = "0.4.1" quick-js = { path = "../quickjs-rs", default-features = false } once_cell = "1.12.0" +regex = "1.6.0" fancy-regex = "0.11.0" thiserror = "1.0.36" url = "2.2.2" diff --git a/src/client/mod.rs b/src/client/mod.rs index 66c8d60..41ce447 100644 --- a/src/client/mod.rs +++ b/src/client/mod.rs @@ -25,10 +25,10 @@ mod channel_rss; use std::sync::Arc; use std::{borrow::Cow, fmt::Debug}; -use fancy_regex::Regex; use log::{debug, error, warn}; use once_cell::sync::Lazy; use rand::Rng; +use regex::Regex; use reqwest::{header, Client, ClientBuilder, Request, RequestBuilder, Response}; use serde::{de::DeserializeOwned, Deserialize, Serialize}; use time::{Duration, OffsetDateTime}; diff --git a/src/client/music_artist.rs b/src/client/music_artist.rs index 91d734a..b16dab4 100644 --- a/src/client/music_artist.rs +++ b/src/client/music_artist.rs @@ -1,8 +1,8 @@ use std::{borrow::Cow, rc::Rc}; -use fancy_regex::Regex; use futures::{stream, StreamExt}; use once_cell::sync::Lazy; +use regex::Regex; use serde::Serialize; use crate::{ @@ -268,8 +268,6 @@ fn map_artist_page( let wikipedia_url = header.description.as_deref().and_then(|h| { WIKIPEDIA_REGEX .captures(h) - .ok() - .flatten() .and_then(|c| c.get(0)) .map(|m| m.as_str().to_owned()) }); diff --git a/src/client/player.rs b/src/client/player.rs index 74fe94b..c646b31 100644 --- a/src/client/player.rs +++ b/src/client/player.rs @@ -3,8 +3,8 @@ use std::{ collections::{BTreeMap, HashMap}, }; -use fancy_regex::Regex; use once_cell::sync::Lazy; +use regex::Regex; use serde::Serialize; use url::Url; @@ -530,8 +530,6 @@ fn map_audio_stream( Some(t) => { let lang = LANG_PATTERN .captures(&t.id) - .ok() - .flatten() .map(|m| m.get(1).unwrap().as_str().to_owned()); Some(AudioTrack { @@ -557,7 +555,7 @@ fn parse_mime(mime: &str) -> 
Option<(&str, Vec<&str>)> { static PATTERN: Lazy = Lazy::new(|| Regex::new(r#"(\w+/\w+);\scodecs="([a-zA-Z-0-9.,\s]*)""#).unwrap()); - let captures = some_or_bail!(PATTERN.captures(mime).ok().flatten(), None); + let captures = some_or_bail!(PATTERN.captures(mime), None); Some(( captures.get(1).unwrap().as_str(), captures diff --git a/src/client/response/channel_rss.rs b/src/client/response/channel_rss.rs index 017626b..b2a963a 100644 --- a/src/client/response/channel_rss.rs +++ b/src/client/response/channel_rss.rs @@ -97,7 +97,7 @@ impl From for crate::model::ChannelRss { .uri .strip_prefix("https://www.youtube.com/channel/") .and_then(|id| { - if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { + if util::CHANNEL_ID_REGEX.is_match(id) { Some(id.to_owned()) } else { None diff --git a/src/client/response/video_item.rs b/src/client/response/video_item.rs index 7aab593..d877bb0 100644 --- a/src/client/response/video_item.rs +++ b/src/client/response/video_item.rs @@ -1,5 +1,5 @@ -use fancy_regex::Regex; use once_cell::sync::Lazy; +use regex::Regex; use serde::Deserialize; use serde_with::{ json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError, @@ -503,20 +503,12 @@ impl YouTubeListMapper { id: video.video_id, name: video.headline, length: video.accessibility.and_then(|acc| { - ACCESSIBILITY_SEP_REGEX - .captures(&acc) - .ok() - .flatten() - .and_then(|cap| { - cap.get(1).and_then(|c| { - timeago::parse_timeago_or_warn( - self.lang, - c.as_str(), - &mut self.warnings, - ) + ACCESSIBILITY_SEP_REGEX.captures(&acc).and_then(|cap| { + cap.get(1).and_then(|c| { + timeago::parse_timeago_or_warn(self.lang, c.as_str(), &mut self.warnings) .map(|ta| Duration::from(ta).whole_seconds() as u32) - }) }) + }) }), thumbnail: video.thumbnail.into(), channel: self.channel.clone(), diff --git a/src/client/url_resolver.rs b/src/client/url_resolver.rs index 05377bd..28ab508 100644 --- a/src/client/url_resolver.rs +++ b/src/client/url_resolver.rs 
@@ -112,9 +112,9 @@ impl RustyPipeQuery { // Album or channel Some("browse") => match path_split.next() { Some(id) => { - if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { + if util::CHANNEL_ID_REGEX.is_match(id) { Ok(UrlTarget::Channel { id: id.to_owned() }) - } else if util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() { + } else if util::ALBUM_ID_REGEX.is_match(id) { Ok(UrlTarget::Album { id: id.to_owned() }) } else { Err(Error::Other("invalid url: no browse id".into())) @@ -153,10 +153,7 @@ impl RustyPipeQuery { // If there is a timestamp parameter, it has to be a video // First check the innertube API if this is a channel vanity url // If no channel is found and the identifier has the video ID format, assume it is a video - if !params.contains_key("t") - && util::VANITY_PATH_REGEX - .is_match(url.path()) - .unwrap_or_default() + if !params.contains_key("t") && util::VANITY_PATH_REGEX.is_match(url.path()) { match self ._navigation_resolve_url(url.path(), ClientType::Desktop) @@ -164,7 +161,7 @@ impl RustyPipeQuery { { Ok(target) => Ok(target), Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => { - match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { + match util::VIDEO_ID_REGEX.is_match(id) { true => Ok(UrlTarget::Video { id: id.to_owned(), start_time: get_start_time(), @@ -176,7 +173,7 @@ impl RustyPipeQuery { } Err(e) => Err(e), } - } else if util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { + } else if util::VIDEO_ID_REGEX.is_match(id) { Ok(UrlTarget::Video { id: id.to_owned(), start_time: get_start_time(), @@ -232,16 +229,16 @@ impl RustyPipeQuery { .await } // ID only - else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() { + else if util::VIDEO_ID_REGEX.is_match(string) { Ok(UrlTarget::Video { id: string.to_owned(), start_time: 0, }) - } else if util::CHANNEL_ID_REGEX.is_match(string).unwrap_or_default() { + } else if util::CHANNEL_ID_REGEX.is_match(string) { Ok(UrlTarget::Channel { id: 
string.to_owned(), }) - } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() { + } else if util::PLAYLIST_ID_REGEX.is_match(string) { if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) { self._navigation_resolve_url( &format!("/playlist?list={}", string), @@ -253,13 +250,13 @@ impl RustyPipeQuery { id: string.to_owned(), }) } - } else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() { + } else if util::ALBUM_ID_REGEX.is_match(string) { Ok(UrlTarget::Album { id: string.to_owned(), }) } // Channel name only - else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() { + else if util::VANITY_PATH_REGEX.is_match(string) { self._navigation_resolve_url( &format!("/{}", string.trim_start_matches('/')), ClientType::Desktop, diff --git a/src/deobfuscate.rs b/src/deobfuscate.rs index 03e1b98..9753e95 100644 --- a/src/deobfuscate.rs +++ b/src/deobfuscate.rs @@ -1,6 +1,7 @@ -use fancy_regex::Regex; +use fancy_regex::Regex as FancyRegex; use log::debug; use once_cell::sync::Lazy; +use regex::Regex; use reqwest::Client; use serde::{Deserialize, Serialize}; use std::result::Result::Ok; @@ -68,18 +69,18 @@ impl From for Deobfuscator { const DEOBFUSCATION_FUNC_NAME: &str = "deobfuscate"; fn get_sig_fn_name(player_js: &str) -> Result { - static FUNCTION_REGEXES: Lazy<[Regex; 6]> = Lazy::new(|| { + static FUNCTION_REGEXES: Lazy<[FancyRegex; 6]> = Lazy::new(|| { [ - Regex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(), - Regex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(), - Regex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(), - Regex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), - Regex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), - 
Regex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(), + FancyRegex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(), + FancyRegex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(), + FancyRegex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(), + FancyRegex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), + FancyRegex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), + FancyRegex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(), ] }); - util::get_cg_from_regexes(FUNCTION_REGEXES.iter(), player_js, 1) + util::get_cg_from_fancy_regexes(FUNCTION_REGEXES.iter(), player_js, 1) .ok_or(DeobfError::Extraction("deobf function name")) } @@ -98,8 +99,6 @@ fn get_sig_fn(player_js: &str) -> Result { let deobfuscate_function = "var ".to_owned() + function_pattern .captures(player_js) - .ok() - .flatten() .ok_or(DeobfError::Extraction("deobf function"))? .get(1) .unwrap() @@ -110,8 +109,6 @@ fn get_sig_fn(player_js: &str) -> Result { Lazy::new(|| Regex::new(";([A-Za-z0-9_\\$]{2})\\...\\(").unwrap()); let helper_object_name = HELPER_OBJECT_NAME_REGEX .captures(&deobfuscate_function) - .ok() - .flatten() .ok_or(DeobfError::Extraction("helper object name"))? .get(1) .unwrap() @@ -124,8 +121,6 @@ fn get_sig_fn(player_js: &str) -> Result { let player_js_nonl = player_js.replace('\n', ""); let helper_object = helper_pattern .captures(&player_js_nonl) - .ok() - .flatten() .ok_or(DeobfError::Extraction("helper object"))? 
.get(1) .unwrap() @@ -154,8 +149,6 @@ fn get_nsig_fn_name(player_js: &str) -> Result { let fname_match = FUNCTION_NAME_REGEX .captures(player_js) - .ok() - .flatten() .ok_or(DeobfError::Extraction("n_deobf function"))?; let function_name = fname_match.get(1).unwrap().as_str(); @@ -171,15 +164,13 @@ fn get_nsig_fn_name(player_js: &str) -> Result { .parse::() .or(Err(DeobfError::Other("could not parse array_num")))?; let array_pattern_str = - "var ".to_owned() + &fancy_regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];"; + "var ".to_owned() + &regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];"; let array_pattern = Regex::new(&array_pattern_str).or(Err(DeobfError::Other( "could not parse helper pattern regex", )))?; let array_str = array_pattern .captures(player_js) - .ok() - .flatten() .ok_or(DeobfError::Extraction("n_deobf array_str"))? .get(1) .unwrap() @@ -274,13 +265,10 @@ async fn get_player_js_url(http: &Client) -> Result { let text = resp.text().await?; static PLAYER_HASH_PATTERN: Lazy = Lazy::new(|| { - Regex::new(r#"https:\\\/\\\/www\.youtube\.com\\\/s\\\/player\\\/([a-z0-9]{8})\\\/"#) - .unwrap() + Regex::new(r#"https:\\/\\/www\.youtube\.com\\/s\\/player\\/([a-z0-9]{8})\\/"#).unwrap() }); let player_hash = PLAYER_HASH_PATTERN .captures(&text) - .ok() - .flatten() .ok_or(DeobfError::Extraction("player hash"))? .get(1) .unwrap() @@ -303,8 +291,6 @@ fn get_sts(player_js: &str) -> Result { Ok(STS_PATTERN .captures(player_js) - .ok() - .flatten() .ok_or(DeobfError::Extraction("sts"))? 
.get(1) .unwrap() diff --git a/src/lib.rs b/src/lib.rs index 644fada..534205b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -15,3 +15,4 @@ pub mod model; pub mod param; pub mod report; pub mod timeago; +pub mod validate; diff --git a/src/model/mod.rs b/src/model/mod.rs index 4f43750..adfa535 100644 --- a/src/model/mod.rs +++ b/src/model/mod.rs @@ -111,30 +111,22 @@ impl UrlTarget { /// Validate the YouTube ID from the URL target pub(crate) fn validate(&self) -> Result<(), Error> { match self { - UrlTarget::Video { id, .. } => { - match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { - true => Ok(()), - false => Err(Error::Other("invalid video id".into())), - } - } - UrlTarget::Channel { id } => { - match util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { - true => Ok(()), - false => Err(Error::Other("invalid channel id".into())), - } - } - UrlTarget::Playlist { id } => { - match util::PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default() { - true => Ok(()), - false => Err(Error::Other("invalid playlist id".into())), - } - } - UrlTarget::Album { id } => { - match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() { - true => Ok(()), - false => Err(Error::Other("invalid album id".into())), - } - } + UrlTarget::Video { id, .. 
} => match util::VIDEO_ID_REGEX.is_match(id) { + true => Ok(()), + false => Err(Error::Other("invalid video id".into())), + }, + UrlTarget::Channel { id } => match util::CHANNEL_ID_REGEX.is_match(id) { + true => Ok(()), + false => Err(Error::Other("invalid channel id".into())), + }, + UrlTarget::Playlist { id } => match util::PLAYLIST_ID_REGEX.is_match(id) { + true => Ok(()), + false => Err(Error::Other("invalid playlist id".into())), + }, + UrlTarget::Album { id } => match util::ALBUM_ID_REGEX.is_match(id) { + true => Ok(()), + false => Err(Error::Other("invalid album id".into())), + }, } } } diff --git a/src/serializer/text.rs b/src/serializer/text.rs index abe8c80..192d790 100644 --- a/src/serializer/text.rs +++ b/src/serializer/text.rs @@ -1,7 +1,7 @@ use std::convert::TryFrom; -use fancy_regex::Regex; use once_cell::sync::Lazy; +use regex::Regex; use serde::{Deserialize, Deserializer}; use serde_with::{serde_as, DeserializeAs}; diff --git a/src/util/mod.rs b/src/util/mod.rs index f8d546a..463ab87 100644 --- a/src/util/mod.rs +++ b/src/util/mod.rs @@ -13,9 +13,10 @@ use std::{ }; use base64::Engine; -use fancy_regex::Regex; +use fancy_regex::Regex as FancyRegex; use once_cell::sync::Lazy; use rand::Rng; +use regex::Regex; use url::Url; use crate::{error::Error, param::Language}; @@ -24,11 +25,12 @@ pub static VIDEO_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_- pub static CHANNEL_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); pub static PLAYLIST_ID_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap()); + Lazy::new(|| Regex::new(r"^(?:PL|RDCLAK|OLAK)[A-Za-z0-9_-]{30,50}$").unwrap()); pub static ALBUM_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap()); -pub static VANITY_PATH_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap()); +pub static VANITY_PATH_REGEX: Lazy = Lazy::new(|| { + 
Regex::new(r"^/?(?:(?:c/|user/)?[A-z0-9]{1,100})|(?:@[A-z0-9-_.]{1,100})$").unwrap() +}); /// Separator string for YouTube Music subtitles pub const DOT_SEPARATOR: &str = " • "; @@ -49,6 +51,16 @@ pub struct MappingError(pub(crate) Cow<'static, str>); pub fn get_cg_from_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option where I: Iterator, +{ + regexes + .find_map(|pattern| pattern.captures(text)) + .map(|c| c.get(cg).unwrap().as_str().to_owned()) +} + +/// Return the given capture group that matches first in a list of fancy regexes +pub fn get_cg_from_fancy_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option +where + I: Iterator, { regexes .find_map(|pattern| pattern.captures(text).ok().flatten()) @@ -132,7 +144,7 @@ where pub fn parse_video_length(text: &str) -> Option { static VIDEO_LENGTH_REGEX: Lazy = Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap()); - VIDEO_LENGTH_REGEX.captures(text).ok().flatten().map(|cap| { + VIDEO_LENGTH_REGEX.captures(text).map(|cap| { let hrs = cap .get(1) .and_then(|x| x.as_str().parse::().ok()) @@ -339,8 +351,6 @@ pub fn video_id_from_thumbnail_url(url: &str) -> Option { Lazy::new(|| Regex::new(r"^https://i.ytimg.com/vi/([A-Za-z0-9_-]{11})/").unwrap()); URL_REGEX .captures(url) - .ok() - .flatten() .and_then(|cap| cap.get(1).map(|x| x.as_str().to_owned())) } diff --git a/src/validate.rs b/src/validate.rs new file mode 100644 index 0000000..f82c2ad --- /dev/null +++ b/src/validate.rs @@ -0,0 +1,179 @@ +//! # Input validation +//! +//! The extraction functions of RustyPipe will produce errors when fed with invalid input data +//! (e.g. YouTube ID's with invalid format). Therefore you will need to validate all untrusted +//! input data beforehand. The library offers two options for this: +//! +//! - The [URL resolver](crate::client::RustyPipeQuery::resolve_url) or +//! [string resolver](crate::client::RustyPipeQuery::resolve_string) is great for handling +//! 
arbitrary input and returns a [`UrlTarget`](crate::model::UrlTarget) enum that tells you +//! whether the given URL points to a video, channel, playlist, etc. +//! - The validation functions of this module are meant for validating concrete data (video IDs, +//! channel IDs, playlist IDs) and return [`true`] if the given input is valid + +use crate::util; +use once_cell::sync::Lazy; +use regex::Regex; + +/// Validate the given video ID +/// +/// YouTube video IDs are exactly 11 characters long and consist of the characters `A-Za-z0-9_-`. +/// +/// # Examples +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::video_id("dQw4w9WgXcQ")); +/// assert!(!validate::video_id("Abcd")); +/// assert!(!validate::video_id("dQw4w9WgXc@")); +/// ``` +pub fn video_id>(video_id: S) -> bool { + util::VIDEO_ID_REGEX.is_match(video_id.as_ref()) +} + +/// Validate the given channel ID +/// +/// YouTube channel IDs are exactly 24 characters long, start with the characters `UC`, +/// followed by 22 of these characters: `A-Za-z0-9_-`. +/// +/// # Examples +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::channel_id("UC2DjFE7Xf11URZqWBigcVOQ")); +/// assert!(!validate::channel_id("Abcd")); +/// assert!(!validate::channel_id("XY2DjFE7Xf11URZqWBigcVOQ")); +/// ``` +pub fn channel_id>(channel_id: S) -> bool { + util::CHANNEL_ID_REGEX.is_match(channel_id.as_ref()) +} + +/// Validate the given playlist ID +/// +/// YouTube playlist IDs start with the characters `PL` (user-created playlist), +/// `RDCLAK` (YouTube Music-curated playlist) or `OLAK` (YouTube Music album), +/// followed by 30 to 50 of these characters: `A-Za-z0-9_-`. 
+/// +/// # Examples +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::playlist_id("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI")); +/// assert!(validate::playlist_id("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk")); +/// assert!(validate::playlist_id("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE")); +/// +/// assert!(!validate::playlist_id("Abcd")); +/// ``` +pub fn playlist_id>(playlist_id: S) -> bool { + util::PLAYLIST_ID_REGEX.is_match(playlist_id.as_ref()) +} + +/// Validate the given album ID +/// +/// YouTube Music album IDs are exactly 17 characters long, start with the characters `MPREb_`, +/// followed by 11 of these characters: `A-Za-z0-9_-`. +/// +/// # Examples +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::album_id("MPREb_GyH43gCvdM5")); +/// assert!(!validate::album_id("Abcd_GyH43gCvdM5")); +/// ``` +/// +/// # Note +/// +/// Albums on YouTube Music have an album ID (`MPREb_...`) and a playlist ID +/// (`OLAK...`). If you open an album on the YouTube Music website, the address bar shows +/// the playlist ID, not the album ID. +/// +/// If you have the playlist ID of an album and need the album ID, you can use the +/// [string resolver](crate::client::RustyPipeQuery::resolve_string) with the `resolve_albums` +/// option enabled. +pub fn album_id>(album_id: S) -> bool { + util::ALBUM_ID_REGEX.is_match(album_id.as_ref()) +} + +/// Validate the given radio ID +/// +/// YouTube radio IDs start with the characters `RD`, +/// followed by 22 to 50 of these characters: `A-Za-z0-9_-`. 
+/// +/// # Radio types +/// +/// - Artist radio: `RDEMSuoM_jxfse1_g8uCO7MCtg` +/// - Genre radio: `RDQM1xqCV6EdPUw` +/// - Shuffle radio: `RDAOVeZA-2uzuUKdoB81Ha3srw` +/// - Playlist radio (`RDAMPL` + playlist ID): `RDAMPLPL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI` +/// - Track radio (`RDAMVM` + video ID): `RDAMVMZeerrnuLi5E` +/// +/// # Examples +/// +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::radio_id("RDEMSuoM_jxfse1_g8uCO7MCtg")); +/// assert!(!validate::radio_id("Abcd")); +/// assert!(!validate::radio_id("XYEMSuoM_jxfse1_g8uCO7MCtg")); +/// ``` +pub fn radio_id>(radio_id: S) -> bool { + static RADIO_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^RD[A-Za-z0-9_-]{22,50}$").unwrap()); + + RADIO_ID_REGEX.is_match(radio_id.as_ref()) +} + +/// Validate the given genre ID +/// +/// YouTube genre IDs are exactly 24 characters long, start with the characters `ggMPO`, +/// followed by 19 of these characters: `A-Za-z0-9_-`. +/// +/// # Examples +/// +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::genre_id("ggMPOg1uX1JOQWZFeDByc2Jm")); +/// assert!(!validate::genre_id("Abcd")); +/// assert!(!validate::genre_id("ggAbcg1uX1JOQWZFeDByc2Jm")); +/// ``` +pub fn genre_id>(genre_id: S) -> bool { + static GENRE_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^ggMPO[A-Za-z0-9_-]{19}$").unwrap()); + + GENRE_ID_REGEX.is_match(genre_id.as_ref()) +} + +/// Validate the given related ID +/// +/// YouTube related IDs are exactly 17 characters long, start with the characters `MPTRt_`, +/// followed by 11 of these characters: `A-Za-z0-9_-`. 
+/// +/// # Examples +/// +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::track_related_id("MPTRt_wrKjTn9hmry")); +/// assert!(!validate::track_related_id("Abcd")); +/// assert!(!validate::track_related_id("Abcdt_wrKjTn9hmry")); +/// ``` +pub fn track_related_id>(related_id: S) -> bool { + static RELATED_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^MPTRt_[A-Za-z0-9_-]{11}$").unwrap()); + + RELATED_ID_REGEX.is_match(related_id.as_ref()) +} + +/// Validate the given lyrics ID +/// +/// YouTube lyrics IDs are exactly 17 characters long, start with the characters `MPLYt_`, +/// followed by 11 of these characters: `A-Za-z0-9_-`. +/// +/// # Examples +/// +/// ``` +/// # use rustypipe::validate; +/// assert!(validate::track_lyrics_id("MPLYt_wrKjTn9hmry")); +/// assert!(!validate::track_lyrics_id("Abcd")); +/// assert!(!validate::track_lyrics_id("Abcdt_wrKjTn9hmry")); +/// ``` +pub fn track_lyrics_id>(lyrics_id: S) -> bool { + static LYRICS_ID_REGEX: Lazy = + Lazy::new(|| Regex::new(r"^MPLYt_[A-Za-z0-9_-]{11}$").unwrap()); + + LYRICS_ID_REGEX.is_match(lyrics_id.as_ref()) +} diff --git a/tests/youtube.rs b/tests/youtube.rs index 28ba4ff..0bf7260 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -1,9 +1,8 @@ use std::collections::HashSet; use std::fmt::Display; -use fancy_regex::Regex; -use once_cell::sync::Lazy; use rstest::rstest; +use rustypipe::validate; use time::macros::date; use time::OffsetDateTime; @@ -2121,6 +2120,7 @@ async fn music_genres() { assert!(!pop.is_mood); genres.iter().for_each(|g| { + assert!(validate::genre_id(&g.id)); assert_gte(g.color, 0xff000000, "color"); }); } @@ -2270,44 +2270,17 @@ async fn assert_next_items>( } fn assert_video_id(id: &str) { - static VIDEO_ID_REGEX: Lazy = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap()); - - assert!( - VIDEO_ID_REGEX.is_match(id).unwrap_or_default(), - "invalid video id: `{}`", - id - ); + assert!(validate::video_id(id), "invalid video id: `{}`", id) } fn 
assert_channel_id(id: &str) { - static CHANNEL_ID_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); - - assert!( - CHANNEL_ID_REGEX.is_match(id).unwrap_or_default(), - "invalid channel id: `{}`", - id - ); + assert!(validate::channel_id(id), "invalid channel id: `{}`", id); } fn assert_album_id(id: &str) { - static ALBUM_ID_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap()); - - assert!( - ALBUM_ID_REGEX.is_match(id).unwrap_or_default(), - "invalid album id: `{}`", - id - ); + assert!(validate::album_id(id), "invalid album id: `{}`", id); } fn assert_playlist_id(id: &str) { - static PLAYLIST_ID_REGEX: Lazy = - Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap()); - - assert!( - PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default(), - "invalid album id: `{}`", - id - ); + assert!(validate::playlist_id(id), "invalid playlist id: `{}`", id); }