Compare commits
	
		
			No commits in common. "f7fbf40721fc54edaaed152457201148ab4f0d75" and "4cc069fba273d0b8ea40395fc2032bc6729ffadb" have entirely different histories.
		
	
	
		
			
				f7fbf40721
			
			...
			
				4cc069fba2
			
		
	
		
					 14 changed files with 126 additions and 253 deletions
				
			
		|  | @ -26,7 +26,6 @@ rustls-tls-native-roots = ["reqwest/rustls-tls-native-roots"] | |||
| # quick-js = "0.4.1" | ||||
| quick-js = { path = "../quickjs-rs", default-features = false } | ||||
| once_cell = "1.12.0" | ||||
| regex = "1.6.0" | ||||
| fancy-regex = "0.11.0" | ||||
| thiserror = "1.0.36" | ||||
| url = "2.2.2" | ||||
|  |  | |||
|  | @ -25,10 +25,10 @@ mod channel_rss; | |||
| use std::sync::Arc; | ||||
| use std::{borrow::Cow, fmt::Debug}; | ||||
| 
 | ||||
| use fancy_regex::Regex; | ||||
| use log::{debug, error, warn}; | ||||
| use once_cell::sync::Lazy; | ||||
| use rand::Rng; | ||||
| use regex::Regex; | ||||
| use reqwest::{header, Client, ClientBuilder, Request, RequestBuilder, Response}; | ||||
| use serde::{de::DeserializeOwned, Deserialize, Serialize}; | ||||
| use time::{Duration, OffsetDateTime}; | ||||
|  |  | |||
|  | @ -1,8 +1,8 @@ | |||
| use std::{borrow::Cow, rc::Rc}; | ||||
| 
 | ||||
| use fancy_regex::Regex; | ||||
| use futures::{stream, StreamExt}; | ||||
| use once_cell::sync::Lazy; | ||||
| use regex::Regex; | ||||
| use serde::Serialize; | ||||
| 
 | ||||
| use crate::{ | ||||
|  | @ -268,6 +268,8 @@ fn map_artist_page( | |||
|     let wikipedia_url = header.description.as_deref().and_then(|h| { | ||||
|         WIKIPEDIA_REGEX | ||||
|             .captures(h) | ||||
|             .ok() | ||||
|             .flatten() | ||||
|             .and_then(|c| c.get(0)) | ||||
|             .map(|m| m.as_str().to_owned()) | ||||
|     }); | ||||
|  |  | |||
|  | @ -3,8 +3,8 @@ use std::{ | |||
|     collections::{BTreeMap, HashMap}, | ||||
| }; | ||||
| 
 | ||||
| use fancy_regex::Regex; | ||||
| use once_cell::sync::Lazy; | ||||
| use regex::Regex; | ||||
| use serde::Serialize; | ||||
| use url::Url; | ||||
| 
 | ||||
|  | @ -530,6 +530,8 @@ fn map_audio_stream( | |||
|                     Some(t) => { | ||||
|                         let lang = LANG_PATTERN | ||||
|                             .captures(&t.id) | ||||
|                             .ok() | ||||
|                             .flatten() | ||||
|                             .map(|m| m.get(1).unwrap().as_str().to_owned()); | ||||
| 
 | ||||
|                         Some(AudioTrack { | ||||
|  | @ -555,7 +557,7 @@ fn parse_mime(mime: &str) -> Option<(&str, Vec<&str>)> { | |||
|     static PATTERN: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r#"(\w+/\w+);\scodecs="([a-zA-Z-0-9.,\s]*)""#).unwrap()); | ||||
| 
 | ||||
|     let captures = some_or_bail!(PATTERN.captures(mime), None); | ||||
|     let captures = some_or_bail!(PATTERN.captures(mime).ok().flatten(), None); | ||||
|     Some(( | ||||
|         captures.get(1).unwrap().as_str(), | ||||
|         captures | ||||
|  |  | |||
|  | @ -97,7 +97,7 @@ impl From<ChannelRss> for crate::model::ChannelRss { | |||
|                         .uri | ||||
|                         .strip_prefix("https://www.youtube.com/channel/") | ||||
|                         .and_then(|id| { | ||||
|                             if util::CHANNEL_ID_REGEX.is_match(id) { | ||||
|                             if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                                 Some(id.to_owned()) | ||||
|                             } else { | ||||
|                                 None | ||||
|  |  | |||
|  | @ -1,5 +1,5 @@ | |||
| use fancy_regex::Regex; | ||||
| use once_cell::sync::Lazy; | ||||
| use regex::Regex; | ||||
| use serde::Deserialize; | ||||
| use serde_with::{ | ||||
|     json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError, | ||||
|  | @ -503,12 +503,20 @@ impl<T> YouTubeListMapper<T> { | |||
|             id: video.video_id, | ||||
|             name: video.headline, | ||||
|             length: video.accessibility.and_then(|acc| { | ||||
|                 ACCESSIBILITY_SEP_REGEX.captures(&acc).and_then(|cap| { | ||||
|                     cap.get(1).and_then(|c| { | ||||
|                         timeago::parse_timeago_or_warn(self.lang, c.as_str(), &mut self.warnings) | ||||
|                 ACCESSIBILITY_SEP_REGEX | ||||
|                     .captures(&acc) | ||||
|                     .ok() | ||||
|                     .flatten() | ||||
|                     .and_then(|cap| { | ||||
|                         cap.get(1).and_then(|c| { | ||||
|                             timeago::parse_timeago_or_warn( | ||||
|                                 self.lang, | ||||
|                                 c.as_str(), | ||||
|                                 &mut self.warnings, | ||||
|                             ) | ||||
|                             .map(|ta| Duration::from(ta).whole_seconds() as u32) | ||||
|                         }) | ||||
|                     }) | ||||
|                 }) | ||||
|             }), | ||||
|             thumbnail: video.thumbnail.into(), | ||||
|             channel: self.channel.clone(), | ||||
|  |  | |||
|  | @ -112,9 +112,9 @@ impl RustyPipeQuery { | |||
|             // Album or channel
 | ||||
|             Some("browse") => match path_split.next() { | ||||
|                 Some(id) => { | ||||
|                     if util::CHANNEL_ID_REGEX.is_match(id) { | ||||
|                     if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                         Ok(UrlTarget::Channel { id: id.to_owned() }) | ||||
|                     } else if util::ALBUM_ID_REGEX.is_match(id) { | ||||
|                     } else if util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                         Ok(UrlTarget::Album { id: id.to_owned() }) | ||||
|                     } else { | ||||
|                         Err(Error::Other("invalid url: no browse id".into())) | ||||
|  | @ -153,7 +153,10 @@ impl RustyPipeQuery { | |||
|                         // If there is a timestamp parameter, it has to be a video
 | ||||
|                         // First check the innertube API if this is a channel vanity url
 | ||||
|                         // If no channel is found and the identifier has the video ID format, assume it is a video
 | ||||
|                         if !params.contains_key("t") && util::VANITY_PATH_REGEX.is_match(url.path()) | ||||
|                         if !params.contains_key("t") | ||||
|                             && util::VANITY_PATH_REGEX | ||||
|                                 .is_match(url.path()) | ||||
|                                 .unwrap_or_default() | ||||
|                         { | ||||
|                             match self | ||||
|                                 ._navigation_resolve_url(url.path(), ClientType::Desktop) | ||||
|  | @ -161,7 +164,7 @@ impl RustyPipeQuery { | |||
|                             { | ||||
|                                 Ok(target) => Ok(target), | ||||
|                                 Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => { | ||||
|                                     match util::VIDEO_ID_REGEX.is_match(id) { | ||||
|                                     match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                                         true => Ok(UrlTarget::Video { | ||||
|                                             id: id.to_owned(), | ||||
|                                             start_time: get_start_time(), | ||||
|  | @ -173,7 +176,7 @@ impl RustyPipeQuery { | |||
|                                 } | ||||
|                                 Err(e) => Err(e), | ||||
|                             } | ||||
|                         } else if util::VIDEO_ID_REGEX.is_match(id) { | ||||
|                         } else if util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                             Ok(UrlTarget::Video { | ||||
|                                 id: id.to_owned(), | ||||
|                                 start_time: get_start_time(), | ||||
|  | @ -229,16 +232,16 @@ impl RustyPipeQuery { | |||
|                 .await | ||||
|         } | ||||
|         // ID only
 | ||||
|         else if util::VIDEO_ID_REGEX.is_match(string) { | ||||
|         else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() { | ||||
|             Ok(UrlTarget::Video { | ||||
|                 id: string.to_owned(), | ||||
|                 start_time: 0, | ||||
|             }) | ||||
|         } else if util::CHANNEL_ID_REGEX.is_match(string) { | ||||
|         } else if util::CHANNEL_ID_REGEX.is_match(string).unwrap_or_default() { | ||||
|             Ok(UrlTarget::Channel { | ||||
|                 id: string.to_owned(), | ||||
|             }) | ||||
|         } else if util::PLAYLIST_ID_REGEX.is_match(string) { | ||||
|         } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() { | ||||
|             if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) { | ||||
|                 self._navigation_resolve_url( | ||||
|                     &format!("/playlist?list={}", string), | ||||
|  | @ -250,13 +253,13 @@ impl RustyPipeQuery { | |||
|                     id: string.to_owned(), | ||||
|                 }) | ||||
|             } | ||||
|         } else if util::ALBUM_ID_REGEX.is_match(string) { | ||||
|         } else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() { | ||||
|             Ok(UrlTarget::Album { | ||||
|                 id: string.to_owned(), | ||||
|             }) | ||||
|         } | ||||
|         // Channel name only
 | ||||
|         else if util::VANITY_PATH_REGEX.is_match(string) { | ||||
|         else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() { | ||||
|             self._navigation_resolve_url( | ||||
|                 &format!("/{}", string.trim_start_matches('/')), | ||||
|                 ClientType::Desktop, | ||||
|  |  | |||
|  | @ -1,7 +1,6 @@ | |||
| use fancy_regex::Regex as FancyRegex; | ||||
| use fancy_regex::Regex; | ||||
| use log::debug; | ||||
| use once_cell::sync::Lazy; | ||||
| use regex::Regex; | ||||
| use reqwest::Client; | ||||
| use serde::{Deserialize, Serialize}; | ||||
| use std::result::Result::Ok; | ||||
|  | @ -69,18 +68,18 @@ impl From<DeobfData> for Deobfuscator { | |||
| const DEOBFUSCATION_FUNC_NAME: &str = "deobfuscate"; | ||||
| 
 | ||||
| fn get_sig_fn_name(player_js: &str) -> Result<String> { | ||||
|     static FUNCTION_REGEXES: Lazy<[FancyRegex; 6]> = Lazy::new(|| { | ||||
|     static FUNCTION_REGEXES: Lazy<[Regex; 6]> = Lazy::new(|| { | ||||
|         [ | ||||
|         FancyRegex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(), | ||||
|         FancyRegex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(), | ||||
|         FancyRegex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(), | ||||
|         FancyRegex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), | ||||
|         FancyRegex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), | ||||
|         FancyRegex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(), | ||||
|         Regex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(), | ||||
|         Regex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(), | ||||
|         Regex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(), | ||||
|         Regex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), | ||||
|         Regex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), | ||||
|         Regex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(), | ||||
|     ] | ||||
|     }); | ||||
| 
 | ||||
|     util::get_cg_from_fancy_regexes(FUNCTION_REGEXES.iter(), player_js, 1) | ||||
|     util::get_cg_from_regexes(FUNCTION_REGEXES.iter(), player_js, 1) | ||||
|         .ok_or(DeobfError::Extraction("deobf function name")) | ||||
| } | ||||
| 
 | ||||
|  | @ -99,6 +98,8 @@ fn get_sig_fn(player_js: &str) -> Result<String> { | |||
|     let deobfuscate_function = "var ".to_owned() | ||||
|         + function_pattern | ||||
|             .captures(player_js) | ||||
|             .ok() | ||||
|             .flatten() | ||||
|             .ok_or(DeobfError::Extraction("deobf function"))? | ||||
|             .get(1) | ||||
|             .unwrap() | ||||
|  | @ -109,6 +110,8 @@ fn get_sig_fn(player_js: &str) -> Result<String> { | |||
|         Lazy::new(|| Regex::new(";([A-Za-z0-9_\\$]{2})\\...\\(").unwrap()); | ||||
|     let helper_object_name = HELPER_OBJECT_NAME_REGEX | ||||
|         .captures(&deobfuscate_function) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .ok_or(DeobfError::Extraction("helper object name"))? | ||||
|         .get(1) | ||||
|         .unwrap() | ||||
|  | @ -121,6 +124,8 @@ fn get_sig_fn(player_js: &str) -> Result<String> { | |||
|     let player_js_nonl = player_js.replace('\n', ""); | ||||
|     let helper_object = helper_pattern | ||||
|         .captures(&player_js_nonl) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .ok_or(DeobfError::Extraction("helper object"))? | ||||
|         .get(1) | ||||
|         .unwrap() | ||||
|  | @ -149,6 +154,8 @@ fn get_nsig_fn_name(player_js: &str) -> Result<String> { | |||
| 
 | ||||
|     let fname_match = FUNCTION_NAME_REGEX | ||||
|         .captures(player_js) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .ok_or(DeobfError::Extraction("n_deobf function"))?; | ||||
| 
 | ||||
|     let function_name = fname_match.get(1).unwrap().as_str(); | ||||
|  | @ -164,13 +171,15 @@ fn get_nsig_fn_name(player_js: &str) -> Result<String> { | |||
|         .parse::<usize>() | ||||
|         .or(Err(DeobfError::Other("could not parse array_num")))?; | ||||
|     let array_pattern_str = | ||||
|         "var ".to_owned() + &regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];"; | ||||
|         "var ".to_owned() + &fancy_regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];"; | ||||
|     let array_pattern = Regex::new(&array_pattern_str).or(Err(DeobfError::Other( | ||||
|         "could not parse helper pattern regex", | ||||
|     )))?; | ||||
| 
 | ||||
|     let array_str = array_pattern | ||||
|         .captures(player_js) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .ok_or(DeobfError::Extraction("n_deobf array_str"))? | ||||
|         .get(1) | ||||
|         .unwrap() | ||||
|  | @ -265,10 +274,13 @@ async fn get_player_js_url(http: &Client) -> Result<String> { | |||
|     let text = resp.text().await?; | ||||
| 
 | ||||
|     static PLAYER_HASH_PATTERN: Lazy<Regex> = Lazy::new(|| { | ||||
|         Regex::new(r#"https:\\/\\/www\.youtube\.com\\/s\\/player\\/([a-z0-9]{8})\\/"#).unwrap() | ||||
|         Regex::new(r#"https:\\\/\\\/www\.youtube\.com\\\/s\\\/player\\\/([a-z0-9]{8})\\\/"#) | ||||
|             .unwrap() | ||||
|     }); | ||||
|     let player_hash = PLAYER_HASH_PATTERN | ||||
|         .captures(&text) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .ok_or(DeobfError::Extraction("player hash"))? | ||||
|         .get(1) | ||||
|         .unwrap() | ||||
|  | @ -291,6 +303,8 @@ fn get_sts(player_js: &str) -> Result<String> { | |||
| 
 | ||||
|     Ok(STS_PATTERN | ||||
|         .captures(player_js) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .ok_or(DeobfError::Extraction("sts"))? | ||||
|         .get(1) | ||||
|         .unwrap() | ||||
|  |  | |||
|  | @ -15,4 +15,3 @@ pub mod model; | |||
| pub mod param; | ||||
| pub mod report; | ||||
| pub mod timeago; | ||||
| pub mod validate; | ||||
|  |  | |||
|  | @ -111,22 +111,30 @@ impl UrlTarget { | |||
|     /// Validate the YouTube ID from the URL target
 | ||||
|     pub(crate) fn validate(&self) -> Result<(), Error> { | ||||
|         match self { | ||||
|             UrlTarget::Video { id, .. } => match util::VIDEO_ID_REGEX.is_match(id) { | ||||
|                 true => Ok(()), | ||||
|                 false => Err(Error::Other("invalid video id".into())), | ||||
|             }, | ||||
|             UrlTarget::Channel { id } => match util::CHANNEL_ID_REGEX.is_match(id) { | ||||
|                 true => Ok(()), | ||||
|                 false => Err(Error::Other("invalid channel id".into())), | ||||
|             }, | ||||
|             UrlTarget::Playlist { id } => match util::PLAYLIST_ID_REGEX.is_match(id) { | ||||
|                 true => Ok(()), | ||||
|                 false => Err(Error::Other("invalid playlist id".into())), | ||||
|             }, | ||||
|             UrlTarget::Album { id } => match util::ALBUM_ID_REGEX.is_match(id) { | ||||
|                 true => Ok(()), | ||||
|                 false => Err(Error::Other("invalid album id".into())), | ||||
|             }, | ||||
|             UrlTarget::Video { id, .. } => { | ||||
|                 match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                     true => Ok(()), | ||||
|                     false => Err(Error::Other("invalid video id".into())), | ||||
|                 } | ||||
|             } | ||||
|             UrlTarget::Channel { id } => { | ||||
|                 match util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                     true => Ok(()), | ||||
|                     false => Err(Error::Other("invalid channel id".into())), | ||||
|                 } | ||||
|             } | ||||
|             UrlTarget::Playlist { id } => { | ||||
|                 match util::PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                     true => Ok(()), | ||||
|                     false => Err(Error::Other("invalid playlist id".into())), | ||||
|                 } | ||||
|             } | ||||
|             UrlTarget::Album { id } => { | ||||
|                 match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() { | ||||
|                     true => Ok(()), | ||||
|                     false => Err(Error::Other("invalid album id".into())), | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  |  | |||
|  | @ -1,7 +1,7 @@ | |||
| use std::convert::TryFrom; | ||||
| 
 | ||||
| use fancy_regex::Regex; | ||||
| use once_cell::sync::Lazy; | ||||
| use regex::Regex; | ||||
| use serde::{Deserialize, Deserializer}; | ||||
| use serde_with::{serde_as, DeserializeAs}; | ||||
| 
 | ||||
|  |  | |||
|  | @ -13,10 +13,9 @@ use std::{ | |||
| }; | ||||
| 
 | ||||
| use base64::Engine; | ||||
| use fancy_regex::Regex as FancyRegex; | ||||
| use fancy_regex::Regex; | ||||
| use once_cell::sync::Lazy; | ||||
| use rand::Rng; | ||||
| use regex::Regex; | ||||
| use url::Url; | ||||
| 
 | ||||
| use crate::{error::Error, param::Language}; | ||||
|  | @ -25,12 +24,11 @@ pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_- | |||
| pub static CHANNEL_ID_REGEX: Lazy<Regex> = | ||||
|     Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); | ||||
| pub static PLAYLIST_ID_REGEX: Lazy<Regex> = | ||||
|     Lazy::new(|| Regex::new(r"^(?:PL|RDCLAK|OLAK)[A-Za-z0-9_-]{30,50}$").unwrap()); | ||||
|     Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap()); | ||||
| pub static ALBUM_ID_REGEX: Lazy<Regex> = | ||||
|     Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap()); | ||||
| pub static VANITY_PATH_REGEX: Lazy<Regex> = Lazy::new(|| { | ||||
|     Regex::new(r"^/?(?:(?:c/|user/)?[A-z0-9]{1,100})|(?:@[A-z0-9-_.]{1,100})$").unwrap() | ||||
| }); | ||||
| pub static VANITY_PATH_REGEX: Lazy<Regex> = | ||||
|     Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap()); | ||||
| 
 | ||||
| /// Separator string for YouTube Music subtitles
 | ||||
| pub const DOT_SEPARATOR: &str = " • "; | ||||
|  | @ -51,16 +49,6 @@ pub struct MappingError(pub(crate) Cow<'static, str>); | |||
| pub fn get_cg_from_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String> | ||||
| where | ||||
|     I: Iterator<Item = &'a Regex>, | ||||
| { | ||||
|     regexes | ||||
|         .find_map(|pattern| pattern.captures(text)) | ||||
|         .map(|c| c.get(cg).unwrap().as_str().to_owned()) | ||||
| } | ||||
| 
 | ||||
| /// Return the given capture group that matches first in a list of fancy regexes
 | ||||
| pub fn get_cg_from_fancy_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String> | ||||
| where | ||||
|     I: Iterator<Item = &'a FancyRegex>, | ||||
| { | ||||
|     regexes | ||||
|         .find_map(|pattern| pattern.captures(text).ok().flatten()) | ||||
|  | @ -144,7 +132,7 @@ where | |||
| pub fn parse_video_length(text: &str) -> Option<u32> { | ||||
|     static VIDEO_LENGTH_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap()); | ||||
|     VIDEO_LENGTH_REGEX.captures(text).map(|cap| { | ||||
|     VIDEO_LENGTH_REGEX.captures(text).ok().flatten().map(|cap| { | ||||
|         let hrs = cap | ||||
|             .get(1) | ||||
|             .and_then(|x| x.as_str().parse::<u32>().ok()) | ||||
|  | @ -351,6 +339,8 @@ pub fn video_id_from_thumbnail_url(url: &str) -> Option<String> { | |||
|         Lazy::new(|| Regex::new(r"^https://i.ytimg.com/vi/([A-Za-z0-9_-]{11})/").unwrap()); | ||||
|     URL_REGEX | ||||
|         .captures(url) | ||||
|         .ok() | ||||
|         .flatten() | ||||
|         .and_then(|cap| cap.get(1).map(|x| x.as_str().to_owned())) | ||||
| } | ||||
| 
 | ||||
|  |  | |||
							
								
								
									
										179
									
								
								src/validate.rs
									
										
									
									
									
								
							
							
						
						
									
										179
									
								
								src/validate.rs
									
										
									
									
									
								
							|  | @ -1,179 +0,0 @@ | |||
| //! # Input validation
 | ||||
| //!
 | ||||
| //! The extraction functions of RustyPipe will produce errors when fed with invalid input data
 | ||||
| //! (e.g. YouTube ID's with invalid format). Therefore you will need to validate all untrusted
 | ||||
| //! input data beforehand. The library offers two options for this:
 | ||||
| //!
 | ||||
| //! - The [URL resolver](crate::client::RustyPipeQuery::resolve_url) or
 | ||||
| //!   [string resolver](crate::client::RustyPipeQuery::resolve_string) is great for handling
 | ||||
| //!   arbitrary input and returns a [`UrlTarget`](crate::model::UrlTarget) enum that tells you
 | ||||
| //!   whether the given URL points to a video, channel, playlist, etc.
 | ||||
| //! - The validation functions of this module are meant for validating concrete data (video IDs,
 | ||||
| //!   channel IDs, playlist IDs) and return [`true`] if the given input is valid
 | ||||
| 
 | ||||
| use crate::util; | ||||
| use once_cell::sync::Lazy; | ||||
| use regex::Regex; | ||||
| 
 | ||||
| /// Validate the given video ID
 | ||||
| ///
 | ||||
| /// YouTube video IDs are exactly 11 characters long and consist of the characters `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::video_id("dQw4w9WgXcQ"));
 | ||||
| /// assert!(!validate::video_id("Abcd"));
 | ||||
| /// assert!(!validate::video_id("dQw4w9WgXc@"));
 | ||||
| /// ```
 | ||||
| pub fn video_id<S: AsRef<str>>(video_id: S) -> bool { | ||||
|     util::VIDEO_ID_REGEX.is_match(video_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given channel ID
 | ||||
| ///
 | ||||
| /// YouTube channel IDs are exactly 24 characters long, start with the characters `UC`,
 | ||||
| /// followed by 22 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::channel_id("UC2DjFE7Xf11URZqWBigcVOQ"));
 | ||||
| /// assert!(!validate::channel_id("Abcd"));
 | ||||
| /// assert!(!validate::channel_id("XY2DjFE7Xf11URZqWBigcVOQ"));
 | ||||
| /// ```
 | ||||
| pub fn channel_id<S: AsRef<str>>(channel_id: S) -> bool { | ||||
|     util::CHANNEL_ID_REGEX.is_match(channel_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given playlist ID
 | ||||
| ///
 | ||||
| /// YouTube playlist IDs start with the characters `PL` (user-created playlist),
 | ||||
| /// `RDCLAK` (YouTube Music-curated playlist) or `OLAK` (YouTube Music album),
 | ||||
| /// followed by at least 30 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::playlist_id("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI"));
 | ||||
| /// assert!(validate::playlist_id("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"));
 | ||||
| /// assert!(validate::playlist_id("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE"));
 | ||||
| ///
 | ||||
| /// assert!(!validate::playlist_id("Abcd"));
 | ||||
| /// ```
 | ||||
| pub fn playlist_id<S: AsRef<str>>(playlist_id: S) -> bool { | ||||
|     util::PLAYLIST_ID_REGEX.is_match(playlist_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given album ID
 | ||||
| ///
 | ||||
| /// YouTube Music album IDs are exactly 17 characters long, start with the characters `MPREb_`,
 | ||||
| /// followed by 11 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::album_id("MPREb_GyH43gCvdM5"));
 | ||||
| /// assert!(!validate::album_id("Abcd_GyH43gCvdM5"));
 | ||||
| /// ```
 | ||||
| ///
 | ||||
| /// # Note
 | ||||
| ///
 | ||||
| /// Albums on YouTube Music have an album ID (`MPREb_...`) and a playlist ID
 | ||||
| /// (`OLAK...`). If you open an album on the YouTube Music website, the address bar shows
 | ||||
| /// the playlist ID, not the album ID.
 | ||||
| ///
 | ||||
| /// If you have the playlist ID of an album and need the album ID, you can use the
 | ||||
| /// [string resolver](crate::client::RustyPipeQuery::resolve_string) with the `resolve_albums`
 | ||||
| /// option enabled.
 | ||||
| pub fn album_id<S: AsRef<str>>(album_id: S) -> bool { | ||||
|     util::ALBUM_ID_REGEX.is_match(album_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given radio ID
 | ||||
| ///
 | ||||
| /// YouTube radio IDs start with the characters `RD`,
 | ||||
| /// followed by at least 22 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Radio types
 | ||||
| ///
 | ||||
| /// - Artist radio: `RDEMSuoM_jxfse1_g8uCO7MCtg`
 | ||||
| /// - Genre radio: `RDQM1xqCV6EdPUw`
 | ||||
| /// - Shuffle radio: `RDAOVeZA-2uzuUKdoB81Ha3srw`
 | ||||
| /// - Playlist radio (`RDAMPL` + playlist ID): `RDAMPLPL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI`
 | ||||
| /// - Track radio (`RDAMVM` + video ID): `RDAMVMZeerrnuLi5E`
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| ///
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::radio_id("RDEMSuoM_jxfse1_g8uCO7MCtg"));
 | ||||
| /// assert!(!validate::radio_id("Abcd"));
 | ||||
| /// assert!(!validate::radio_id("XYEMSuoM_jxfse1_g8uCO7MCtg"));
 | ||||
| /// ```
 | ||||
| pub fn radio_id<S: AsRef<str>>(radio_id: S) -> bool { | ||||
|     static RADIO_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^RD[A-Za-z0-9_-]{22,50}$").unwrap()); | ||||
| 
 | ||||
|     RADIO_ID_REGEX.is_match(radio_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given genre ID
 | ||||
| ///
 | ||||
| /// YouTube genre IDs are exactly 24 characters long, start with the characters `ggMPO`,
 | ||||
| /// followed by 19 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| ///
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::genre_id("ggMPOg1uX1JOQWZFeDByc2Jm"));
 | ||||
| /// assert!(!validate::genre_id("Abcd"));
 | ||||
| /// assert!(!validate::genre_id("ggAbcg1uX1JOQWZFeDByc2Jm"));
 | ||||
| /// ```
 | ||||
| pub fn genre_id<S: AsRef<str>>(genre_id: S) -> bool { | ||||
|     static GENRE_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^ggMPO[A-Za-z0-9_-]{19}$").unwrap()); | ||||
| 
 | ||||
|     GENRE_ID_REGEX.is_match(genre_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given related ID
 | ||||
| ///
 | ||||
| /// YouTube related IDs are exactly 17 characters long, start with the characters `MPTRt_`,
 | ||||
| /// followed by 11 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| ///
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::track_related_id("MPTRt_wrKjTn9hmry"));
 | ||||
| /// assert!(!validate::track_related_id("Abcd"));
 | ||||
| /// assert!(!validate::track_related_id("Abcdt_wrKjTn9hmry"));
 | ||||
| /// ```
 | ||||
| pub fn track_related_id<S: AsRef<str>>(related_id: S) -> bool { | ||||
|     static RELATED_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^MPTRt_[A-Za-z0-9_-]{11}$").unwrap()); | ||||
| 
 | ||||
|     RELATED_ID_REGEX.is_match(related_id.as_ref()) | ||||
| } | ||||
| 
 | ||||
| /// Validate the given lyrics ID
 | ||||
| ///
 | ||||
| /// YouTube lyrics IDs are exactly 17 characters long, start with the characters `MPLYt_`,
 | ||||
| /// followed by 11 of these characters: `A-Za-z0-9_-`.
 | ||||
| ///
 | ||||
| /// # Examples
 | ||||
| ///
 | ||||
| /// ```
 | ||||
| /// # use rustypipe::validate;
 | ||||
| /// assert!(validate::track_lyrics_id("MPLYt_wrKjTn9hmry"));
 | ||||
| /// assert!(!validate::track_lyrics_id("Abcd"));
 | ||||
| /// assert!(!validate::track_lyrics_id("Abcdt_wrKjTn9hmry"));
 | ||||
| /// ```
 | ||||
| pub fn track_lyrics_id<S: AsRef<str>>(lyrics_id: S) -> bool { | ||||
|     static LYRICS_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^MPLYt_[A-Za-z0-9_-]{11}$").unwrap()); | ||||
| 
 | ||||
|     LYRICS_ID_REGEX.is_match(lyrics_id.as_ref()) | ||||
| } | ||||
|  | @ -1,8 +1,9 @@ | |||
| use std::collections::HashSet; | ||||
| use std::fmt::Display; | ||||
| 
 | ||||
| use fancy_regex::Regex; | ||||
| use once_cell::sync::Lazy; | ||||
| use rstest::rstest; | ||||
| use rustypipe::validate; | ||||
| use time::macros::date; | ||||
| use time::OffsetDateTime; | ||||
| 
 | ||||
|  | @ -2120,7 +2121,6 @@ async fn music_genres() { | |||
|     assert!(!pop.is_mood); | ||||
| 
 | ||||
|     genres.iter().for_each(|g| { | ||||
|         assert!(validate::genre_id(&g.id)); | ||||
|         assert_gte(g.color, 0xff000000, "color"); | ||||
|     }); | ||||
| } | ||||
|  | @ -2270,17 +2270,44 @@ async fn assert_next_items<T: FromYtItem, Q: AsRef<RustyPipeQuery>>( | |||
| } | ||||
| 
 | ||||
| fn assert_video_id(id: &str) { | ||||
|     assert!(validate::video_id(id), "invalid video id: `{}`", id) | ||||
|     static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap()); | ||||
| 
 | ||||
|     assert!( | ||||
|         VIDEO_ID_REGEX.is_match(id).unwrap_or_default(), | ||||
|         "invalid video id: `{}`", | ||||
|         id | ||||
|     ); | ||||
| } | ||||
| 
 | ||||
| fn assert_channel_id(id: &str) { | ||||
|     assert!(validate::channel_id(id), "invalid channel id: `{}`", id); | ||||
|     static CHANNEL_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); | ||||
| 
 | ||||
|     assert!( | ||||
|         CHANNEL_ID_REGEX.is_match(id).unwrap_or_default(), | ||||
|         "invalid channel id: `{}`", | ||||
|         id | ||||
|     ); | ||||
| } | ||||
| 
 | ||||
| fn assert_album_id(id: &str) { | ||||
|     assert!(validate::album_id(id), "invalid album id: `{}`", id); | ||||
|     static ALBUM_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap()); | ||||
| 
 | ||||
|     assert!( | ||||
|         ALBUM_ID_REGEX.is_match(id).unwrap_or_default(), | ||||
|         "invalid album id: `{}`", | ||||
|         id | ||||
|     ); | ||||
| } | ||||
| 
 | ||||
| fn assert_playlist_id(id: &str) { | ||||
|     assert!(validate::playlist_id(id), "invalid playlist id: `{}`", id); | ||||
|     static PLAYLIST_ID_REGEX: Lazy<Regex> = | ||||
|         Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap()); | ||||
| 
 | ||||
|     assert!( | ||||
|         PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default(), | ||||
|         "invalid album id: `{}`", | ||||
|         id | ||||
|     ); | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue