Compare commits

..

No commits in common. "f7fbf40721fc54edaaed152457201148ab4f0d75" and "4cc069fba273d0b8ea40395fc2032bc6729ffadb" have entirely different histories.

14 changed files with 126 additions and 253 deletions

View file

@ -26,7 +26,6 @@ rustls-tls-native-roots = ["reqwest/rustls-tls-native-roots"]
# quick-js = "0.4.1" # quick-js = "0.4.1"
quick-js = { path = "../quickjs-rs", default-features = false } quick-js = { path = "../quickjs-rs", default-features = false }
once_cell = "1.12.0" once_cell = "1.12.0"
regex = "1.6.0"
fancy-regex = "0.11.0" fancy-regex = "0.11.0"
thiserror = "1.0.36" thiserror = "1.0.36"
url = "2.2.2" url = "2.2.2"

View file

@ -25,10 +25,10 @@ mod channel_rss;
use std::sync::Arc; use std::sync::Arc;
use std::{borrow::Cow, fmt::Debug}; use std::{borrow::Cow, fmt::Debug};
use fancy_regex::Regex;
use log::{debug, error, warn}; use log::{debug, error, warn};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rand::Rng; use rand::Rng;
use regex::Regex;
use reqwest::{header, Client, ClientBuilder, Request, RequestBuilder, Response}; use reqwest::{header, Client, ClientBuilder, Request, RequestBuilder, Response};
use serde::{de::DeserializeOwned, Deserialize, Serialize}; use serde::{de::DeserializeOwned, Deserialize, Serialize};
use time::{Duration, OffsetDateTime}; use time::{Duration, OffsetDateTime};

View file

@ -1,8 +1,8 @@
use std::{borrow::Cow, rc::Rc}; use std::{borrow::Cow, rc::Rc};
use fancy_regex::Regex;
use futures::{stream, StreamExt}; use futures::{stream, StreamExt};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex;
use serde::Serialize; use serde::Serialize;
use crate::{ use crate::{
@ -268,6 +268,8 @@ fn map_artist_page(
let wikipedia_url = header.description.as_deref().and_then(|h| { let wikipedia_url = header.description.as_deref().and_then(|h| {
WIKIPEDIA_REGEX WIKIPEDIA_REGEX
.captures(h) .captures(h)
.ok()
.flatten()
.and_then(|c| c.get(0)) .and_then(|c| c.get(0))
.map(|m| m.as_str().to_owned()) .map(|m| m.as_str().to_owned())
}); });

View file

@ -3,8 +3,8 @@ use std::{
collections::{BTreeMap, HashMap}, collections::{BTreeMap, HashMap},
}; };
use fancy_regex::Regex;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex;
use serde::Serialize; use serde::Serialize;
use url::Url; use url::Url;
@ -530,6 +530,8 @@ fn map_audio_stream(
Some(t) => { Some(t) => {
let lang = LANG_PATTERN let lang = LANG_PATTERN
.captures(&t.id) .captures(&t.id)
.ok()
.flatten()
.map(|m| m.get(1).unwrap().as_str().to_owned()); .map(|m| m.get(1).unwrap().as_str().to_owned());
Some(AudioTrack { Some(AudioTrack {
@ -555,7 +557,7 @@ fn parse_mime(mime: &str) -> Option<(&str, Vec<&str>)> {
static PATTERN: Lazy<Regex> = static PATTERN: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(\w+/\w+);\scodecs="([a-zA-Z-0-9.,\s]*)""#).unwrap()); Lazy::new(|| Regex::new(r#"(\w+/\w+);\scodecs="([a-zA-Z-0-9.,\s]*)""#).unwrap());
let captures = some_or_bail!(PATTERN.captures(mime), None); let captures = some_or_bail!(PATTERN.captures(mime).ok().flatten(), None);
Some(( Some((
captures.get(1).unwrap().as_str(), captures.get(1).unwrap().as_str(),
captures captures

View file

@ -97,7 +97,7 @@ impl From<ChannelRss> for crate::model::ChannelRss {
.uri .uri
.strip_prefix("https://www.youtube.com/channel/") .strip_prefix("https://www.youtube.com/channel/")
.and_then(|id| { .and_then(|id| {
if util::CHANNEL_ID_REGEX.is_match(id) { if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() {
Some(id.to_owned()) Some(id.to_owned())
} else { } else {
None None

View file

@ -1,5 +1,5 @@
use fancy_regex::Regex;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex;
use serde::Deserialize; use serde::Deserialize;
use serde_with::{ use serde_with::{
json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError, json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError,
@ -503,9 +503,17 @@ impl<T> YouTubeListMapper<T> {
id: video.video_id, id: video.video_id,
name: video.headline, name: video.headline,
length: video.accessibility.and_then(|acc| { length: video.accessibility.and_then(|acc| {
ACCESSIBILITY_SEP_REGEX.captures(&acc).and_then(|cap| { ACCESSIBILITY_SEP_REGEX
.captures(&acc)
.ok()
.flatten()
.and_then(|cap| {
cap.get(1).and_then(|c| { cap.get(1).and_then(|c| {
timeago::parse_timeago_or_warn(self.lang, c.as_str(), &mut self.warnings) timeago::parse_timeago_or_warn(
self.lang,
c.as_str(),
&mut self.warnings,
)
.map(|ta| Duration::from(ta).whole_seconds() as u32) .map(|ta| Duration::from(ta).whole_seconds() as u32)
}) })
}) })

View file

@ -112,9 +112,9 @@ impl RustyPipeQuery {
// Album or channel // Album or channel
Some("browse") => match path_split.next() { Some("browse") => match path_split.next() {
Some(id) => { Some(id) => {
if util::CHANNEL_ID_REGEX.is_match(id) { if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() {
Ok(UrlTarget::Channel { id: id.to_owned() }) Ok(UrlTarget::Channel { id: id.to_owned() })
} else if util::ALBUM_ID_REGEX.is_match(id) { } else if util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() {
Ok(UrlTarget::Album { id: id.to_owned() }) Ok(UrlTarget::Album { id: id.to_owned() })
} else { } else {
Err(Error::Other("invalid url: no browse id".into())) Err(Error::Other("invalid url: no browse id".into()))
@ -153,7 +153,10 @@ impl RustyPipeQuery {
// If there is a timestamp parameter, it has to be a video // If there is a timestamp parameter, it has to be a video
// First check the innertube API if this is a channel vanity url // First check the innertube API if this is a channel vanity url
// If no channel is found and the identifier has the video ID format, assume it is a video // If no channel is found and the identifier has the video ID format, assume it is a video
if !params.contains_key("t") && util::VANITY_PATH_REGEX.is_match(url.path()) if !params.contains_key("t")
&& util::VANITY_PATH_REGEX
.is_match(url.path())
.unwrap_or_default()
{ {
match self match self
._navigation_resolve_url(url.path(), ClientType::Desktop) ._navigation_resolve_url(url.path(), ClientType::Desktop)
@ -161,7 +164,7 @@ impl RustyPipeQuery {
{ {
Ok(target) => Ok(target), Ok(target) => Ok(target),
Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => { Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => {
match util::VIDEO_ID_REGEX.is_match(id) { match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
true => Ok(UrlTarget::Video { true => Ok(UrlTarget::Video {
id: id.to_owned(), id: id.to_owned(),
start_time: get_start_time(), start_time: get_start_time(),
@ -173,7 +176,7 @@ impl RustyPipeQuery {
} }
Err(e) => Err(e), Err(e) => Err(e),
} }
} else if util::VIDEO_ID_REGEX.is_match(id) { } else if util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
Ok(UrlTarget::Video { Ok(UrlTarget::Video {
id: id.to_owned(), id: id.to_owned(),
start_time: get_start_time(), start_time: get_start_time(),
@ -229,16 +232,16 @@ impl RustyPipeQuery {
.await .await
} }
// ID only // ID only
else if util::VIDEO_ID_REGEX.is_match(string) { else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() {
Ok(UrlTarget::Video { Ok(UrlTarget::Video {
id: string.to_owned(), id: string.to_owned(),
start_time: 0, start_time: 0,
}) })
} else if util::CHANNEL_ID_REGEX.is_match(string) { } else if util::CHANNEL_ID_REGEX.is_match(string).unwrap_or_default() {
Ok(UrlTarget::Channel { Ok(UrlTarget::Channel {
id: string.to_owned(), id: string.to_owned(),
}) })
} else if util::PLAYLIST_ID_REGEX.is_match(string) { } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() {
if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) { if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) {
self._navigation_resolve_url( self._navigation_resolve_url(
&format!("/playlist?list={}", string), &format!("/playlist?list={}", string),
@ -250,13 +253,13 @@ impl RustyPipeQuery {
id: string.to_owned(), id: string.to_owned(),
}) })
} }
} else if util::ALBUM_ID_REGEX.is_match(string) { } else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() {
Ok(UrlTarget::Album { Ok(UrlTarget::Album {
id: string.to_owned(), id: string.to_owned(),
}) })
} }
// Channel name only // Channel name only
else if util::VANITY_PATH_REGEX.is_match(string) { else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() {
self._navigation_resolve_url( self._navigation_resolve_url(
&format!("/{}", string.trim_start_matches('/')), &format!("/{}", string.trim_start_matches('/')),
ClientType::Desktop, ClientType::Desktop,

View file

@ -1,7 +1,6 @@
use fancy_regex::Regex as FancyRegex; use fancy_regex::Regex;
use log::debug; use log::debug;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex;
use reqwest::Client; use reqwest::Client;
use serde::{Deserialize, Serialize}; use serde::{Deserialize, Serialize};
use std::result::Result::Ok; use std::result::Result::Ok;
@ -69,18 +68,18 @@ impl From<DeobfData> for Deobfuscator {
const DEOBFUSCATION_FUNC_NAME: &str = "deobfuscate"; const DEOBFUSCATION_FUNC_NAME: &str = "deobfuscate";
fn get_sig_fn_name(player_js: &str) -> Result<String> { fn get_sig_fn_name(player_js: &str) -> Result<String> {
static FUNCTION_REGEXES: Lazy<[FancyRegex; 6]> = Lazy::new(|| { static FUNCTION_REGEXES: Lazy<[Regex; 6]> = Lazy::new(|| {
[ [
FancyRegex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(), Regex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(),
FancyRegex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(), Regex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(),
FancyRegex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(), Regex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(),
FancyRegex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), Regex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(),
FancyRegex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(), Regex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(),
FancyRegex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(), Regex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(),
] ]
}); });
util::get_cg_from_fancy_regexes(FUNCTION_REGEXES.iter(), player_js, 1) util::get_cg_from_regexes(FUNCTION_REGEXES.iter(), player_js, 1)
.ok_or(DeobfError::Extraction("deobf function name")) .ok_or(DeobfError::Extraction("deobf function name"))
} }
@ -99,6 +98,8 @@ fn get_sig_fn(player_js: &str) -> Result<String> {
let deobfuscate_function = "var ".to_owned() let deobfuscate_function = "var ".to_owned()
+ function_pattern + function_pattern
.captures(player_js) .captures(player_js)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("deobf function"))? .ok_or(DeobfError::Extraction("deobf function"))?
.get(1) .get(1)
.unwrap() .unwrap()
@ -109,6 +110,8 @@ fn get_sig_fn(player_js: &str) -> Result<String> {
Lazy::new(|| Regex::new(";([A-Za-z0-9_\\$]{2})\\...\\(").unwrap()); Lazy::new(|| Regex::new(";([A-Za-z0-9_\\$]{2})\\...\\(").unwrap());
let helper_object_name = HELPER_OBJECT_NAME_REGEX let helper_object_name = HELPER_OBJECT_NAME_REGEX
.captures(&deobfuscate_function) .captures(&deobfuscate_function)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("helper object name"))? .ok_or(DeobfError::Extraction("helper object name"))?
.get(1) .get(1)
.unwrap() .unwrap()
@ -121,6 +124,8 @@ fn get_sig_fn(player_js: &str) -> Result<String> {
let player_js_nonl = player_js.replace('\n', ""); let player_js_nonl = player_js.replace('\n', "");
let helper_object = helper_pattern let helper_object = helper_pattern
.captures(&player_js_nonl) .captures(&player_js_nonl)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("helper object"))? .ok_or(DeobfError::Extraction("helper object"))?
.get(1) .get(1)
.unwrap() .unwrap()
@ -149,6 +154,8 @@ fn get_nsig_fn_name(player_js: &str) -> Result<String> {
let fname_match = FUNCTION_NAME_REGEX let fname_match = FUNCTION_NAME_REGEX
.captures(player_js) .captures(player_js)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("n_deobf function"))?; .ok_or(DeobfError::Extraction("n_deobf function"))?;
let function_name = fname_match.get(1).unwrap().as_str(); let function_name = fname_match.get(1).unwrap().as_str();
@ -164,13 +171,15 @@ fn get_nsig_fn_name(player_js: &str) -> Result<String> {
.parse::<usize>() .parse::<usize>()
.or(Err(DeobfError::Other("could not parse array_num")))?; .or(Err(DeobfError::Other("could not parse array_num")))?;
let array_pattern_str = let array_pattern_str =
"var ".to_owned() + &regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];"; "var ".to_owned() + &fancy_regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];";
let array_pattern = Regex::new(&array_pattern_str).or(Err(DeobfError::Other( let array_pattern = Regex::new(&array_pattern_str).or(Err(DeobfError::Other(
"could not parse helper pattern regex", "could not parse helper pattern regex",
)))?; )))?;
let array_str = array_pattern let array_str = array_pattern
.captures(player_js) .captures(player_js)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("n_deobf array_str"))? .ok_or(DeobfError::Extraction("n_deobf array_str"))?
.get(1) .get(1)
.unwrap() .unwrap()
@ -265,10 +274,13 @@ async fn get_player_js_url(http: &Client) -> Result<String> {
let text = resp.text().await?; let text = resp.text().await?;
static PLAYER_HASH_PATTERN: Lazy<Regex> = Lazy::new(|| { static PLAYER_HASH_PATTERN: Lazy<Regex> = Lazy::new(|| {
Regex::new(r#"https:\\/\\/www\.youtube\.com\\/s\\/player\\/([a-z0-9]{8})\\/"#).unwrap() Regex::new(r#"https:\\\/\\\/www\.youtube\.com\\\/s\\\/player\\\/([a-z0-9]{8})\\\/"#)
.unwrap()
}); });
let player_hash = PLAYER_HASH_PATTERN let player_hash = PLAYER_HASH_PATTERN
.captures(&text) .captures(&text)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("player hash"))? .ok_or(DeobfError::Extraction("player hash"))?
.get(1) .get(1)
.unwrap() .unwrap()
@ -291,6 +303,8 @@ fn get_sts(player_js: &str) -> Result<String> {
Ok(STS_PATTERN Ok(STS_PATTERN
.captures(player_js) .captures(player_js)
.ok()
.flatten()
.ok_or(DeobfError::Extraction("sts"))? .ok_or(DeobfError::Extraction("sts"))?
.get(1) .get(1)
.unwrap() .unwrap()

View file

@ -15,4 +15,3 @@ pub mod model;
pub mod param; pub mod param;
pub mod report; pub mod report;
pub mod timeago; pub mod timeago;
pub mod validate;

View file

@ -111,22 +111,30 @@ impl UrlTarget {
/// Validate the YouTube ID from the URL target /// Validate the YouTube ID from the URL target
pub(crate) fn validate(&self) -> Result<(), Error> { pub(crate) fn validate(&self) -> Result<(), Error> {
match self { match self {
UrlTarget::Video { id, .. } => match util::VIDEO_ID_REGEX.is_match(id) { UrlTarget::Video { id, .. } => {
match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
true => Ok(()), true => Ok(()),
false => Err(Error::Other("invalid video id".into())), false => Err(Error::Other("invalid video id".into())),
}, }
UrlTarget::Channel { id } => match util::CHANNEL_ID_REGEX.is_match(id) { }
UrlTarget::Channel { id } => {
match util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() {
true => Ok(()), true => Ok(()),
false => Err(Error::Other("invalid channel id".into())), false => Err(Error::Other("invalid channel id".into())),
}, }
UrlTarget::Playlist { id } => match util::PLAYLIST_ID_REGEX.is_match(id) { }
UrlTarget::Playlist { id } => {
match util::PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default() {
true => Ok(()), true => Ok(()),
false => Err(Error::Other("invalid playlist id".into())), false => Err(Error::Other("invalid playlist id".into())),
}, }
UrlTarget::Album { id } => match util::ALBUM_ID_REGEX.is_match(id) { }
UrlTarget::Album { id } => {
match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() {
true => Ok(()), true => Ok(()),
false => Err(Error::Other("invalid album id".into())), false => Err(Error::Other("invalid album id".into())),
}, }
}
} }
} }
} }

View file

@ -1,7 +1,7 @@
use std::convert::TryFrom; use std::convert::TryFrom;
use fancy_regex::Regex;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use regex::Regex;
use serde::{Deserialize, Deserializer}; use serde::{Deserialize, Deserializer};
use serde_with::{serde_as, DeserializeAs}; use serde_with::{serde_as, DeserializeAs};

View file

@ -13,10 +13,9 @@ use std::{
}; };
use base64::Engine; use base64::Engine;
use fancy_regex::Regex as FancyRegex; use fancy_regex::Regex;
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use rand::Rng; use rand::Rng;
use regex::Regex;
use url::Url; use url::Url;
use crate::{error::Error, param::Language}; use crate::{error::Error, param::Language};
@ -25,12 +24,11 @@ pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-
pub static CHANNEL_ID_REGEX: Lazy<Regex> = pub static CHANNEL_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap()); Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
pub static PLAYLIST_ID_REGEX: Lazy<Regex> = pub static PLAYLIST_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(?:PL|RDCLAK|OLAK)[A-Za-z0-9_-]{30,50}$").unwrap()); Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap());
pub static ALBUM_ID_REGEX: Lazy<Regex> = pub static ALBUM_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap()); Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
pub static VANITY_PATH_REGEX: Lazy<Regex> = Lazy::new(|| { pub static VANITY_PATH_REGEX: Lazy<Regex> =
Regex::new(r"^/?(?:(?:c/|user/)?[A-z0-9]{1,100})|(?:@[A-z0-9-_.]{1,100})$").unwrap() Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap());
});
/// Separator string for YouTube Music subtitles /// Separator string for YouTube Music subtitles
pub const DOT_SEPARATOR: &str = ""; pub const DOT_SEPARATOR: &str = "";
@ -51,16 +49,6 @@ pub struct MappingError(pub(crate) Cow<'static, str>);
pub fn get_cg_from_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String> pub fn get_cg_from_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String>
where where
I: Iterator<Item = &'a Regex>, I: Iterator<Item = &'a Regex>,
{
regexes
.find_map(|pattern| pattern.captures(text))
.map(|c| c.get(cg).unwrap().as_str().to_owned())
}
/// Return the given capture group that matches first in a list of fancy regexes
pub fn get_cg_from_fancy_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String>
where
I: Iterator<Item = &'a FancyRegex>,
{ {
regexes regexes
.find_map(|pattern| pattern.captures(text).ok().flatten()) .find_map(|pattern| pattern.captures(text).ok().flatten())
@ -144,7 +132,7 @@ where
pub fn parse_video_length(text: &str) -> Option<u32> { pub fn parse_video_length(text: &str) -> Option<u32> {
static VIDEO_LENGTH_REGEX: Lazy<Regex> = static VIDEO_LENGTH_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap()); Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap());
VIDEO_LENGTH_REGEX.captures(text).map(|cap| { VIDEO_LENGTH_REGEX.captures(text).ok().flatten().map(|cap| {
let hrs = cap let hrs = cap
.get(1) .get(1)
.and_then(|x| x.as_str().parse::<u32>().ok()) .and_then(|x| x.as_str().parse::<u32>().ok())
@ -351,6 +339,8 @@ pub fn video_id_from_thumbnail_url(url: &str) -> Option<String> {
Lazy::new(|| Regex::new(r"^https://i.ytimg.com/vi/([A-Za-z0-9_-]{11})/").unwrap()); Lazy::new(|| Regex::new(r"^https://i.ytimg.com/vi/([A-Za-z0-9_-]{11})/").unwrap());
URL_REGEX URL_REGEX
.captures(url) .captures(url)
.ok()
.flatten()
.and_then(|cap| cap.get(1).map(|x| x.as_str().to_owned())) .and_then(|cap| cap.get(1).map(|x| x.as_str().to_owned()))
} }

View file

@ -1,179 +0,0 @@
//! # Input validation
//!
//! The extraction functions of RustyPipe will produce errors when fed with invalid input data
//! (e.g. YouTube IDs with invalid format). Therefore you will need to validate all untrusted
//! input data beforehand. The library offers two options for this:
//!
//! - The [URL resolver](crate::client::RustyPipeQuery::resolve_url) or
//! [string resolver](crate::client::RustyPipeQuery::resolve_string) is great for handling
//! arbitrary input and returns a [`UrlTarget`](crate::model::UrlTarget) enum that tells you
//! whether the given URL points to a video, channel, playlist, etc.
//! - The validation functions of this module are meant for validating concrete data (video IDs,
//! channel IDs, playlist IDs) and return [`true`] if the given input is valid
use crate::util;
use once_cell::sync::Lazy;
use regex::Regex;
/// Check whether the given string has the format of a YouTube video ID
///
/// A video ID consists of exactly 11 characters taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Examples
/// ```
/// # use rustypipe::validate;
/// assert!(validate::video_id("dQw4w9WgXcQ"));
/// assert!(!validate::video_id("Abcd"));
/// assert!(!validate::video_id("dQw4w9WgXc@"));
/// ```
pub fn video_id<S: AsRef<str>>(video_id: S) -> bool {
    // Borrow the input as a plain string slice before handing it to the shared pattern
    let candidate: &str = video_id.as_ref();
    util::VIDEO_ID_REGEX.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube channel ID
///
/// A channel ID is exactly 24 characters long: the prefix `UC`, followed by
/// 22 characters taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Examples
/// ```
/// # use rustypipe::validate;
/// assert!(validate::channel_id("UC2DjFE7Xf11URZqWBigcVOQ"));
/// assert!(!validate::channel_id("Abcd"));
/// assert!(!validate::channel_id("XY2DjFE7Xf11URZqWBigcVOQ"));
/// ```
pub fn channel_id<S: AsRef<str>>(channel_id: S) -> bool {
    // Borrow the input as a plain string slice before handing it to the shared pattern
    let candidate: &str = channel_id.as_ref();
    util::CHANNEL_ID_REGEX.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube playlist ID
///
/// A playlist ID begins with one of the following prefixes, followed by at least
/// 30 characters taken from the set `A-Za-z0-9_-`:
///
/// - `PL`: user-created playlist
/// - `RDCLAK`: playlist curated by YouTube Music
/// - `OLAK`: YouTube Music album
///
/// Returns `true` if the input matches that format.
///
/// # Examples
/// ```
/// # use rustypipe::validate;
/// assert!(validate::playlist_id("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI"));
/// assert!(validate::playlist_id("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"));
/// assert!(validate::playlist_id("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE"));
///
/// assert!(!validate::playlist_id("Abcd"));
/// ```
pub fn playlist_id<S: AsRef<str>>(playlist_id: S) -> bool {
    // Borrow the input as a plain string slice before handing it to the shared pattern
    let candidate: &str = playlist_id.as_ref();
    util::PLAYLIST_ID_REGEX.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube Music album ID
///
/// An album ID is exactly 17 characters long: the prefix `MPREb_`, followed by
/// 11 characters taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Examples
/// ```
/// # use rustypipe::validate;
/// assert!(validate::album_id("MPREb_GyH43gCvdM5"));
/// assert!(!validate::album_id("Abcd_GyH43gCvdM5"));
/// ```
///
/// # Note
///
/// Every album on YouTube Music has both an album ID (`MPREb_...`) and a playlist ID
/// (`OLAK...`). When an album is opened on the YouTube Music website, the address bar
/// shows the playlist ID rather than the album ID.
///
/// To obtain the album ID from an album's playlist ID, use the
/// [string resolver](crate::client::RustyPipeQuery::resolve_string) with the `resolve_albums`
/// option enabled.
pub fn album_id<S: AsRef<str>>(album_id: S) -> bool {
    // Borrow the input as a plain string slice before handing it to the shared pattern
    let candidate: &str = album_id.as_ref();
    util::ALBUM_ID_REGEX.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube radio ID
///
/// A radio ID begins with the prefix `RD`, followed by 22 to 50 characters
/// taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Radio types
///
/// - Artist radio: `RDEMSuoM_jxfse1_g8uCO7MCtg`
/// - Genre radio: `RDQM1xqCV6EdPUw`
/// - Shuffle radio: `RDAOVeZA-2uzuUKdoB81Ha3srw`
/// - Playlist radio (`RDAMPL` + playlist ID): `RDAMPLPL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI`
/// - Track radio (`RDAMVM` + video ID): `RDAMVMZeerrnuLi5E`
///
/// # Examples
///
/// ```
/// # use rustypipe::validate;
/// assert!(validate::radio_id("RDEMSuoM_jxfse1_g8uCO7MCtg"));
/// assert!(!validate::radio_id("Abcd"));
/// assert!(!validate::radio_id("XYEMSuoM_jxfse1_g8uCO7MCtg"));
/// ```
pub fn radio_id<S: AsRef<str>>(radio_id: S) -> bool {
    // NOTE(review): the genre radio and track radio examples listed in the docs have
    // only 13 and 15 characters after `RD`, so this pattern rejects them. Confirm
    // whether the lower bound of 22 is intended before relying on it for those types.
    static PATTERN: Lazy<Regex> = Lazy::new(|| {
        // Compiled once on first use; the literal pattern is known to be valid
        Regex::new(r"^RD[A-Za-z0-9_-]{22,50}$").unwrap()
    });

    let candidate: &str = radio_id.as_ref();
    PATTERN.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube genre ID
///
/// A genre ID is exactly 24 characters long: the prefix `ggMPO`, followed by
/// 19 characters taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Examples
///
/// ```
/// # use rustypipe::validate;
/// assert!(validate::genre_id("ggMPOg1uX1JOQWZFeDByc2Jm"));
/// assert!(!validate::genre_id("Abcd"));
/// assert!(!validate::genre_id("ggAbcg1uX1JOQWZFeDByc2Jm"));
/// ```
pub fn genre_id<S: AsRef<str>>(genre_id: S) -> bool {
    static PATTERN: Lazy<Regex> = Lazy::new(|| {
        // Compiled once on first use; the literal pattern is known to be valid
        Regex::new(r"^ggMPO[A-Za-z0-9_-]{19}$").unwrap()
    });

    let candidate: &str = genre_id.as_ref();
    PATTERN.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube related ID
///
/// A related ID is exactly 17 characters long: the prefix `MPTRt_`, followed by
/// 11 characters taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Examples
///
/// ```
/// # use rustypipe::validate;
/// assert!(validate::track_related_id("MPTRt_wrKjTn9hmry"));
/// assert!(!validate::track_related_id("Abcd"));
/// assert!(!validate::track_related_id("Abcdt_wrKjTn9hmry"));
/// ```
pub fn track_related_id<S: AsRef<str>>(related_id: S) -> bool {
    static PATTERN: Lazy<Regex> = Lazy::new(|| {
        // Compiled once on first use; the literal pattern is known to be valid
        Regex::new(r"^MPTRt_[A-Za-z0-9_-]{11}$").unwrap()
    });

    let candidate: &str = related_id.as_ref();
    PATTERN.is_match(candidate)
}
/// Check whether the given string has the format of a YouTube lyrics ID
///
/// A lyrics ID is exactly 17 characters long: the prefix `MPLYt_`, followed by
/// 11 characters taken from the set `A-Za-z0-9_-`.
/// Returns `true` if the input matches that format.
///
/// # Examples
///
/// ```
/// # use rustypipe::validate;
/// assert!(validate::track_lyrics_id("MPLYt_wrKjTn9hmry"));
/// assert!(!validate::track_lyrics_id("Abcd"));
/// assert!(!validate::track_lyrics_id("Abcdt_wrKjTn9hmry"));
/// ```
pub fn track_lyrics_id<S: AsRef<str>>(lyrics_id: S) -> bool {
    static PATTERN: Lazy<Regex> = Lazy::new(|| {
        // Compiled once on first use; the literal pattern is known to be valid
        Regex::new(r"^MPLYt_[A-Za-z0-9_-]{11}$").unwrap()
    });

    let candidate: &str = lyrics_id.as_ref();
    PATTERN.is_match(candidate)
}

View file

@ -1,8 +1,9 @@
use std::collections::HashSet; use std::collections::HashSet;
use std::fmt::Display; use std::fmt::Display;
use fancy_regex::Regex;
use once_cell::sync::Lazy;
use rstest::rstest; use rstest::rstest;
use rustypipe::validate;
use time::macros::date; use time::macros::date;
use time::OffsetDateTime; use time::OffsetDateTime;
@ -2120,7 +2121,6 @@ async fn music_genres() {
assert!(!pop.is_mood); assert!(!pop.is_mood);
genres.iter().for_each(|g| { genres.iter().for_each(|g| {
assert!(validate::genre_id(&g.id));
assert_gte(g.color, 0xff000000, "color"); assert_gte(g.color, 0xff000000, "color");
}); });
} }
@ -2270,17 +2270,44 @@ async fn assert_next_items<T: FromYtItem, Q: AsRef<RustyPipeQuery>>(
} }
fn assert_video_id(id: &str) { fn assert_video_id(id: &str) {
assert!(validate::video_id(id), "invalid video id: `{}`", id) static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap());
assert!(
VIDEO_ID_REGEX.is_match(id).unwrap_or_default(),
"invalid video id: `{}`",
id
);
} }
fn assert_channel_id(id: &str) { fn assert_channel_id(id: &str) {
assert!(validate::channel_id(id), "invalid channel id: `{}`", id); static CHANNEL_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
assert!(
CHANNEL_ID_REGEX.is_match(id).unwrap_or_default(),
"invalid channel id: `{}`",
id
);
} }
fn assert_album_id(id: &str) { fn assert_album_id(id: &str) {
assert!(validate::album_id(id), "invalid album id: `{}`", id); static ALBUM_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
assert!(
ALBUM_ID_REGEX.is_match(id).unwrap_or_default(),
"invalid album id: `{}`",
id
);
} }
fn assert_playlist_id(id: &str) { fn assert_playlist_id(id: &str) {
assert!(validate::playlist_id(id), "invalid playlist id: `{}`", id); static PLAYLIST_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap());
assert!(
PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default(),
"invalid album id: `{}`",
id
);
} }