Compare commits
	
		
			2 commits
		
	
	
		
			
				4cc069fba2
			
			...
			
				f7fbf40721
			
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| f7fbf40721 | |||
| 92a358a079 | 
					 14 changed files with 253 additions and 126 deletions
				
			
		| 
						 | 
				
			
			@ -26,6 +26,7 @@ rustls-tls-native-roots = ["reqwest/rustls-tls-native-roots"]
 | 
			
		|||
# quick-js = "0.4.1"
 | 
			
		||||
quick-js = { path = "../quickjs-rs", default-features = false }
 | 
			
		||||
once_cell = "1.12.0"
 | 
			
		||||
regex = "1.6.0"
 | 
			
		||||
fancy-regex = "0.11.0"
 | 
			
		||||
thiserror = "1.0.36"
 | 
			
		||||
url = "2.2.2"
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -25,10 +25,10 @@ mod channel_rss;
 | 
			
		|||
use std::sync::Arc;
 | 
			
		||||
use std::{borrow::Cow, fmt::Debug};
 | 
			
		||||
 | 
			
		||||
use fancy_regex::Regex;
 | 
			
		||||
use log::{debug, error, warn};
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use rand::Rng;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use reqwest::{header, Client, ClientBuilder, Request, RequestBuilder, Response};
 | 
			
		||||
use serde::{de::DeserializeOwned, Deserialize, Serialize};
 | 
			
		||||
use time::{Duration, OffsetDateTime};
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,8 +1,8 @@
 | 
			
		|||
use std::{borrow::Cow, rc::Rc};
 | 
			
		||||
 | 
			
		||||
use fancy_regex::Regex;
 | 
			
		||||
use futures::{stream, StreamExt};
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use serde::Serialize;
 | 
			
		||||
 | 
			
		||||
use crate::{
 | 
			
		||||
| 
						 | 
				
			
			@ -268,8 +268,6 @@ fn map_artist_page(
 | 
			
		|||
    let wikipedia_url = header.description.as_deref().and_then(|h| {
 | 
			
		||||
        WIKIPEDIA_REGEX
 | 
			
		||||
            .captures(h)
 | 
			
		||||
            .ok()
 | 
			
		||||
            .flatten()
 | 
			
		||||
            .and_then(|c| c.get(0))
 | 
			
		||||
            .map(|m| m.as_str().to_owned())
 | 
			
		||||
    });
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -3,8 +3,8 @@ use std::{
 | 
			
		|||
    collections::{BTreeMap, HashMap},
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
use fancy_regex::Regex;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use serde::Serialize;
 | 
			
		||||
use url::Url;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -530,8 +530,6 @@ fn map_audio_stream(
 | 
			
		|||
                    Some(t) => {
 | 
			
		||||
                        let lang = LANG_PATTERN
 | 
			
		||||
                            .captures(&t.id)
 | 
			
		||||
                            .ok()
 | 
			
		||||
                            .flatten()
 | 
			
		||||
                            .map(|m| m.get(1).unwrap().as_str().to_owned());
 | 
			
		||||
 | 
			
		||||
                        Some(AudioTrack {
 | 
			
		||||
| 
						 | 
				
			
			@ -557,7 +555,7 @@ fn parse_mime(mime: &str) -> Option<(&str, Vec<&str>)> {
 | 
			
		|||
    static PATTERN: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r#"(\w+/\w+);\scodecs="([a-zA-Z-0-9.,\s]*)""#).unwrap());
 | 
			
		||||
 | 
			
		||||
    let captures = some_or_bail!(PATTERN.captures(mime).ok().flatten(), None);
 | 
			
		||||
    let captures = some_or_bail!(PATTERN.captures(mime), None);
 | 
			
		||||
    Some((
 | 
			
		||||
        captures.get(1).unwrap().as_str(),
 | 
			
		||||
        captures
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -97,7 +97,7 @@ impl From<ChannelRss> for crate::model::ChannelRss {
 | 
			
		|||
                        .uri
 | 
			
		||||
                        .strip_prefix("https://www.youtube.com/channel/")
 | 
			
		||||
                        .and_then(|id| {
 | 
			
		||||
                            if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
                            if util::CHANNEL_ID_REGEX.is_match(id) {
 | 
			
		||||
                                Some(id.to_owned())
 | 
			
		||||
                            } else {
 | 
			
		||||
                                None
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,5 +1,5 @@
 | 
			
		|||
use fancy_regex::Regex;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use serde::Deserialize;
 | 
			
		||||
use serde_with::{
 | 
			
		||||
    json::JsonString, rust::deserialize_ignore_any, serde_as, DefaultOnError, VecSkipError,
 | 
			
		||||
| 
						 | 
				
			
			@ -503,17 +503,9 @@ impl<T> YouTubeListMapper<T> {
 | 
			
		|||
            id: video.video_id,
 | 
			
		||||
            name: video.headline,
 | 
			
		||||
            length: video.accessibility.and_then(|acc| {
 | 
			
		||||
                ACCESSIBILITY_SEP_REGEX
 | 
			
		||||
                    .captures(&acc)
 | 
			
		||||
                    .ok()
 | 
			
		||||
                    .flatten()
 | 
			
		||||
                    .and_then(|cap| {
 | 
			
		||||
                ACCESSIBILITY_SEP_REGEX.captures(&acc).and_then(|cap| {
 | 
			
		||||
                    cap.get(1).and_then(|c| {
 | 
			
		||||
                            timeago::parse_timeago_or_warn(
 | 
			
		||||
                                self.lang,
 | 
			
		||||
                                c.as_str(),
 | 
			
		||||
                                &mut self.warnings,
 | 
			
		||||
                            )
 | 
			
		||||
                        timeago::parse_timeago_or_warn(self.lang, c.as_str(), &mut self.warnings)
 | 
			
		||||
                            .map(|ta| Duration::from(ta).whole_seconds() as u32)
 | 
			
		||||
                    })
 | 
			
		||||
                })
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -112,9 +112,9 @@ impl RustyPipeQuery {
 | 
			
		|||
            // Album or channel
 | 
			
		||||
            Some("browse") => match path_split.next() {
 | 
			
		||||
                Some(id) => {
 | 
			
		||||
                    if util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
                    if util::CHANNEL_ID_REGEX.is_match(id) {
 | 
			
		||||
                        Ok(UrlTarget::Channel { id: id.to_owned() })
 | 
			
		||||
                    } else if util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
                    } else if util::ALBUM_ID_REGEX.is_match(id) {
 | 
			
		||||
                        Ok(UrlTarget::Album { id: id.to_owned() })
 | 
			
		||||
                    } else {
 | 
			
		||||
                        Err(Error::Other("invalid url: no browse id".into()))
 | 
			
		||||
| 
						 | 
				
			
			@ -153,10 +153,7 @@ impl RustyPipeQuery {
 | 
			
		|||
                        // If there is a timestamp parameter, it has to be a video
 | 
			
		||||
                        // First check the innertube API if this is a channel vanity url
 | 
			
		||||
                        // If no channel is found and the identifier has the video ID format, assume it is a video
 | 
			
		||||
                        if !params.contains_key("t")
 | 
			
		||||
                            && util::VANITY_PATH_REGEX
 | 
			
		||||
                                .is_match(url.path())
 | 
			
		||||
                                .unwrap_or_default()
 | 
			
		||||
                        if !params.contains_key("t") && util::VANITY_PATH_REGEX.is_match(url.path())
 | 
			
		||||
                        {
 | 
			
		||||
                            match self
 | 
			
		||||
                                ._navigation_resolve_url(url.path(), ClientType::Desktop)
 | 
			
		||||
| 
						 | 
				
			
			@ -164,7 +161,7 @@ impl RustyPipeQuery {
 | 
			
		|||
                            {
 | 
			
		||||
                                Ok(target) => Ok(target),
 | 
			
		||||
                                Err(Error::Extraction(ExtractionError::ContentUnavailable(e))) => {
 | 
			
		||||
                                    match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
                                    match util::VIDEO_ID_REGEX.is_match(id) {
 | 
			
		||||
                                        true => Ok(UrlTarget::Video {
 | 
			
		||||
                                            id: id.to_owned(),
 | 
			
		||||
                                            start_time: get_start_time(),
 | 
			
		||||
| 
						 | 
				
			
			@ -176,7 +173,7 @@ impl RustyPipeQuery {
 | 
			
		|||
                                }
 | 
			
		||||
                                Err(e) => Err(e),
 | 
			
		||||
                            }
 | 
			
		||||
                        } else if util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
                        } else if util::VIDEO_ID_REGEX.is_match(id) {
 | 
			
		||||
                            Ok(UrlTarget::Video {
 | 
			
		||||
                                id: id.to_owned(),
 | 
			
		||||
                                start_time: get_start_time(),
 | 
			
		||||
| 
						 | 
				
			
			@ -232,16 +229,16 @@ impl RustyPipeQuery {
 | 
			
		|||
                .await
 | 
			
		||||
        }
 | 
			
		||||
        // ID only
 | 
			
		||||
        else if util::VIDEO_ID_REGEX.is_match(string).unwrap_or_default() {
 | 
			
		||||
        else if util::VIDEO_ID_REGEX.is_match(string) {
 | 
			
		||||
            Ok(UrlTarget::Video {
 | 
			
		||||
                id: string.to_owned(),
 | 
			
		||||
                start_time: 0,
 | 
			
		||||
            })
 | 
			
		||||
        } else if util::CHANNEL_ID_REGEX.is_match(string).unwrap_or_default() {
 | 
			
		||||
        } else if util::CHANNEL_ID_REGEX.is_match(string) {
 | 
			
		||||
            Ok(UrlTarget::Channel {
 | 
			
		||||
                id: string.to_owned(),
 | 
			
		||||
            })
 | 
			
		||||
        } else if util::PLAYLIST_ID_REGEX.is_match(string).unwrap_or_default() {
 | 
			
		||||
        } else if util::PLAYLIST_ID_REGEX.is_match(string) {
 | 
			
		||||
            if resolve_albums && string.starts_with(util::PLAYLIST_ID_ALBUM_PREFIX) {
 | 
			
		||||
                self._navigation_resolve_url(
 | 
			
		||||
                    &format!("/playlist?list={}", string),
 | 
			
		||||
| 
						 | 
				
			
			@ -253,13 +250,13 @@ impl RustyPipeQuery {
 | 
			
		|||
                    id: string.to_owned(),
 | 
			
		||||
                })
 | 
			
		||||
            }
 | 
			
		||||
        } else if util::ALBUM_ID_REGEX.is_match(string).unwrap_or_default() {
 | 
			
		||||
        } else if util::ALBUM_ID_REGEX.is_match(string) {
 | 
			
		||||
            Ok(UrlTarget::Album {
 | 
			
		||||
                id: string.to_owned(),
 | 
			
		||||
            })
 | 
			
		||||
        }
 | 
			
		||||
        // Channel name only
 | 
			
		||||
        else if util::VANITY_PATH_REGEX.is_match(string).unwrap_or_default() {
 | 
			
		||||
        else if util::VANITY_PATH_REGEX.is_match(string) {
 | 
			
		||||
            self._navigation_resolve_url(
 | 
			
		||||
                &format!("/{}", string.trim_start_matches('/')),
 | 
			
		||||
                ClientType::Desktop,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,6 +1,7 @@
 | 
			
		|||
use fancy_regex::Regex;
 | 
			
		||||
use fancy_regex::Regex as FancyRegex;
 | 
			
		||||
use log::debug;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use reqwest::Client;
 | 
			
		||||
use serde::{Deserialize, Serialize};
 | 
			
		||||
use std::result::Result::Ok;
 | 
			
		||||
| 
						 | 
				
			
			@ -68,18 +69,18 @@ impl From<DeobfData> for Deobfuscator {
 | 
			
		|||
const DEOBFUSCATION_FUNC_NAME: &str = "deobfuscate";
 | 
			
		||||
 | 
			
		||||
fn get_sig_fn_name(player_js: &str) -> Result<String> {
 | 
			
		||||
    static FUNCTION_REGEXES: Lazy<[Regex; 6]> = Lazy::new(|| {
 | 
			
		||||
    static FUNCTION_REGEXES: Lazy<[FancyRegex; 6]> = Lazy::new(|| {
 | 
			
		||||
        [
 | 
			
		||||
        Regex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(),
 | 
			
		||||
        Regex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(),
 | 
			
		||||
        Regex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(),
 | 
			
		||||
        Regex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(),
 | 
			
		||||
        Regex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(),
 | 
			
		||||
        Regex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(),
 | 
			
		||||
        FancyRegex::new("(?:\\b|[^a-zA-Z0-9$])([a-zA-Z0-9$]{2,})\\s*=\\s*function\\(\\s*a\\s*\\)\\s*\\{\\s*a\\s*=\\s*a\\.split\\(\\s*\"\"\\s*\\)").unwrap(),
 | 
			
		||||
        FancyRegex::new("\\bm=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(h\\.s\\)\\)").unwrap(),
 | 
			
		||||
        FancyRegex::new("\\bc&&\\(c=([a-zA-Z0-9$]{2,})\\(decodeURIComponent\\(c\\)\\)").unwrap(),
 | 
			
		||||
        FancyRegex::new("([\\w$]+)\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(),
 | 
			
		||||
        FancyRegex::new("\\b([\\w$]{2,})\\s*=\\s*function\\((\\w+)\\)\\{\\s*\\2=\\s*\\2\\.split\\(\"\"\\)\\s*;").unwrap(),
 | 
			
		||||
        FancyRegex::new("\\bc\\s*&&\\s*d\\.set\\([^,]+\\s*,\\s*(:encodeURIComponent\\s*\\()([a-zA-Z0-9$]+)\\(").unwrap(),
 | 
			
		||||
    ]
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    util::get_cg_from_regexes(FUNCTION_REGEXES.iter(), player_js, 1)
 | 
			
		||||
    util::get_cg_from_fancy_regexes(FUNCTION_REGEXES.iter(), player_js, 1)
 | 
			
		||||
        .ok_or(DeobfError::Extraction("deobf function name"))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -98,8 +99,6 @@ fn get_sig_fn(player_js: &str) -> Result<String> {
 | 
			
		|||
    let deobfuscate_function = "var ".to_owned()
 | 
			
		||||
        + function_pattern
 | 
			
		||||
            .captures(player_js)
 | 
			
		||||
            .ok()
 | 
			
		||||
            .flatten()
 | 
			
		||||
            .ok_or(DeobfError::Extraction("deobf function"))?
 | 
			
		||||
            .get(1)
 | 
			
		||||
            .unwrap()
 | 
			
		||||
| 
						 | 
				
			
			@ -110,8 +109,6 @@ fn get_sig_fn(player_js: &str) -> Result<String> {
 | 
			
		|||
        Lazy::new(|| Regex::new(";([A-Za-z0-9_\\$]{2})\\...\\(").unwrap());
 | 
			
		||||
    let helper_object_name = HELPER_OBJECT_NAME_REGEX
 | 
			
		||||
        .captures(&deobfuscate_function)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .ok_or(DeobfError::Extraction("helper object name"))?
 | 
			
		||||
        .get(1)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
| 
						 | 
				
			
			@ -124,8 +121,6 @@ fn get_sig_fn(player_js: &str) -> Result<String> {
 | 
			
		|||
    let player_js_nonl = player_js.replace('\n', "");
 | 
			
		||||
    let helper_object = helper_pattern
 | 
			
		||||
        .captures(&player_js_nonl)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .ok_or(DeobfError::Extraction("helper object"))?
 | 
			
		||||
        .get(1)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
| 
						 | 
				
			
			@ -154,8 +149,6 @@ fn get_nsig_fn_name(player_js: &str) -> Result<String> {
 | 
			
		|||
 | 
			
		||||
    let fname_match = FUNCTION_NAME_REGEX
 | 
			
		||||
        .captures(player_js)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .ok_or(DeobfError::Extraction("n_deobf function"))?;
 | 
			
		||||
 | 
			
		||||
    let function_name = fname_match.get(1).unwrap().as_str();
 | 
			
		||||
| 
						 | 
				
			
			@ -171,15 +164,13 @@ fn get_nsig_fn_name(player_js: &str) -> Result<String> {
 | 
			
		|||
        .parse::<usize>()
 | 
			
		||||
        .or(Err(DeobfError::Other("could not parse array_num")))?;
 | 
			
		||||
    let array_pattern_str =
 | 
			
		||||
        "var ".to_owned() + &fancy_regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];";
 | 
			
		||||
        "var ".to_owned() + &regex::escape(function_name) + "\\s*=\\s*\\[(.+?)];";
 | 
			
		||||
    let array_pattern = Regex::new(&array_pattern_str).or(Err(DeobfError::Other(
 | 
			
		||||
        "could not parse helper pattern regex",
 | 
			
		||||
    )))?;
 | 
			
		||||
 | 
			
		||||
    let array_str = array_pattern
 | 
			
		||||
        .captures(player_js)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .ok_or(DeobfError::Extraction("n_deobf array_str"))?
 | 
			
		||||
        .get(1)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
| 
						 | 
				
			
			@ -274,13 +265,10 @@ async fn get_player_js_url(http: &Client) -> Result<String> {
 | 
			
		|||
    let text = resp.text().await?;
 | 
			
		||||
 | 
			
		||||
    static PLAYER_HASH_PATTERN: Lazy<Regex> = Lazy::new(|| {
 | 
			
		||||
        Regex::new(r#"https:\\\/\\\/www\.youtube\.com\\\/s\\\/player\\\/([a-z0-9]{8})\\\/"#)
 | 
			
		||||
            .unwrap()
 | 
			
		||||
        Regex::new(r#"https:\\/\\/www\.youtube\.com\\/s\\/player\\/([a-z0-9]{8})\\/"#).unwrap()
 | 
			
		||||
    });
 | 
			
		||||
    let player_hash = PLAYER_HASH_PATTERN
 | 
			
		||||
        .captures(&text)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .ok_or(DeobfError::Extraction("player hash"))?
 | 
			
		||||
        .get(1)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
| 
						 | 
				
			
			@ -303,8 +291,6 @@ fn get_sts(player_js: &str) -> Result<String> {
 | 
			
		|||
 | 
			
		||||
    Ok(STS_PATTERN
 | 
			
		||||
        .captures(player_js)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .ok_or(DeobfError::Extraction("sts"))?
 | 
			
		||||
        .get(1)
 | 
			
		||||
        .unwrap()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -15,3 +15,4 @@ pub mod model;
 | 
			
		|||
pub mod param;
 | 
			
		||||
pub mod report;
 | 
			
		||||
pub mod timeago;
 | 
			
		||||
pub mod validate;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -111,30 +111,22 @@ impl UrlTarget {
 | 
			
		|||
    /// Validate the YouTube ID from the URL target
 | 
			
		||||
    pub(crate) fn validate(&self) -> Result<(), Error> {
 | 
			
		||||
        match self {
 | 
			
		||||
            UrlTarget::Video { id, .. } => {
 | 
			
		||||
                match util::VIDEO_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
            UrlTarget::Video { id, .. } => match util::VIDEO_ID_REGEX.is_match(id) {
 | 
			
		||||
                true => Ok(()),
 | 
			
		||||
                false => Err(Error::Other("invalid video id".into())),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            UrlTarget::Channel { id } => {
 | 
			
		||||
                match util::CHANNEL_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
            },
 | 
			
		||||
            UrlTarget::Channel { id } => match util::CHANNEL_ID_REGEX.is_match(id) {
 | 
			
		||||
                true => Ok(()),
 | 
			
		||||
                false => Err(Error::Other("invalid channel id".into())),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            UrlTarget::Playlist { id } => {
 | 
			
		||||
                match util::PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
            },
 | 
			
		||||
            UrlTarget::Playlist { id } => match util::PLAYLIST_ID_REGEX.is_match(id) {
 | 
			
		||||
                true => Ok(()),
 | 
			
		||||
                false => Err(Error::Other("invalid playlist id".into())),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            UrlTarget::Album { id } => {
 | 
			
		||||
                match util::ALBUM_ID_REGEX.is_match(id).unwrap_or_default() {
 | 
			
		||||
            },
 | 
			
		||||
            UrlTarget::Album { id } => match util::ALBUM_ID_REGEX.is_match(id) {
 | 
			
		||||
                true => Ok(()),
 | 
			
		||||
                false => Err(Error::Other("invalid album id".into())),
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
            },
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,7 +1,7 @@
 | 
			
		|||
use std::convert::TryFrom;
 | 
			
		||||
 | 
			
		||||
use fancy_regex::Regex;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use serde::{Deserialize, Deserializer};
 | 
			
		||||
use serde_with::{serde_as, DeserializeAs};
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -13,9 +13,10 @@ use std::{
 | 
			
		|||
};
 | 
			
		||||
 | 
			
		||||
use base64::Engine;
 | 
			
		||||
use fancy_regex::Regex;
 | 
			
		||||
use fancy_regex::Regex as FancyRegex;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use rand::Rng;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
use url::Url;
 | 
			
		||||
 | 
			
		||||
use crate::{error::Error, param::Language};
 | 
			
		||||
| 
						 | 
				
			
			@ -24,11 +25,12 @@ pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-
 | 
			
		|||
pub static CHANNEL_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
    Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
 | 
			
		||||
pub static PLAYLIST_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
    Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap());
 | 
			
		||||
    Lazy::new(|| Regex::new(r"^(?:PL|RDCLAK|OLAK)[A-Za-z0-9_-]{30,50}$").unwrap());
 | 
			
		||||
pub static ALBUM_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
    Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
 | 
			
		||||
pub static VANITY_PATH_REGEX: Lazy<Regex> =
 | 
			
		||||
    Lazy::new(|| Regex::new(r"^/?(?:(?:c\/|user\/)?[A-z0-9]+)|(?:@[A-z0-9-_.]+)$").unwrap());
 | 
			
		||||
pub static VANITY_PATH_REGEX: Lazy<Regex> = Lazy::new(|| {
 | 
			
		||||
    Regex::new(r"^/?(?:(?:c/|user/)?[A-z0-9]{1,100})|(?:@[A-z0-9-_.]{1,100})$").unwrap()
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
/// Separator string for YouTube Music subtitles
 | 
			
		||||
pub const DOT_SEPARATOR: &str = " • ";
 | 
			
		||||
| 
						 | 
				
			
			@ -49,6 +51,16 @@ pub struct MappingError(pub(crate) Cow<'static, str>);
 | 
			
		|||
pub fn get_cg_from_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String>
 | 
			
		||||
where
 | 
			
		||||
    I: Iterator<Item = &'a Regex>,
 | 
			
		||||
{
 | 
			
		||||
    regexes
 | 
			
		||||
        .find_map(|pattern| pattern.captures(text))
 | 
			
		||||
        .map(|c| c.get(cg).unwrap().as_str().to_owned())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Return the given capture group that matches first in a list of fancy regexes
 | 
			
		||||
pub fn get_cg_from_fancy_regexes<'a, I>(mut regexes: I, text: &str, cg: usize) -> Option<String>
 | 
			
		||||
where
 | 
			
		||||
    I: Iterator<Item = &'a FancyRegex>,
 | 
			
		||||
{
 | 
			
		||||
    regexes
 | 
			
		||||
        .find_map(|pattern| pattern.captures(text).ok().flatten())
 | 
			
		||||
| 
						 | 
				
			
			@ -132,7 +144,7 @@ where
 | 
			
		|||
pub fn parse_video_length(text: &str) -> Option<u32> {
 | 
			
		||||
    static VIDEO_LENGTH_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r#"(?:(\d+):)?(\d{1,2}):(\d{2})"#).unwrap());
 | 
			
		||||
    VIDEO_LENGTH_REGEX.captures(text).ok().flatten().map(|cap| {
 | 
			
		||||
    VIDEO_LENGTH_REGEX.captures(text).map(|cap| {
 | 
			
		||||
        let hrs = cap
 | 
			
		||||
            .get(1)
 | 
			
		||||
            .and_then(|x| x.as_str().parse::<u32>().ok())
 | 
			
		||||
| 
						 | 
				
			
			@ -339,8 +351,6 @@ pub fn video_id_from_thumbnail_url(url: &str) -> Option<String> {
 | 
			
		|||
        Lazy::new(|| Regex::new(r"^https://i.ytimg.com/vi/([A-Za-z0-9_-]{11})/").unwrap());
 | 
			
		||||
    URL_REGEX
 | 
			
		||||
        .captures(url)
 | 
			
		||||
        .ok()
 | 
			
		||||
        .flatten()
 | 
			
		||||
        .and_then(|cap| cap.get(1).map(|x| x.as_str().to_owned()))
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										179
									
								
								src/validate.rs
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										179
									
								
								src/validate.rs
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
				
			
			@ -0,0 +1,179 @@
 | 
			
		|||
//! # Input validation
 | 
			
		||||
//!
 | 
			
		||||
//! The extraction functions of RustyPipe will produce errors when fed with invalid input data
 | 
			
		||||
//! (e.g. YouTube IDs with an invalid format). Therefore you will need to validate all untrusted
 | 
			
		||||
//! input data beforehand. The library offers two options for this:
 | 
			
		||||
//!
 | 
			
		||||
//! - The [URL resolver](crate::client::RustyPipeQuery::resolve_url) or
 | 
			
		||||
//!   [string resolver](crate::client::RustyPipeQuery::resolve_string) is great for handling
 | 
			
		||||
//!   arbitrary input and returns a [`UrlTarget`](crate::model::UrlTarget) enum that tells you
 | 
			
		||||
//!   whether the given URL points to a video, channel, playlist, etc.
 | 
			
		||||
//! - The validation functions of this module are meant for validating concrete data (video IDs,
 | 
			
		||||
//!   channel IDs, playlist IDs) and return [`true`] if the given input is valid
 | 
			
		||||
 | 
			
		||||
use crate::util;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use regex::Regex;
 | 
			
		||||
 | 
			
		||||
/// Validate the given video ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube video IDs are exactly 11 characters long and consist of the characters `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::video_id("dQw4w9WgXcQ"));
 | 
			
		||||
/// assert!(!validate::video_id("Abcd"));
 | 
			
		||||
/// assert!(!validate::video_id("dQw4w9WgXc@"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn video_id<S: AsRef<str>>(video_id: S) -> bool {
 | 
			
		||||
    util::VIDEO_ID_REGEX.is_match(video_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given channel ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube channel IDs are exactly 24 characters long, start with the characters `UC`,
 | 
			
		||||
/// followed by 22 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::channel_id("UC2DjFE7Xf11URZqWBigcVOQ"));
 | 
			
		||||
/// assert!(!validate::channel_id("Abcd"));
 | 
			
		||||
/// assert!(!validate::channel_id("XY2DjFE7Xf11URZqWBigcVOQ"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn channel_id<S: AsRef<str>>(channel_id: S) -> bool {
 | 
			
		||||
    util::CHANNEL_ID_REGEX.is_match(channel_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given playlist ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube playlist IDs start with the characters `PL` (user-created playlist),
 | 
			
		||||
/// `RDCLAK` (YouTube Music-curated playlist) or `OLAK` (YouTube Music album),
 | 
			
		||||
/// followed by 30 to 50 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::playlist_id("PL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI"));
 | 
			
		||||
/// assert!(validate::playlist_id("RDCLAK5uy_kFQXdnqMaQCVx2wpUM4ZfbsGCDibZtkJk"));
 | 
			
		||||
/// assert!(validate::playlist_id("OLAK5uy_k0yFrZlFRgCf3rLPza-lkRmCrtLPbK9pE"));
 | 
			
		||||
///
 | 
			
		||||
/// assert!(!validate::playlist_id("Abcd"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn playlist_id<S: AsRef<str>>(playlist_id: S) -> bool {
 | 
			
		||||
    util::PLAYLIST_ID_REGEX.is_match(playlist_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given album ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube Music album IDs are exactly 17 characters long, start with the characters `MPREb_`,
 | 
			
		||||
/// followed by 11 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::album_id("MPREb_GyH43gCvdM5"));
 | 
			
		||||
/// assert!(!validate::album_id("Abcd_GyH43gCvdM5"));
 | 
			
		||||
/// ```
 | 
			
		||||
///
 | 
			
		||||
/// # Note
 | 
			
		||||
///
 | 
			
		||||
/// Albums on YouTube Music have an album ID (`MPREb_...`) and a playlist ID
 | 
			
		||||
/// (`OLAK...`). If you open an album on the YouTube Music website, the address bar shows
 | 
			
		||||
/// the playlist ID, not the album ID.
 | 
			
		||||
///
 | 
			
		||||
/// If you have the playlist ID of an album and need the album ID, you can use the
 | 
			
		||||
/// [string resolver](crate::client::RustyPipeQuery::resolve_string) with the `resolve_albums`
 | 
			
		||||
/// option enabled.
 | 
			
		||||
pub fn album_id<S: AsRef<str>>(album_id: S) -> bool {
 | 
			
		||||
    util::ALBUM_ID_REGEX.is_match(album_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given radio ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube radio IDs start with the characters `RD`,
 | 
			
		||||
/// followed by 22 to 50 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Radio types
 | 
			
		||||
///
 | 
			
		||||
/// - Artist radio: `RDEMSuoM_jxfse1_g8uCO7MCtg`
 | 
			
		||||
/// - Genre radio: `RDQM1xqCV6EdPUw`
 | 
			
		||||
/// - Shuffle radio: `RDAOVeZA-2uzuUKdoB81Ha3srw`
 | 
			
		||||
/// - Playlist radio (`RDAMPL` + playlist ID): `RDAMPLPL4lEESSgxM_5O81EvKCmBIm_JT5Q7JeaI`
 | 
			
		||||
/// - Track radio (`RDAMVM` + video ID): `RDAMVMZeerrnuLi5E`
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::radio_id("RDEMSuoM_jxfse1_g8uCO7MCtg"));
 | 
			
		||||
/// assert!(!validate::radio_id("Abcd"));
 | 
			
		||||
/// assert!(!validate::radio_id("XYEMSuoM_jxfse1_g8uCO7MCtg"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn radio_id<S: AsRef<str>>(radio_id: S) -> bool {
 | 
			
		||||
    static RADIO_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^RD[A-Za-z0-9_-]{22,50}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    RADIO_ID_REGEX.is_match(radio_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given genre ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube genre IDs are exactly 24 characters long, start with the characters `ggMPO`,
 | 
			
		||||
/// followed by 19 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::genre_id("ggMPOg1uX1JOQWZFeDByc2Jm"));
 | 
			
		||||
/// assert!(!validate::genre_id("Abcd"));
 | 
			
		||||
/// assert!(!validate::genre_id("ggAbcg1uX1JOQWZFeDByc2Jm"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn genre_id<S: AsRef<str>>(genre_id: S) -> bool {
 | 
			
		||||
    static GENRE_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^ggMPO[A-Za-z0-9_-]{19}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    GENRE_ID_REGEX.is_match(genre_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given related ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube related IDs are exactly 17 characters long, start with the characters `MPTRt_`,
 | 
			
		||||
/// followed by 11 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::track_related_id("MPTRt_wrKjTn9hmry"));
 | 
			
		||||
/// assert!(!validate::track_related_id("Abcd"));
 | 
			
		||||
/// assert!(!validate::track_related_id("Abcdt_wrKjTn9hmry"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn track_related_id<S: AsRef<str>>(related_id: S) -> bool {
 | 
			
		||||
    static RELATED_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^MPTRt_[A-Za-z0-9_-]{11}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    RELATED_ID_REGEX.is_match(related_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Validate the given lyrics ID
 | 
			
		||||
///
 | 
			
		||||
/// YouTube lyrics IDs are exactly 17 characters long, start with the characters `MPLYt_`,
 | 
			
		||||
/// followed by 11 of these characters: `A-Za-z0-9_-`.
 | 
			
		||||
///
 | 
			
		||||
/// # Examples
 | 
			
		||||
///
 | 
			
		||||
/// ```
 | 
			
		||||
/// # use rustypipe::validate;
 | 
			
		||||
/// assert!(validate::track_lyrics_id("MPLYt_wrKjTn9hmry"));
 | 
			
		||||
/// assert!(!validate::track_lyrics_id("Abcd"));
 | 
			
		||||
/// assert!(!validate::track_lyrics_id("Abcdt_wrKjTn9hmry"));
 | 
			
		||||
/// ```
 | 
			
		||||
pub fn track_lyrics_id<S: AsRef<str>>(lyrics_id: S) -> bool {
 | 
			
		||||
    static LYRICS_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^MPLYt_[A-Za-z0-9_-]{11}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    LYRICS_ID_REGEX.is_match(lyrics_id.as_ref())
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -1,9 +1,8 @@
 | 
			
		|||
use std::collections::HashSet;
 | 
			
		||||
use std::fmt::Display;
 | 
			
		||||
 | 
			
		||||
use fancy_regex::Regex;
 | 
			
		||||
use once_cell::sync::Lazy;
 | 
			
		||||
use rstest::rstest;
 | 
			
		||||
use rustypipe::validate;
 | 
			
		||||
use time::macros::date;
 | 
			
		||||
use time::OffsetDateTime;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -2121,6 +2120,7 @@ async fn music_genres() {
 | 
			
		|||
    assert!(!pop.is_mood);
 | 
			
		||||
 | 
			
		||||
    genres.iter().for_each(|g| {
 | 
			
		||||
        assert!(validate::genre_id(&g.id));
 | 
			
		||||
        assert_gte(g.color, 0xff000000, "color");
 | 
			
		||||
    });
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			@ -2270,44 +2270,17 @@ async fn assert_next_items<T: FromYtItem, Q: AsRef<RustyPipeQuery>>(
 | 
			
		|||
}
 | 
			
		||||
 | 
			
		||||
fn assert_video_id(id: &str) {
 | 
			
		||||
    static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-]{11}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    assert!(
 | 
			
		||||
        VIDEO_ID_REGEX.is_match(id).unwrap_or_default(),
 | 
			
		||||
        "invalid video id: `{}`",
 | 
			
		||||
        id
 | 
			
		||||
    );
 | 
			
		||||
    assert!(validate::video_id(id), "invalid video id: `{}`", id)
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn assert_channel_id(id: &str) {
 | 
			
		||||
    static CHANNEL_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    assert!(
 | 
			
		||||
        CHANNEL_ID_REGEX.is_match(id).unwrap_or_default(),
 | 
			
		||||
        "invalid channel id: `{}`",
 | 
			
		||||
        id
 | 
			
		||||
    );
 | 
			
		||||
    assert!(validate::channel_id(id), "invalid channel id: `{}`", id);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn assert_album_id(id: &str) {
 | 
			
		||||
    static ALBUM_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    assert!(
 | 
			
		||||
        ALBUM_ID_REGEX.is_match(id).unwrap_or_default(),
 | 
			
		||||
        "invalid album id: `{}`",
 | 
			
		||||
        id
 | 
			
		||||
    );
 | 
			
		||||
    assert!(validate::album_id(id), "invalid album id: `{}`", id);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
fn assert_playlist_id(id: &str) {
 | 
			
		||||
    static PLAYLIST_ID_REGEX: Lazy<Regex> =
 | 
			
		||||
        Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK)[A-Za-z0-9_-]{30,}$").unwrap());
 | 
			
		||||
 | 
			
		||||
    assert!(
 | 
			
		||||
        PLAYLIST_ID_REGEX.is_match(id).unwrap_or_default(),
 | 
			
		||||
        "invalid album id: `{}`",
 | 
			
		||||
        id
 | 
			
		||||
    );
 | 
			
		||||
    assert!(validate::playlist_id(id), "invalid playlist id: `{}`", id);
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue