From 6cc927031a678e1b30a37e1318a42b25f6fd7385 Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Mon, 12 Sep 2022 16:26:09 +0200 Subject: [PATCH 1/9] refactored client, added reports --- .gitignore | 2 + Cargo.toml | 1 + src/client2/mod.rs | 382 ++++ src/client2/playlist.rs | 412 ++++ src/client2/response/channel.rs | 74 + src/client2/response/mod.rs | 216 ++ src/client2/response/player.rs | 231 ++ src/client2/response/playlist.rs | 179 ++ src/client2/response/playlist_music.rs | 95 + src/client2/response/video.rs | 432 ++++ ...layer__tests__map_player_data_android.snap | 334 +++ ...layer__tests__map_player_data_desktop.snap | 445 ++++ ...__tests__map_player_data_desktopmusic.snap | 319 +++ ...2__player__tests__map_player_data_ios.snap | 131 ++ ...__tests__map_player_data_tvhtml5embed.snap | 445 ++++ ...aylist__tests__map_playlist_data_long.snap | 1929 +++++++++++++++++ ...ist__tests__map_playlist_data_nomusic.snap | 1283 +++++++++++ ...ylist__tests__map_playlist_data_short.snap | 1867 ++++++++++++++++ src/lib.rs | 4 +- src/report.rs | 176 ++ src/serializer/mod.rs | 3 + src/serializer/vec_log_err.rs | 132 ++ 22 files changed, 9091 insertions(+), 1 deletion(-) create mode 100644 src/client2/mod.rs create mode 100644 src/client2/playlist.rs create mode 100644 src/client2/response/channel.rs create mode 100644 src/client2/response/mod.rs create mode 100644 src/client2/response/player.rs create mode 100644 src/client2/response/playlist.rs create mode 100644 src/client2/response/playlist_music.rs create mode 100644 src/client2/response/video.rs create mode 100644 src/client2/snapshots/rustypipe__client2__player__tests__map_player_data_android.snap create mode 100644 src/client2/snapshots/rustypipe__client2__player__tests__map_player_data_desktop.snap create mode 100644 src/client2/snapshots/rustypipe__client2__player__tests__map_player_data_desktopmusic.snap create mode 100644 src/client2/snapshots/rustypipe__client2__player__tests__map_player_data_ios.snap create mode 100644 src/client2/snapshots/rustypipe__client2__player__tests__map_player_data_tvhtml5embed.snap create mode 100644 src/client2/snapshots/rustypipe__client2__playlist__tests__map_playlist_data_long.snap create mode 100644 src/client2/snapshots/rustypipe__client2__playlist__tests__map_playlist_data_nomusic.snap create mode 100644 src/client2/snapshots/rustypipe__client2__playlist__tests__map_playlist_data_short.snap create mode 100644 src/report.rs create mode 100644 src/serializer/vec_log_err.rs diff --git a/.gitignore b/.gitignore index 23e8123..cb98b06 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ /target /Cargo.lock +RustyPipeReports +RustyPipeCache.json rusty-tube.json diff --git a/Cargo.toml b/Cargo.toml index 7ad4d82..9875e77 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -31,6 +31,7 @@ indicatif = "0.17.0" filenamify = "0.1.0" ress = "0.11.4" phf = "0.11.1" +serde_yaml = "0.9.11" [dev-dependencies] env_logger = "0.9.0" diff --git a/src/client2/mod.rs b/src/client2/mod.rs new file mode 100644 index 0000000..db311e7 --- /dev/null +++ b/src/client2/mod.rs @@ -0,0 +1,382 @@ +pub mod playlist; + +mod response; + +use std::fmt::Debug; +use std::sync::Arc; + +use anyhow::{anyhow, Context, Result}; +use fancy_regex::Regex; +use once_cell::sync::Lazy; +use rand::Rng; +use reqwest::{header, Client, ClientBuilder, Method, RequestBuilder}; +use serde::{de::DeserializeOwned, Deserialize, Serialize}; + +use crate::{ + cache::Cache, + model::{Country, Language}, + report::{YamlFileReporter, Level, Report, Reporter}, +}; + +#[derive(Clone, Copy, Debug, Serialize, Deserialize, PartialEq, Eq, Hash)] +#[serde(rename_all = "snake_case")] +pub enum ClientType { + Desktop, + DesktopMusic, + TvHtml5Embed, + Android, + Ios, +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +pub struct ContextYT { + client: ClientInfo, + /// only used on desktop + #[serde(skip_serializing_if = "Option::is_none")] + request: Option, + user: User, + /// only used for the embedded player + #[serde(skip_serializing_if = "Option::is_none")] + third_party: Option, +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct ClientInfo { + client_name: String, + client_version: String, + #[serde(skip_serializing_if = "Option::is_none")] + client_screen: Option, + #[serde(skip_serializing_if = "Option::is_none")] + device_model: Option, + platform: String, + #[serde(skip_serializing_if = "Option::is_none")] + original_url: Option, + hl: Language, + gl: Country, +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct RequestYT { + internal_experiment_flags: Vec, + use_ssl: bool, +} + +impl Default for RequestYT { + fn default() -> Self { + Self { + internal_experiment_flags: vec![], + use_ssl: true, + } + } +} + +#[derive(Clone, Debug, Serialize, Default)] +#[serde(rename_all = "camelCase")] +struct User { + // TO DO: provide a way to enable restricted mode with: + // "enableSafetyMode": true + locked_safety_mode: bool, +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct ThirdParty { + embed_url: String, +} + +const DEFAULT_UA: &str = "Mozilla/5.0 (X11; Linux x86_64; rv:102.0) Gecko/20100101 Firefox/102.0"; + +const CONSENT_COOKIE: &str = "CONSENT"; +const CONSENT_COOKIE_YES: &str = "YES+yt.462272069.de+FX+"; + +const YOUTUBEI_V1_URL: &str = "https://www.youtube.com/youtubei/v1/"; +const YOUTUBEI_V1_GAPIS_URL: &str = "https://youtubei.googleapis.com/youtubei/v1/"; +const YOUTUBE_MUSIC_V1_URL: &str = "https://music.youtube.com/youtubei/v1/"; + +const DISABLE_PRETTY_PRINT_PARAMETER: &str = "&prettyPrint=false"; + +const DESKTOP_CLIENT_VERSION: &str = "2.20220909.00.00"; +const DESKTOP_API_KEY: &str = "AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8"; +const TVHTML5_CLIENT_VERSION: &str = "2.0"; +const DESKTOP_MUSIC_API_KEY: &str = "AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30"; +const DESKTOP_MUSIC_CLIENT_VERSION: &str = "1.20220831.01.02"; + +const MOBILE_CLIENT_VERSION: &str = "17.29.35"; +const ANDROID_API_KEY: &str = "AIzaSyA8eiZmM1FaDVjRy-df2KTyQ_vz_yYM39w"; +const IOS_API_KEY: &str = "AIzaSyB-63vPrdThhKuerbB2N_l7Kwwcxj6yUAc"; +const IOS_DEVICE_MODEL: &str = "iPhone14,5"; + +static CLIENT_VERSION_REGEXES: Lazy<[Regex; 1]> = + Lazy::new(|| [Regex::new("INNERTUBE_CONTEXT_CLIENT_VERSION\":\"([0-9\\.]+?)\"").unwrap()]); + +#[derive(Clone)] +pub struct RustyPipe { + inner: Arc, + opts: RustyPipeOpts, +} + +struct RustyPipeRef { + http: Client, + cache: Cache, + reporter: Option>, + user_agent: String, + consent_cookie: String, +} + +#[derive(Clone)] +struct RustyPipeOpts { + lang: Language, + country: Country, + report: bool, +} + +impl Default for RustyPipe { + fn default() -> Self { + Self::new( + Some(Cache::from_json_file("RustyPipeCache.json")), + Some(Box::new(YamlFileReporter::default())), + None, + ) + } +} + +impl Default for RustyPipeOpts { + fn default() -> Self { + Self { + lang: Language::En, + country: Country::Us, + report: false, + } + } +} + +impl RustyPipe { + pub fn new( + cache: Option, + reporter: Option>, + user_agent: Option, + ) -> Self { + let cache = cache.unwrap_or_else(|| Cache::default()); + let user_agent = user_agent.unwrap_or(DEFAULT_UA.to_owned()); + + let http = ClientBuilder::new() + .gzip(true) + .brotli(true) + .build() + .expect("unable to build the HTTP client"); + + RustyPipe { + inner: Arc::new(RustyPipeRef { + http, + cache, + reporter, + user_agent, + consent_cookie: format!( + "{}={}{}", + CONSENT_COOKIE, + CONSENT_COOKIE_YES, + rand::thread_rng().gen_range(100..1000) + ), + }), + opts: RustyPipeOpts::default(), + } + } + + pub fn lang(mut self, lang: Language) -> Self { + self.opts.lang = lang; + self + } + + pub fn country(mut self, country: Country) -> Self { + self.opts.country = country; + self + } + + pub fn report(mut self, report: bool) -> Self { + self.opts.report = report; + self + } + + async fn get_context(&self, ctype: ClientType, localized: bool) -> ContextYT { + match ctype { + ClientType::Desktop => ContextYT { + client: ClientInfo { + client_name: "WEB".to_owned(), + client_version: DESKTOP_CLIENT_VERSION.to_owned(), + client_screen: None, + device_model: None, + platform: "DESKTOP".to_owned(), + original_url: Some("https://www.youtube.com/".to_owned()), + hl: match localized { + true => self.opts.lang, + false => Language::En, + }, + gl: match localized { + true => self.opts.country, + false => Country::Us, + }, + }, + request: Some(RequestYT::default()), + user: User::default(), + third_party: None, + }, + ClientType::DesktopMusic => todo!(), + ClientType::TvHtml5Embed => todo!(), + ClientType::Android => todo!(), + ClientType::Ios => todo!(), + } + } + + async fn request_builder( + &self, + ctype: ClientType, + method: Method, + endpoint: &str, + ) -> RequestBuilder { + match ctype { + ClientType::Desktop => self + .inner + .http + .request( + method, + format!( + "{}{}?key={}{}", + YOUTUBEI_V1_URL, endpoint, DESKTOP_API_KEY, DISABLE_PRETTY_PRINT_PARAMETER + ), + ) + .header(header::ORIGIN, "https://www.youtube.com") + .header(header::REFERER, "https://www.youtube.com") + .header(header::COOKIE, self.inner.consent_cookie.to_owned()) + .header("X-YouTube-Client-Name", "1") + .header("X-YouTube-Client-Version", DESKTOP_CLIENT_VERSION), + ClientType::DesktopMusic => todo!(), + ClientType::TvHtml5Embed => todo!(), + ClientType::Android => todo!(), + ClientType::Ios => todo!(), + } + } + + async fn execute_request< + R: DeserializeOwned + MapResponse + Debug, + M, + B: Serialize + ?Sized, + >( + &self, + ctype: ClientType, + operation: &str, + method: Method, + endpoint: &str, + id: &str, + body: &B, + ) -> Result { + let request = self + .request_builder(ctype, method.clone(), endpoint) + .await + .json(body) + .build()?; + + let request_url = request.url().to_string(); + let request_headers = request.headers().to_owned(); + + let response = self.inner.http.execute(request).await?; + + let status = response.status(); + let resp_str = response.text().await?; + + let create_report = + |level: Level, error: Option, msgs: Vec, deserialized: Option<&R>| { + if let Some(reporter) = &self.inner.reporter { + let report = Report { + package: "rustypipe".to_owned(), + version: "0.1.0".to_owned(), + date: chrono::Local::now(), + level, + operation: operation.to_owned(), + error, + msgs, + http_request: crate::report::HTTPRequest { + url: request_url, + method: method.to_string(), + req_header: request_headers + .iter() + .map(|(k, v)| { + (k.to_string(), v.to_str().unwrap_or_default().to_owned()) + }) + .collect(), + req_body: serde_json::to_string(body).unwrap_or_default(), + status: status.into(), + resp_body: resp_str.to_owned(), + }, + deserialized: deserialized.map(|d| format!("{:?}", d)), + }; + + reporter.report(&report); + } + }; + + if status.is_client_error() || status.is_server_error() { + let e = anyhow!("Server responded with error code {}", status); + create_report(Level::ERR, Some(e.to_string()), vec![], None); + return Err(e); + } + + match serde_json::from_str::(&resp_str) { + Ok(deserialized) => match deserialized.map_response(self.opts.lang, id) { + Ok(mapres) => { + if !mapres.warnings.is_empty() { + create_report( + Level::WRN, + Some("Warnings during deserialization/mapping".to_owned()), + mapres.warnings, + Some(&deserialized), + ); + } else if self.opts.report { + create_report(Level::DBG, None, vec![], Some(&deserialized)); + } + Ok(mapres.c) + } + Err(e) => { + let emsg = "Could not map reponse"; + create_report( + Level::ERR, + Some(emsg.to_owned()), + vec![e.to_string()], + Some(&deserialized), + ); + Err(e).context(emsg) + } + }, + Err(e) => { + let emsg = "Could not deserialize response"; + create_report(Level::ERR, Some(emsg.to_owned()), vec![e.to_string()], None); + Err(e).context(emsg) + } + } + } +} + +pub trait MapResponse { + fn map_response(&self, lang: Language, id: &str) -> Result>; +} + +#[derive(Clone)] +pub struct MapResult { + pub c: T, + pub warnings: Vec, +} + +impl Debug for MapResult where T: Debug { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + self.c.fmt(f) + } +} + +/* +#[cfg(test)] +mod tests { + use super::*; +} +*/ diff --git a/src/client2/playlist.rs b/src/client2/playlist.rs new file mode 100644 index 0000000..077ef9f --- /dev/null +++ b/src/client2/playlist.rs @@ -0,0 +1,412 @@ +use anyhow::{anyhow, Result}; +use reqwest::Method; +use serde::Serialize; + +use crate::{ + model::{Channel, Language, Playlist, Thumbnail, Video}, + serializer::text::{PageType, TextLink}, + timeago, util, +}; + +use super::{response, ClientType, ContextYT, MapResponse, MapResult, RustyPipe}; + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct QPlaylist { + context: ContextYT, + browse_id: String, +} + +#[derive(Clone, Debug, Serialize)] +#[serde(rename_all = "camelCase")] +struct QPlaylistCont { + context: ContextYT, + continuation: String, +} + +impl RustyPipe { + pub async fn get_playlist(&self, playlist_id: &str) -> Result { + let context = self.get_context(ClientType::Desktop, true).await; + let request_body = QPlaylist { + context, + browse_id: "VL".to_owned() + playlist_id, + }; + + self.execute_request::( + ClientType::Desktop, + "get_playlist", + Method::POST, + "browse", + playlist_id, + &request_body, + ) + .await + } + + pub async fn get_playlist_cont(&self, playlist: &mut Playlist) -> Result<()> { + match &playlist.ctoken { + Some(ctoken) => { + let context = self.get_context(ClientType::Desktop, true).await; + let request_body = QPlaylistCont { + context, + continuation: ctoken.to_owned(), + }; + + let (mut videos, ctoken) = self + .execute_request::( + ClientType::Desktop, + "get_playlist_cont", + Method::POST, + "browse", + &playlist.id, + &request_body, + ) + .await?; + + playlist.videos.append(&mut videos); + playlist.ctoken = ctoken; + + if playlist.ctoken.is_none() { + playlist.n_videos = playlist.videos.len() as u32; + } + + Ok(()) + } + None => Err(anyhow!("no ctoken")), + } + } +} + +impl MapResponse for response::Playlist { + fn map_response(&self, lang: Language, id: &str) -> Result> { + let video_items = &some_or_bail!( + some_or_bail!( + some_or_bail!( + self.contents + .two_column_browse_results_renderer + .contents + .get(0), + Err(anyhow!("twoColumnBrowseResultsRenderer empty")) + ) + .tab_renderer + .content + .section_list_renderer + .contents + .get(0), + Err(anyhow!("sectionListRenderer empty")) + ) + .item_section_renderer + .contents + .get(0), + Err(anyhow!("itemSectionRenderer empty")) + ) + .playlist_video_list_renderer + .contents; + + let (videos, ctoken) = map_playlist_items(&video_items.c); + + let (thumbnails, last_update_txt) = match &self.sidebar { + Some(sidebar) => { + let primary = some_or_bail!( + sidebar.playlist_sidebar_renderer.items.get(0), + Err(anyhow!("no primary sidebar")) + ); + + ( + &primary + .playlist_sidebar_primary_info_renderer + .thumbnail_renderer + .playlist_video_thumbnail_renderer + .thumbnail + .thumbnails, + primary + .playlist_sidebar_primary_info_renderer + .stats + .get(2) + .map(|t| t.to_owned()), + ) + } + None => { + let header_banner = some_or_bail!( + &self.header.playlist_header_renderer.playlist_header_banner, + Err(anyhow!("no thumbnail found")) + ); + + let last_update_txt = self + .header + .playlist_header_renderer + .byline + .get(1) + .map(|b| b.playlist_byline_renderer.text.to_owned()); + + ( + &header_banner + .hero_playlist_thumbnail_renderer + .thumbnail + .thumbnails, + last_update_txt, + ) + } + }; + + let thumbnails = thumbnails + .iter() + .map(|t| Thumbnail { + url: t.url.to_owned(), + width: t.width, + height: t.height, + }) + .collect::>(); + + let n_videos = match ctoken { + Some(_) => { + ok_or_bail!( + util::parse_numeric(&self.header.playlist_header_renderer.num_videos_text), + Err(anyhow!("no video count")) + ) + } + None => videos.len() as u32, + }; + + let playlist_id = self.header.playlist_header_renderer.playlist_id.to_owned(); + if playlist_id != id { + return Err(anyhow!("got wrong playlist id {}, expected {}", playlist_id, id)); + } + + let name = self.header.playlist_header_renderer.title.to_owned(); + let description = self + .header + .playlist_header_renderer + .description_text + .to_owned(); + + let channel = match &self.header.playlist_header_renderer.owner_text { + Some(owner_text) => match owner_text { + TextLink::Browse { + text, + page_type, + browse_id, + } => match page_type { + PageType::Channel => Some(Channel { + id: browse_id.to_owned(), + name: text.to_owned(), + }), + _ => None, + }, + _ => None, + }, + None => None, + }; + + Ok(MapResult { + c: Playlist { + id: playlist_id, + name, + videos, + n_videos, + ctoken, + thumbnails, + description, + channel, + last_update: match &last_update_txt { + Some(textual_date) => timeago::parse_textual_date_to_dt(lang, textual_date), + None => None, + }, + last_update_txt, + }, + warnings: video_items.warnings.to_owned(), + }) + } +} + +impl MapResponse<(Vec