Compare commits

...

4 commits

Author SHA1 Message Date
182f9ebfb8 fix: extracting artist discography without page type
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2023-05-31 11:38:50 +02:00
0cd018e37a fix: add dictionary support for short timeago strings 2023-05-31 01:41:46 +02:00
cc2cadc309 fix: add support for A/B test 7 (short date format) 2023-05-28 21:07:03 +02:00
cca9838b7e fix: playlist id regex 2023-05-28 19:28:33 +02:00
32 changed files with 6511 additions and 1767 deletions

View file

@ -4,6 +4,8 @@ use anyhow::{bail, Result};
use futures::{stream, StreamExt};
use indicatif::{ProgressBar, ProgressStyle};
use num_enum::TryFromPrimitive;
use once_cell::sync::Lazy;
use regex::Regex;
use rustypipe::client::{ClientType, RustyPipe, RustyPipeQuery, YTContext};
use rustypipe::model::YouTubeItem;
use rustypipe::param::search_filter::{ItemType, SearchFilter};
@ -21,6 +23,7 @@ pub enum ABTest {
TrendsVideoTab = 4,
TrendsPageHeaderRenderer = 5,
DiscographyPage = 6,
ShortDateFormat = 7,
}
const TESTS_TO_RUN: [ABTest; 3] = [
@ -90,6 +93,7 @@ pub async fn run_test(
ABTest::TrendsVideoTab => trends_video_tab(&query).await,
ABTest::TrendsPageHeaderRenderer => trends_page_header_renderer(&query).await,
ABTest::DiscographyPage => discography_page(&query).await,
ABTest::ShortDateFormat => short_date_format(&query).await,
}
.unwrap();
pb.inc(1);
@ -223,10 +227,19 @@ pub async fn trends_page_header_renderer(rp: &RustyPipeQuery) -> Result<bool> {
}
/// Probe for A/B test 6 (discography page).
///
/// Fetches the music artist `UC7cl4MmM6ZZ2TcFyMk_b4pg` and returns `Ok(true)`
/// when at most 10 albums were returned.
/// NOTE(review): presumably a short album list means the full discography
/// lives on a separate page (the new variant) — confirm against the A/B docs.
///
/// # Errors
/// Returns the request/extraction error instead of panicking, so a single
/// failed probe does not abort the whole test run from inside this function.
pub async fn discography_page(rp: &RustyPipeQuery) -> Result<bool> {
    // Fetch the artist exactly once and propagate failures with `?`.
    let artist = rp.music_artist("UC7cl4MmM6ZZ2TcFyMk_b4pg", false).await?;
    Ok(artist.albums.len() <= 10)
}
/// Probe for A/B test 7 (short date format).
///
/// Loads the videos of channel `UC2DjFE7Xf11URZqWBigcVOQ` and returns
/// `Ok(true)` if at least one video has a textual publish date that contains
/// a digit directly followed by one of the unit abbreviations
/// `y`, `mo`, `w`, `d`, `h`, `min` and a space (e.g. `21h ago`).
/// Videos without a publish date text are ignored.
///
/// # Errors
/// Returns the error of the channel request if it fails.
pub async fn short_date_format(rp: &RustyPipeQuery) -> Result<bool> {
    static SHORT_DATE: Lazy<Regex> = Lazy::new(|| Regex::new("\\d(?:y|mo|w|d|h|min) ").unwrap());

    let channel = rp.channel_videos("UC2DjFE7Xf11URZqWBigcVOQ").await?;
    let found_short_date = channel
        .content
        .items
        .iter()
        .filter_map(|video| video.publish_date_txt.as_deref())
        .any(|date_txt| SHORT_DATE.is_match(date_txt));
    Ok(found_short_date)
}

View file

@ -0,0 +1,83 @@
use std::{
collections::{BTreeMap, HashSet},
fs::File,
};
use futures::{stream, StreamExt};
use path_macro::path;
use rustypipe::{
client::{RustyPipe, RustyPipeQuery},
param::{Language, LANGUAGES},
};
use crate::util::DICT_DIR;
/// Collect textual video publish dates ("timeago" strings) in every language
/// and write them to `timeago_samples_short.json` inside `DICT_DIR`.
///
/// For each language in `LANGUAGES` the videos of a fixed set of channels are
/// fetched (at most `concurrency` channel requests in flight at once) and
/// their publish date texts are gathered in channel order.
///
/// The output is deduplicated by position: only the indices at which an
/// English string appears for the first time are kept, and the same indices
/// are then kept for every other language.
/// NOTE(review): this assumes every language yields the same videos in the
/// same order — confirm.
///
/// # Panics
/// Panics if a channel request fails, if no English strings were collected,
/// or if the output file cannot be created or written.
pub async fn collect_video_dates(concurrency: usize) {
    let json_path = path!(*DICT_DIR / "timeago_samples_short.json");
    // NOTE(review): the visitor data is hard-coded, presumably to pin the
    // A/B variant that serves the short date format — confirm.
    let rp = RustyPipe::builder()
        .visitor_data("Cgtwel9tMkh2eHh0USiyzc6jBg%3D%3D")
        .build();

    let channels = [
        "UCeY0bbntWzzVIaj2z3QigXg",
        "UCcmpeVbSSQlZRvHfdC-CRwg",
        "UC65afEgL62PGFWXY7n6CUbA",
        "UCEOXxzW2vU0P-0THehuIIeg",
    ];

    // Language -> all date strings of all channels, in channel order.
    let mut lang_strings: BTreeMap<Language, Vec<String>> = BTreeMap::new();
    for lang in LANGUAGES {
        println!("{lang}");
        let query = rp.query().lang(lang);

        let per_channel = stream::iter(channels)
            .map(|id| get_channel_datestrings(&query, id))
            .buffered(concurrency)
            .collect::<Vec<_>>()
            .await;
        let collected = per_channel.into_iter().flatten().collect::<Vec<_>>();

        lang_strings.insert(lang, collected);
    }

    // Remember the positions where an English string occurs for the first time.
    let mut seen_en: HashSet<&str> = HashSet::new();
    let mut keep_idx: HashSet<usize> = HashSet::new();
    for (idx, txt) in lang_strings[&Language::En].iter().enumerate() {
        if seen_en.insert(txt) {
            keep_idx.insert(idx);
        }
    }

    // Keep only those positions in every language.
    let strings_map = lang_strings
        .iter()
        .map(|(lang, strings)| {
            let kept = strings
                .iter()
                .enumerate()
                .filter_map(|(idx, txt)| keep_idx.contains(&idx).then_some(txt))
                .collect::<Vec<_>>();
            (lang, kept)
        })
        .collect::<BTreeMap<_, _>>();

    let file = File::create(json_path).unwrap();
    serde_json::to_writer_pretty(file, &strings_map).unwrap();
}
/// Fetch the videos of channel `id` and return the publish date text of every
/// video that has one, in the order the videos were returned.
///
/// # Panics
/// Panics if the channel request fails.
async fn get_channel_datestrings(rp: &RustyPipeQuery, id: &str) -> Vec<String> {
    let videos = rp.channel_videos(id).await.unwrap().content.items;

    let mut date_strings = Vec::new();
    for video in videos {
        // Videos without a textual publish date are skipped.
        if let Some(date_txt) = video.publish_date_txt {
            date_strings.push(date_txt);
        }
    }
    date_strings
}

View file

@ -4,6 +4,7 @@ mod abtest;
mod collect_album_types;
mod collect_large_numbers;
mod collect_playlist_dates;
mod collect_video_dates;
mod collect_video_durations;
mod download_testfiles;
mod gen_dictionary;
@ -27,6 +28,7 @@ enum Commands {
CollectLargeNumbers,
CollectAlbumTypes,
CollectVideoDurations,
CollectVideoDates,
ParsePlaylistDates,
ParseLargeNumbers,
ParseAlbumTypes,
@ -60,6 +62,9 @@ async fn main() {
Commands::CollectVideoDurations => {
collect_video_durations::collect_video_durations(cli.concurrency).await;
}
Commands::CollectVideoDates => {
collect_video_dates::collect_video_dates(cli.concurrency).await;
}
Commands::ParsePlaylistDates => collect_playlist_dates::write_samples_to_dict(),
Commands::ParseLargeNumbers => collect_large_numbers::write_samples_to_dict(),
Commands::ParseAlbumTypes => collect_album_types::write_samples_to_dict(),

View file

@ -376,3 +376,11 @@ visitor data cookie to be set, as it was the case with the old system.
**NEW**
![A/B test 6 new screenshot](./_img/ab_6_new.png)
## [7] Short timeago format
- **Encountered on:** 28.05.2023
- **Impact:** 🟡 Medium
YouTube changed their date format from the long format (*21 hours ago*, *3 days ago*) to
a short format (*21h ago*, *3d ago*).

View file

@ -200,15 +200,20 @@ impl MapResponse<Channel<Paginator<VideoItem>>> for response::Channel {
id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<Channel<Paginator<VideoItem>>>, ExtractionError> {
let content = map_channel_content(id, self.contents, self.alerts)?;
let visitor_data = self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned));
let channel_data = map_channel(
MapChannelData {
header: self.header,
metadata: self.metadata,
microformat: self.microformat,
visitor_data: self.response_context.visitor_data.clone(),
visitor_data: visitor_data.clone(),
has_shorts: content.has_shorts,
has_live: content.has_live,
},
@ -226,7 +231,7 @@ impl MapResponse<Channel<Paginator<VideoItem>>> for response::Channel {
None,
mapper.items,
mapper.ctoken,
self.response_context.visitor_data,
visitor_data,
crate::model::paginator::ContinuationEndpoint::Browse,
);
@ -243,15 +248,20 @@ impl MapResponse<Channel<Paginator<PlaylistItem>>> for response::Channel {
id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<Channel<Paginator<PlaylistItem>>>, ExtractionError> {
let content = map_channel_content(id, self.contents, self.alerts)?;
let visitor_data = self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned));
let channel_data = map_channel(
MapChannelData {
header: self.header,
metadata: self.metadata,
microformat: self.microformat,
visitor_data: self.response_context.visitor_data,
visitor_data,
has_shorts: content.has_shorts,
has_live: content.has_live,
},
@ -280,6 +290,7 @@ impl MapResponse<Channel<ChannelInfo>> for response::Channel {
id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<Channel<ChannelInfo>>, ExtractionError> {
let content = map_channel_content(id, self.contents, self.alerts)?;
let channel_data = map_channel(
@ -287,7 +298,10 @@ impl MapResponse<Channel<ChannelInfo>> for response::Channel {
header: self.header,
metadata: self.metadata,
microformat: self.microformat,
visitor_data: self.response_context.visitor_data,
visitor_data: self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned)),
has_shorts: content.has_shorts,
has_live: content.has_live,
},
@ -605,7 +619,7 @@ mod tests {
let channel: response::Channel =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Channel<Paginator<VideoItem>>> =
channel.map_response(id, Language::En, None).unwrap();
channel.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -632,7 +646,7 @@ mod tests {
let channel: response::Channel =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Channel<Paginator<PlaylistItem>>> = channel
.map_response("UC2DjFE7Xf11URZqWBigcVOQ", Language::En, None)
.map_response("UC2DjFE7Xf11URZqWBigcVOQ", Language::En, None, None)
.unwrap();
assert!(
@ -651,7 +665,7 @@ mod tests {
let channel: response::Channel =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Channel<ChannelInfo>> = channel
.map_response("UC2DjFE7Xf11URZqWBigcVOQ", Language::En, None)
.map_response("UC2DjFE7Xf11URZqWBigcVOQ", Language::En, None, None)
.unwrap();
assert!(

View file

@ -1247,7 +1247,12 @@ impl RustyPipeQuery {
})
} else {
match serde_json::from_str::<R>(&body) {
Ok(deserialized) => match deserialized.map_response(id, self.opts.lang, deobf) {
Ok(deserialized) => match deserialized.map_response(
id,
self.opts.lang,
deobf,
self.opts.visitor_data.as_deref(),
) {
Ok(mapres) => Ok(mapres),
Err(e) => Err(e.into()),
},
@ -1453,11 +1458,13 @@ trait MapResponse<T> {
/// that the returned entity matches this ID and return an error instead.
/// - `lang`: Language of the request. Used for mapping localized information like dates.
/// - `deobf`: Deobfuscator (if passed to the `execute_request_deobf` method)
/// - `vdata`: Visitor data option of the client
fn map_response(
self,
id: &str,
lang: Language,
deobf: Option<&DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<T>, ExtractionError>;
}

View file

@ -4,7 +4,7 @@ use once_cell::sync::Lazy;
use regex::Regex;
use crate::{
client::response::url_endpoint::{MusicPageType, NavigationEndpoint},
client::response::url_endpoint::NavigationEndpoint,
error::{Error, ExtractionError},
model::{AlbumItem, ArtistId, MusicArtist},
serializer::MapResult,
@ -96,6 +96,7 @@ impl MapResponse<MusicArtist> for response::MusicArtist {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicArtist>, ExtractionError> {
let mapped = map_artist_page(self, id, lang, false)?;
Ok(MapResult {
@ -111,6 +112,7 @@ impl MapResponse<(MusicArtist, bool)> for response::MusicArtist {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<(MusicArtist, bool)>, ExtractionError> {
map_artist_page(self, id, lang, true)
}
@ -189,20 +191,29 @@ fn map_artist_page(
.music_carousel_shelf_basic_header_renderer
.more_content_button
{
match button.button_renderer.navigation_endpoint.music_page() {
if let NavigationEndpoint::Browse {
browse_endpoint, ..
} = button.button_renderer.navigation_endpoint
{
// Music videos
Some((MusicPageType::Playlist, id)) => {
if browse_endpoint
.browse_endpoint_context_supported_configs
.map(|cfg| {
cfg.browse_endpoint_context_music_config.page_type
== PageType::Playlist
})
.unwrap_or_default()
{
if videos_playlist_id.is_none() {
videos_playlist_id = Some(id);
videos_playlist_id = Some(browse_endpoint.browse_id);
}
}
// Albums
Some((MusicPageType::ArtistDiscography, _)) => {
} else if browse_endpoint
.browse_id
.starts_with(util::ARTIST_DISCOGRAPHY_PREFIX)
{
can_fetch_more = true;
extendable_albums = true;
}
// Albums or playlists
Some((MusicPageType::Artist, _)) => {
} else {
// Peek at the first item to determine type
if let Some(response::music_item::MusicResponseItem::MusicTwoRowItemRenderer(item)) = shelf.contents.c.first() {
if let Some(PageType::Album) = item.navigation_endpoint.page_type() {
@ -211,7 +222,6 @@ fn map_artist_page(
}
}
}
_ => {}
}
}
}
@ -286,6 +296,7 @@ impl MapResponse<Vec<AlbumItem>> for response::MusicArtistAlbums {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Vec<AlbumItem>>, ExtractionError> {
// dbg!(&self);
@ -356,7 +367,7 @@ mod tests {
let resp: response::MusicArtist =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<(MusicArtist, bool)> =
resp.map_response(id, Language::En, None).unwrap();
resp.map_response(id, Language::En, None, None).unwrap();
let (mut artist, can_fetch_more) = map_res.c;
assert!(
@ -371,7 +382,7 @@ mod tests {
let resp: response::MusicArtistAlbums =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let mut map_res: MapResult<Vec<AlbumItem>> =
resp.map_response(id, Language::En, None).unwrap();
resp.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -392,7 +403,7 @@ mod tests {
let artist: response::MusicArtist =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicArtist> = artist
.map_response("UClmXPfaYhXOYsNn_QUyheWQ", Language::En, None)
.map_response("UClmXPfaYhXOYsNn_QUyheWQ", Language::En, None, None)
.unwrap();
assert!(
@ -411,7 +422,7 @@ mod tests {
let artist: response::MusicArtist =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let res: Result<MapResult<MusicArtist>, ExtractionError> =
artist.map_response("UCLkAepWjdylmXSltofFvsYQ", Language::En, None);
artist.map_response("UCLkAepWjdylmXSltofFvsYQ", Language::En, None, None);
let e = res.unwrap_err();
match e {

View file

@ -60,6 +60,7 @@ impl MapResponse<MusicCharts> for response::MusicCharts {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<crate::serializer::MapResult<MusicCharts>, crate::error::ExtractionError> {
let countries = self
.framework_updates
@ -164,7 +165,8 @@ mod tests {
let charts: response::MusicCharts =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicCharts> = charts.map_response("", Language::En, None).unwrap();
let map_res: MapResult<MusicCharts> =
charts.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -157,6 +157,7 @@ impl MapResponse<TrackDetails> for response::MusicDetails {
id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<TrackDetails>, ExtractionError> {
let tabs = self
.contents
@ -237,6 +238,7 @@ impl MapResponse<Paginator<TrackItem>> for response::MusicDetails {
id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Paginator<TrackItem>>, ExtractionError> {
let tabs = self
.contents
@ -297,6 +299,7 @@ impl MapResponse<Lyrics> for response::MusicLyrics {
id: &str,
_lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Lyrics>, ExtractionError> {
let lyrics = self
.contents
@ -330,6 +333,7 @@ impl MapResponse<MusicRelated> for response::MusicRelated {
_id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicRelated>, ExtractionError> {
// Find artist
let artist_id = self
@ -422,7 +426,7 @@ mod tests {
let details: response::MusicDetails =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<model::TrackDetails> =
details.map_response(id, Language::En, None).unwrap();
details.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -442,7 +446,7 @@ mod tests {
let radio: response::MusicDetails =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<TrackItem>> =
radio.map_response(id, Language::En, None).unwrap();
radio.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -459,7 +463,7 @@ mod tests {
let lyrics: response::MusicLyrics =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Lyrics> = lyrics.map_response("", Language::En, None).unwrap();
let map_res: MapResult<Lyrics> = lyrics.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -476,7 +480,8 @@ mod tests {
let lyrics: response::MusicRelated =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicRelated> = lyrics.map_response("", Language::En, None).unwrap();
let map_res: MapResult<MusicRelated> =
lyrics.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -57,6 +57,7 @@ impl MapResponse<Vec<MusicGenreItem>> for response::MusicGenres {
_id: &str,
_lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<crate::serializer::MapResult<Vec<MusicGenreItem>>, ExtractionError> {
let content = self
.contents
@ -110,6 +111,7 @@ impl MapResponse<MusicGenre> for response::MusicGenre {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<crate::serializer::MapResult<MusicGenre>, ExtractionError> {
// dbg!(&self);
@ -214,7 +216,7 @@ mod tests {
let playlist: response::MusicGenres =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Vec<model::MusicGenreItem>> =
playlist.map_response("", Language::En, None).unwrap();
playlist.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -234,7 +236,7 @@ mod tests {
let playlist: response::MusicGenre =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<model::MusicGenre> =
playlist.map_response(id, Language::En, None).unwrap();
playlist.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -52,6 +52,7 @@ impl<T: FromYtItem> MapResponse<Vec<T>> for response::MusicNew {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<crate::serializer::MapResult<Vec<T>>, ExtractionError> {
let items = self
.contents
@ -96,8 +97,9 @@ mod tests {
let new_albums: response::MusicNew =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Vec<AlbumItem>> =
new_albums.map_response("", Language::En, None).unwrap();
let map_res: MapResult<Vec<AlbumItem>> = new_albums
.map_response("", Language::En, None, None)
.unwrap();
assert!(
map_res.warnings.is_empty(),
@ -115,8 +117,9 @@ mod tests {
let new_albums: response::MusicNew =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Vec<TrackItem>> =
new_albums.map_response("", Language::En, None).unwrap();
let map_res: MapResult<Vec<TrackItem>> = new_albums
.map_response("", Language::En, None, None)
.unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -122,6 +122,7 @@ impl MapResponse<MusicPlaylist> for response::MusicPlaylist {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicPlaylist>, ExtractionError> {
// dbg!(&self);
@ -267,6 +268,7 @@ impl MapResponse<MusicAlbum> for response::MusicPlaylist {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicAlbum>, ExtractionError> {
// dbg!(&self);
@ -418,7 +420,7 @@ mod tests {
let playlist: response::MusicPlaylist =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<model::MusicPlaylist> =
playlist.map_response(id, Language::En, None).unwrap();
playlist.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -443,7 +445,7 @@ mod tests {
let playlist: response::MusicPlaylist =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<model::MusicAlbum> =
playlist.map_response(id, Language::En, None).unwrap();
playlist.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -231,6 +231,7 @@ impl MapResponse<MusicSearchResult> for response::MusicSearch {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicSearchResult>, crate::error::ExtractionError> {
// dbg!(&self);
@ -296,6 +297,7 @@ impl<T: FromYtItem> MapResponse<MusicSearchFiltered<T>> for response::MusicSearc
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicSearchFiltered<T>>, ExtractionError> {
// dbg!(&self);
@ -356,6 +358,7 @@ impl MapResponse<MusicSearchSuggestion> for response::MusicSearchSuggestion {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<MusicSearchSuggestion>, ExtractionError> {
let mut mapper = MusicListMapper::new(lang);
let mut terms = Vec::new();
@ -419,7 +422,7 @@ mod tests {
let search: response::MusicSearch =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicSearchResult> =
search.map_response("", Language::En, None).unwrap();
search.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -442,7 +445,7 @@ mod tests {
let search: response::MusicSearch =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicSearchFiltered<TrackItem>> =
search.map_response("", Language::En, None).unwrap();
search.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -461,7 +464,7 @@ mod tests {
let search: response::MusicSearch =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicSearchFiltered<AlbumItem>> =
search.map_response("", Language::En, None).unwrap();
search.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -480,7 +483,7 @@ mod tests {
let search: response::MusicSearch =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicSearchFiltered<ArtistItem>> =
search.map_response("", Language::En, None).unwrap();
search.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -501,7 +504,7 @@ mod tests {
let search: response::MusicSearch =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicSearchFiltered<MusicPlaylistItem>> =
search.map_response("", Language::En, None).unwrap();
search.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -521,8 +524,9 @@ mod tests {
let suggestion: response::MusicSearchSuggestion =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<MusicSearchSuggestion> =
suggestion.map_response("", Language::En, None).unwrap();
let map_res: MapResult<MusicSearchSuggestion> = suggestion
.map_response("", Language::En, None, None)
.unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -96,6 +96,7 @@ impl MapResponse<Paginator<YouTubeItem>> for response::Continuation {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Paginator<YouTubeItem>>, ExtractionError> {
let items = self
.on_response_received_actions
@ -131,6 +132,7 @@ impl MapResponse<Paginator<MusicItem>> for response::MusicContinuation {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Paginator<MusicItem>>, ExtractionError> {
let mut mapper = MusicListMapper::new(lang);
let mut continuations = Vec::new();
@ -353,7 +355,7 @@ mod tests {
let items: response::Continuation =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<YouTubeItem>> =
items.map_response("", Language::En, None).unwrap();
items.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -375,7 +377,7 @@ mod tests {
let items: response::Continuation =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<YouTubeItem>> =
items.map_response("", Language::En, None).unwrap();
items.map_response("", Language::En, None, None).unwrap();
let paginator: Paginator<VideoItem> =
map_yt_paginator(map_res.c, None, ContinuationEndpoint::Browse);
@ -398,7 +400,7 @@ mod tests {
let items: response::Continuation =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<YouTubeItem>> =
items.map_response("", Language::En, None).unwrap();
items.map_response("", Language::En, None, None).unwrap();
let paginator: Paginator<PlaylistItem> =
map_yt_paginator(map_res.c, None, ContinuationEndpoint::Browse);
@ -421,7 +423,7 @@ mod tests {
let items: response::MusicContinuation =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<MusicItem>> =
items.map_response("", Language::En, None).unwrap();
items.map_response("", Language::En, None, None).unwrap();
let paginator: Paginator<TrackItem> =
map_ytm_paginator(map_res.c, None, ContinuationEndpoint::MusicBrowse);
@ -442,7 +444,7 @@ mod tests {
let items: response::MusicContinuation =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<MusicItem>> =
items.map_response("", Language::En, None).unwrap();
items.map_response("", Language::En, None, None).unwrap();
let paginator: Paginator<MusicPlaylistItem> =
map_ytm_paginator(map_res.c, None, ContinuationEndpoint::MusicBrowse);

View file

@ -143,6 +143,7 @@ impl MapResponse<VideoPlayer> for response::Player {
id: &str,
_lang: Language,
deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<super::MapResult<VideoPlayer>, ExtractionError> {
let deobf = Deobfuscator::new(deobf.unwrap())?;
let mut warnings = vec![];
@ -372,7 +373,10 @@ impl MapResponse<VideoPlayer> for response::Player {
hls_manifest_url: streaming_data.hls_manifest_url,
dash_manifest_url: streaming_data.dash_manifest_url,
preview_frames,
visitor_data: self.response_context.visitor_data,
visitor_data: self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned)),
},
warnings,
})
@ -717,7 +721,7 @@ mod tests {
let resp: response::Player = serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res = resp
.map_response("pPvd8UxmSbQ", Language::En, Some(&DEOBF_DATA))
.map_response("pPvd8UxmSbQ", Language::En, Some(&DEOBF_DATA), None)
.unwrap();
assert!(

View file

@ -37,6 +37,7 @@ impl MapResponse<Playlist> for response::Playlist {
id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<Playlist>, ExtractionError> {
let (Some(contents), Some(header)) = (self.contents, self.header) else {
return Err(response::alerts_to_err(id, self.alerts));
@ -152,7 +153,10 @@ impl MapResponse<Playlist> for response::Playlist {
channel,
last_update,
last_update_txt,
visitor_data: self.response_context.visitor_data,
visitor_data: self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned)),
},
warnings: mapper.warnings,
})
@ -181,7 +185,7 @@ mod tests {
let playlist: response::Playlist =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res = playlist.map_response(id, Language::En, None).unwrap();
let map_res = playlist.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -759,7 +759,7 @@ impl MusicListMapper {
}));
Ok(Some(MusicItemType::Playlist))
}
MusicPageType::None | MusicPageType::ArtistDiscography => {
MusicPageType::None => {
// There may be broken YT channels from the artist search. They can be skipped.
Ok(None)
}
@ -901,7 +901,7 @@ impl MusicListMapper {
}));
Ok(Some(MusicItemType::Playlist))
}
MusicPageType::None | MusicPageType::ArtistDiscography => Ok(None),
MusicPageType::None => Ok(None),
MusicPageType::Unknown => {
self.has_unknown = true;
Ok(None)
@ -1039,7 +1039,7 @@ impl MusicListMapper {
}));
Some(MusicItemType::Playlist)
}
MusicPageType::None | MusicPageType::ArtistDiscography => None,
MusicPageType::None => None,
MusicPageType::Unknown => {
self.has_unknown = true;
None

View file

@ -102,9 +102,12 @@ pub(crate) struct BrowseEndpointConfig {
pub browse_endpoint_context_music_config: BrowseEndpointMusicConfig,
}
/// Music-specific configuration of a browse endpoint
/// (the `browse_endpoint_context_music_config` field of `BrowseEndpointConfig`).
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct BrowseEndpointMusicConfig {
/// Page type of the browse target.
///
/// Uses the `PageType` default when the field is absent (`serde(default)`).
/// NOTE(review): `DefaultOnError` presumably also substitutes the default
/// when the value cannot be deserialized (e.g. an unrecognized page type)
/// instead of failing — confirm against the serde_with documentation.
#[serde(default)]
#[serde_as(as = "DefaultOnError")]
pub page_type: PageType,
}
@ -114,9 +117,12 @@ pub(crate) struct CommandMetadata {
pub web_command_metadata: WebCommandMetadata,
}
/// Web command metadata of an endpoint
/// (the `web_command_metadata` field of `CommandMetadata`).
#[serde_as]
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub(crate) struct WebCommandMetadata {
/// Page type of the web target.
///
/// Uses the `PageType` default when the field is absent (`serde(default)`).
/// NOTE(review): `DefaultOnError` presumably also substitutes the default
/// when the value cannot be deserialized instead of failing — confirm
/// against the serde_with documentation.
#[serde(default)]
#[serde_as(as = "DefaultOnError")]
pub web_page_type: PageType,
}
@ -144,15 +150,13 @@ pub(crate) enum MusicVideoType {
Track,
}
#[derive(Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
#[derive(Default, Debug, Clone, Copy, Deserialize, PartialEq, Eq)]
pub(crate) enum PageType {
#[serde(
rename = "MUSIC_PAGE_TYPE_ARTIST",
alias = "MUSIC_PAGE_TYPE_AUDIOBOOK_ARTIST"
)]
Artist,
#[serde(rename = "MUSIC_PAGE_TYPE_ARTIST_DISCOGRAPHY")]
ArtistDiscography,
#[serde(rename = "MUSIC_PAGE_TYPE_ALBUM", alias = "MUSIC_PAGE_TYPE_AUDIOBOOK")]
Album,
#[serde(
@ -162,7 +166,7 @@ pub(crate) enum PageType {
Channel,
#[serde(rename = "MUSIC_PAGE_TYPE_PLAYLIST", alias = "WEB_PAGE_TYPE_PLAYLIST")]
Playlist,
#[serde(rename = "MUSIC_PAGE_TYPE_UNKNOWN")]
#[default]
Unknown,
}
@ -170,9 +174,6 @@ impl PageType {
pub(crate) fn to_url_target(self, id: String) -> Option<UrlTarget> {
match self {
PageType::Artist | PageType::Channel => Some(UrlTarget::Channel { id }),
PageType::ArtistDiscography => id
.strip_prefix(util::ARTIST_DISCOGRAPHY_PREFIX)
.map(|id| UrlTarget::Channel { id: id.to_owned() }),
PageType::Album => Some(UrlTarget::Album { id }),
PageType::Playlist => Some(UrlTarget::Playlist { id }),
PageType::Unknown => None,
@ -183,7 +184,6 @@ impl PageType {
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub(crate) enum MusicPageType {
Artist,
ArtistDiscography,
Album,
Playlist,
Track { is_video: bool },
@ -195,7 +195,6 @@ impl From<PageType> for MusicPageType {
fn from(t: PageType) -> Self {
match t {
PageType::Artist => MusicPageType::Artist,
PageType::ArtistDiscography => MusicPageType::ArtistDiscography,
PageType::Album => MusicPageType::Album,
PageType::Playlist => MusicPageType::Playlist,
PageType::Channel => MusicPageType::None,

View file

@ -92,6 +92,7 @@ impl MapResponse<SearchResult> for response::Search {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<SearchResult>, ExtractionError> {
let items = self
.contents
@ -113,7 +114,10 @@ impl MapResponse<SearchResult> for response::Search {
crate::model::paginator::ContinuationEndpoint::Search,
),
corrected_query: mapper.corrected_query,
visitor_data: self.response_context.visitor_data,
visitor_data: self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned)),
},
warnings: mapper.warnings,
})
@ -145,7 +149,8 @@ mod tests {
let json_file = File::open(json_path).unwrap();
let search: response::Search = serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<SearchResult> = search.map_response("", Language::En, None).unwrap();
let map_res: MapResult<SearchResult> =
search.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -54,6 +54,7 @@ impl MapResponse<Paginator<VideoItem>> for response::Startpage {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<Paginator<VideoItem>>, ExtractionError> {
let grid = self
.contents
@ -70,7 +71,9 @@ impl MapResponse<Paginator<VideoItem>> for response::Startpage {
Ok(map_startpage_videos(
grid,
lang,
self.response_context.visitor_data,
self.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned)),
))
}
}
@ -81,6 +84,7 @@ impl MapResponse<Vec<VideoItem>> for response::Trending {
_id: &str,
lang: crate::param::Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Vec<VideoItem>>, ExtractionError> {
let items = self
.contents
@ -146,8 +150,9 @@ mod tests {
let startpage: response::Startpage =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Paginator<VideoItem>> =
startpage.map_response("", Language::En, None).unwrap();
let map_res: MapResult<Paginator<VideoItem>> = startpage
.map_response("", Language::En, None, None)
.unwrap();
assert!(
map_res.warnings.is_empty(),
@ -169,8 +174,9 @@ mod tests {
let startpage: response::Trending =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res: MapResult<Vec<VideoItem>> =
startpage.map_response("", Language::En, None).unwrap();
let map_res: MapResult<Vec<VideoItem>> = startpage
.map_response("", Language::En, None, None)
.unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -328,6 +328,7 @@ impl MapResponse<UrlTarget> for response::ResolvedUrl {
_id: &str,
_lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<UrlTarget>, ExtractionError> {
let pt = self.endpoint.page_type();
if let NavigationEndpoint::Browse {

View file

@ -82,6 +82,7 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
vdata: Option<&str>,
) -> Result<MapResult<VideoDetails>, ExtractionError> {
let mut warnings = Vec::new();
@ -256,7 +257,10 @@ impl MapResponse<VideoDetails> for response::VideoDetails {
_ => return Err(ExtractionError::InvalidData("invalid channel link".into())),
};
let visitor_data = self.response_context.visitor_data;
let visitor_data = self
.response_context
.visitor_data
.or_else(|| vdata.map(str::to_owned));
let recommended = contents
.two_column_watch_next_results
.secondary_results
@ -369,6 +373,7 @@ impl MapResponse<Paginator<Comment>> for response::VideoComments {
_id: &str,
lang: Language,
_deobf: Option<&crate::deobfuscate::DeobfData>,
_vdata: Option<&str>,
) -> Result<MapResult<Paginator<Comment>>, ExtractionError> {
let received_endpoints = self.on_response_received_endpoints;
let mut warnings = received_endpoints.warnings;
@ -561,7 +566,7 @@ mod tests {
let details: response::VideoDetails =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res = details.map_response(id, Language::En, None).unwrap();
let map_res = details.map_response(id, Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),
@ -581,7 +586,9 @@ mod tests {
let details: response::VideoDetails =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let err = details.map_response("", Language::En, None).unwrap_err();
let err = details
.map_response("", Language::En, None, None)
.unwrap_err();
assert!(matches!(
err,
crate::error::ExtractionError::NotFound { .. }
@ -597,7 +604,7 @@ mod tests {
let comments: response::VideoComments =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
let map_res = comments.map_response("", Language::En, None).unwrap();
let map_res = comments.map_response("", Language::En, None, None).unwrap();
assert!(
map_res.warnings.is_empty(),

View file

@ -80,32 +80,42 @@ SAttributed {
Text {
text: "\n\n",
},
Text {
Browse {
text: "#aespa",
page_type: Unknown,
browse_id: "FEhashtag",
},
Text {
text: " ",
},
Text {
Browse {
text: "#æspa",
page_type: Unknown,
browse_id: "FEhashtag",
},
Text {
text: " ",
},
Text {
Browse {
text: "#BlackMamba",
page_type: Unknown,
browse_id: "FEhashtag",
},
Text {
text: " ",
},
Text {
Browse {
text: "#블랙맘바",
page_type: Unknown,
browse_id: "FEhashtag",
},
Text {
text: " ",
},
Text {
Browse {
text: "#에스파",
page_type: Unknown,
browse_id: "FEhashtag",
},
Text {
text: "\naespa 에스파 'Black Mamba' MV ℗ SM Entertainment",

File diff suppressed because it is too large Load diff

View file

@ -26,7 +26,7 @@ pub static VIDEO_ID_REGEX: Lazy<Regex> = Lazy::new(|| Regex::new(r"^[A-Za-z0-9_-
pub static CHANNEL_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^UC[A-Za-z0-9_-]{22}$").unwrap());
pub static PLAYLIST_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^(?:PL|RDCLAK|OLAK|UU)[A-Za-z0-9_-]{16,50}$").unwrap());
Lazy::new(|| Regex::new(r"^(?:PL|RD|OLAK|UU)[A-Za-z0-9_-]{16,50}$").unwrap());
pub static ALBUM_ID_REGEX: Lazy<Regex> =
Lazy::new(|| Regex::new(r"^MPREb_[A-Za-z0-9_-]{11}$").unwrap());
pub static VANITY_PATH_REGEX: Lazy<Regex> = Lazy::new(|| {
@ -128,7 +128,35 @@ where
buf.parse()
}
/// Parse all numbers occurring in a string and reurn them as a vec
/// Multiply together all numbers occurring in a string.
///
/// Every maximal run of ASCII digits is parsed as a separate number of type `F`.
/// Runs that cannot be parsed as `F` (for example because they are out of range)
/// are skipped. The result is the product of all successfully parsed numbers,
/// or `None` if there was not a single one.
pub fn parse_numeric_prod<F>(string: &str) -> Option<F>
where
    F: FromStr + Copy + std::ops::Mul<Output = F>,
{
    string
        // Everything that is not an ASCII digit acts as a separator
        .split(|c: char| !c.is_ascii_digit())
        // Adjacent separators produce empty pieces which carry no number
        .filter(|digits| !digits.is_empty())
        .filter_map(|digits| digits.parse::<F>().ok())
        .reduce(|product, x| product * x)
}
/// Parse all numbers occurring in a string and return them as a vec
pub fn parse_numeric_vec<F>(string: &str) -> Vec<F>
where
F: FromStr,

View file

@ -199,7 +199,20 @@ pub fn parse_timeago(lang: Language, textual_date: &str) -> Option<TimeAgo> {
let entry = dictionary::entry(lang);
let filtered_str = filter_str(textual_date);
let qu: u8 = util::parse_numeric(textual_date).unwrap_or(1);
let qu: u8 = util::parse_numeric_prod(textual_date).unwrap_or(1);
// French and Spanish use 'a' as a short form of years.
// Since 'a' is also a regular word in these languages, it cannot be parsed as a token.
if matches!(
lang,
Language::Fr | Language::FrCa | Language::Es | Language::Es419 | Language::EsUs
) && textual_date.ends_with(" a")
{
return Some(TimeAgo {
n: qu,
unit: TimeUnit::Year,
});
}
TaTokenParser::new(&entry, util::lang_by_char(lang), false, &filtered_str)
.next()
@ -403,10 +416,10 @@ mod tests {
use crate::util::tests::TESTFILES;
#[rstest]
#[case(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
#[case(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
#[case::de(Language::De, "vor 1 Sekunde", Some(TimeAgo { n: 1, unit: TimeUnit::Second }))]
#[case::ar(Language::Ar, "قبل ساعة واحدة", Some(TimeAgo { n: 1, unit: TimeUnit::Hour }))]
// No-break space
#[case(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
#[case::nbsp(Language::De, "Vor 3\u{a0}Tagen aktualisiert", Some(TimeAgo { n: 3, unit: TimeUnit::Day }))]
fn t_parse(
#[case] lang: Language,
#[case] textual_date: &str,
@ -581,7 +594,196 @@ mod tests {
assert_eq!(
parse_timeago(*lang, s),
Some(expect[n]),
"Language: {lang}, n: {n}"
"Language: {lang}, txt: `{s}`"
);
});
})
}
#[test]
fn t_testfile_short() {
let json_path = path!(*TESTFILES / "dict" / "timeago_samples_short.json");
let expect = [
TimeAgo {
n: 35,
unit: TimeUnit::Minute,
},
TimeAgo {
n: 50,
unit: TimeUnit::Minute,
},
TimeAgo {
n: 1,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 2,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 3,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 4,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 5,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 6,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 7,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 8,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 9,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 12,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 17,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 18,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 19,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 20,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 10,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 11,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 13,
unit: TimeUnit::Hour,
},
TimeAgo {
n: 1,
unit: TimeUnit::Day,
},
TimeAgo {
n: 2,
unit: TimeUnit::Day,
},
TimeAgo {
n: 3,
unit: TimeUnit::Day,
},
TimeAgo {
n: 4,
unit: TimeUnit::Day,
},
TimeAgo {
n: 6,
unit: TimeUnit::Day,
},
TimeAgo {
n: 8,
unit: TimeUnit::Day,
},
TimeAgo {
n: 10,
unit: TimeUnit::Day,
},
TimeAgo {
n: 11,
unit: TimeUnit::Day,
},
TimeAgo {
n: 12,
unit: TimeUnit::Day,
},
TimeAgo {
n: 13,
unit: TimeUnit::Day,
},
TimeAgo {
n: 2,
unit: TimeUnit::Week,
},
TimeAgo {
n: 3,
unit: TimeUnit::Week,
},
TimeAgo {
n: 1,
unit: TimeUnit::Month,
},
TimeAgo {
n: 4,
unit: TimeUnit::Week,
},
TimeAgo {
n: 7,
unit: TimeUnit::Month,
},
TimeAgo {
n: 10,
unit: TimeUnit::Month,
},
TimeAgo {
n: 1,
unit: TimeUnit::Year,
},
TimeAgo {
n: 2,
unit: TimeUnit::Year,
},
TimeAgo {
n: 3,
unit: TimeUnit::Year,
},
TimeAgo {
n: 4,
unit: TimeUnit::Year,
},
TimeAgo {
n: 5,
unit: TimeUnit::Year,
},
];
let json_file = File::open(json_path).unwrap();
let strings_map: BTreeMap<Language, Vec<String>> =
serde_json::from_reader(BufReader::new(json_file)).unwrap();
strings_map.iter().for_each(|(lang, strings)| {
assert_eq!(strings.len(), expect.len(), "Language: {lang}");
strings.iter().enumerate().for_each(|(n, s)| {
let mut exp = expect[n];
if *lang == Language::Mn && exp.unit == TimeUnit::Week {
exp.unit = TimeUnit::Day;
exp.n *= 7;
}
assert_eq!(
parse_timeago(*lang, s),
Some(exp),
"Language: {lang}, txt: `{s}`"
);
});
})

2
testfiles/dict/cldr_data/.gitignore vendored Normal file
View file

@ -0,0 +1,2 @@
node_modules
package-lock.json

View file

@ -0,0 +1,162 @@
// Node.js file system module, used to read and rewrite the dictionary file.
const fs = require("fs");
// Path of the dictionary JSON file that this script loads, extends with the
// collected `timeago_tokens` and writes back in place. The path is relative,
// so it is resolved against the working directory the script is started from.
const DICT_PATH = "../dictionary.json";
/**
 * Map a language code used by the dictionary to the directory name used by
 * the CLDR packages.
 *
 * @param {string} lang language code from the dictionary
 * @returns {string} the CLDR locale name; codes without a special mapping
 *   are returned unchanged
 */
function translateLang(lang) {
  const cldrNames = new Map([
    ["iw", "he"], // Hebrew
    ["zh-CN", "zh-Hans"], // Simplified Chinese
    ["zh-HK", "zh-Hant-HK"],
    ["zh-TW", "zh-Hant"],
  ]);
  return cldrNames.has(lang) ? cldrNames.get(lang) : lang;
}
/**
 * Split a CLDR relative-time pattern (e.g. `"{0} days ago"`) into tokens.
 *
 * The pattern is lowercased, every `{0}` placeholder is removed and every
 * hyphen is treated as a word separator.
 *
 * @param {string} s CLDR pattern
 * @param {boolean} by_char if true, return one token per character with all
 *   whitespace removed; otherwise return the whitespace-separated words
 * @returns {string[]} the tokens, never containing empty strings
 */
function prepString(s, by_char) {
  // Global patterns are required here: `replace` with a plain string (or a
  // regex without the `g` flag) only replaces the first occurrence.
  const cleaned = s.toLowerCase().replace(/\{0\}/g, "").replace(/-/g, " ");

  if (by_char) {
    // Strip all whitespace so that no whitespace character ends up as a token.
    // Array.from splits by code point, so characters outside the BMP are not
    // broken up into lone surrogates.
    return Array.from(cleaned.replace(/\s/g, ""));
  }

  // Leading/trailing whitespace yields empty strings when splitting; drop them.
  return cleaned.split(/\s+/).filter((word) => word.length > 0);
}
/**
 * Record that `word` stands for `unit` in the token table.
 *
 * Empty words are ignored. If the word is already present with a different
 * value, the entry is set to `null` to mark it as ambiguous.
 *
 * @param {Object} tokens token table, modified in place
 * @param {string} word token to store
 * @param {string} unit unit identifier the token maps to
 */
function storeToken(tokens, word, unit) {
  if (!word) {
    return;
  }
  const conflicting = word in tokens && tokens[word] != unit;
  tokens[word] = conflicting ? null : unit;
}
/**
 * Clean up a collected token table and report time units without a token.
 *
 * Entries whose value is `null` are removed from `tokens`. For the language
 * `iw`, an extra token with the value `"2"` is added first. If, afterwards,
 * any of the units Y, M, W, D, h, m, s is not the value of at least one
 * token, a message listing the missing units is logged.
 *
 * @param {Object} tokens token table, modified in place
 * @param {string} lang language code, used for the special case and the log message
 */
function validateTokens(tokens, lang) {
  const missingUnits = new Set(["Y", "M", "W", "D", "h", "m", "s"]);

  if (lang === "iw") {
    tokens["שתי"] = "2";
  }

  Object.keys(tokens).forEach((word) => {
    const unit = tokens[word];
    if (unit === null) {
      delete tokens[word];
      return;
    }
    missingUnits.delete(unit);
  });

  if (missingUnits.size === 0) {
    return;
  }
  console.log(
    `missing units ${JSON.stringify(
      [...missingUnits]
    )} for lang: ${lang}; tokens: ${JSON.stringify(tokens)}`
  );
}
/**
 * Clean up a collected table of relative-day tokens and report problems.
 *
 * Entries whose value is `null` are removed from `tokens`. A message is
 * logged if no token maps to `"0D"` or `"1D"`, or, when both are covered,
 * if more than two tokens remain.
 *
 * @param {Object} tokens token table, modified in place
 * @param {string} lang language code, only used for the log messages
 */
function validateNdTokens(tokens, lang) {
  const missingUnits = new Set(["0D", "1D"]);

  Object.keys(tokens).forEach((word) => {
    const unit = tokens[word];
    if (unit === null) {
      delete tokens[word];
      return;
    }
    missingUnits.delete(unit);
  });

  if (missingUnits.size > 0) {
    console.log(
      `missing nd tokens ${JSON.stringify(
        [...missingUnits]
      )} for lang: ${lang}; tokens: ${JSON.stringify(tokens)}`
    );
    return;
  }

  if (Object.keys(tokens).length > 2) {
    console.log(
      `too many nd tokens for lang: ${lang}; tokens: ${JSON.stringify(tokens)}`
    );
  }
}
/**
 * Create a copy of an object whose keys are inserted in sorted order.
 *
 * @param {Object} obj object to copy
 * @returns {Object} new object with the same entries, keys added in the
 *   order produced by `Array.prototype.sort`
 */
const sortObject = (obj) => {
  const sorted = {};
  for (const key of Object.keys(obj).sort()) {
    sorted[key] = obj[key];
  }
  return sorted;
};
/**
 * Collect the time-ago tokens of one language from the CLDR date fields.
 *
 * For every unit in `units`, both the regular and the `-short` past-tense
 * patterns are split into tokens and stored in `timeagoTokens`. Patterns
 * without a `{0}` placeholder get the number implied by their plural
 * category (zero/one/two) prepended to the unit. The texts of the day
 * fields `relative-type-0` and `relative-type--1` are stored in
 * `timeagoNdTokens` as `0D` and `1D`.
 *
 * @param {string} lang language code from the dictionary
 * @param {boolean} by_char whether patterns are split per character
 * @param {Object} timeagoTokens token table for time units, modified in place
 * @param {Object} timeagoNdTokens token table for relative days, modified in place
 * @throws {Error} if a pattern without placeholder has an unexpected plural category
 */
function collectTimeago(lang, by_char, timeagoTokens, timeagoNdTokens) {
  const cldrLang = translateLang(lang);
  const dates = require(`cldr-dates-modern/main/${cldrLang}/dateFields.json`);
  const dateFields = dates.main[cldrLang].dates.fields;

  // Plural category suffix of the pattern key -> number prepended to the unit
  const quantityPrefixes = [
    ["-zero", "0"],
    ["-one", "1"],
    ["-two", "2"],
  ];

  for (const [unitStr, unit] of Object.entries(units)) {
    const fieldVariants = [dateFields[unitStr], dateFields[`${unitStr}-short`]];

    for (const unitFields of fieldVariants) {
      const pastPatterns = Object.entries(unitFields["relativeTime-type-past"]);

      for (const [sKey, s] of pastPatterns) {
        let u = unit;

        // Without a placeholder the number is part of the wording itself
        if (!s.includes("{0}")) {
          const prefix = quantityPrefixes.find(([suffix]) => sKey.endsWith(suffix));
          if (!prefix) {
            throw new Error(`Invalid time pattern. lang: ${lang} key: ${sKey}`);
          }
          u = prefix[1] + u;
        }

        prepString(s, by_char).forEach((word) => storeToken(timeagoTokens, word, u));
      }
    }
  }

  const relativeDays = [
    ["relative-type-0", "0D"],
    ["relative-type--1", "1D"],
  ];
  for (const [fieldKey, ndUnit] of relativeDays) {
    const text = dateFields.day[fieldKey];
    if (text) {
      prepString(text, by_char).forEach((word) => storeToken(timeagoNdTokens, word, ndUnit));
    }
  }
}
// Load the existing dictionary; it is extended below and written back.
const dict = JSON.parse(fs.readFileSync(DICT_PATH));

// CLDR date field name -> unit identifier stored as token value
const units = {
  second: "s",
  minute: "m",
  hour: "h",
  day: "D",
  week: "W",
  month: "M",
  year: "Y",
};

for (const [mainLang, entry] of Object.entries(dict)) {
  // Tokens of a language and all its equivalents share one table
  const langs = [mainLang, ...entry["equivalent"]];
  const timeagoTokens = {};
  const timeagoNdTokens = {};

  // `const` keeps the loop variable local; without a declaration the
  // assignment creates an implicit global and fails in strict mode.
  for (const lang of langs) {
    collectTimeago(lang, entry["by_char"], timeagoTokens, timeagoNdTokens);
  }

  validateTokens(timeagoTokens, mainLang);
  // validateNdTokens(timeagoNdTokens, mainLang);

  dict[mainLang]["timeago_tokens"] = timeagoTokens;
  // dict[mainLang]["timeago_nd_tokens"] = timeagoNdTokens;
}

// Overwrite the dictionary file, pretty-printed with 2 spaces of indentation
fs.writeFileSync(DICT_PATH, JSON.stringify(dict, null, 2));

View file

@ -0,0 +1,12 @@
{
"name": "cldr_data",
"version": "1.0.0",
"description": "Build the RustyPipe parsing dictionary using CLDR data",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"dependencies": {
"cldr-dates-modern": "^43.0.0",
"cldr-numbers-modern": "^43.0.0"
}
}

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2336,7 +2336,12 @@ fn lang() -> Language {
/// Get a new RustyPipe instance
#[fixture]
fn rp(lang: Language) -> RustyPipe {
RustyPipe::builder().strict().lang(lang).build()
let vdata = std::env::var("YT_VDATA").ok();
RustyPipe::builder()
.strict()
.lang(lang)
.visitor_data_opt(vdata)
.build()
}
/// Get a flag signaling if the language is set to English