use std::fmt::Write; use once_cell::sync::Lazy; use path_macro::path; use regex::Regex; use crate::{ model::TimeUnit, util::{self, SRC_DIR}, }; fn parse_tu(tu: &str) -> (u8, Option) { static TU_PATTERN: Lazy = Lazy::new(|| Regex::new(r"^(\d*)(\w*)$").unwrap()); match TU_PATTERN.captures(tu) { Some(cap) => ( cap.get(1).unwrap().as_str().parse().unwrap_or(1), match cap.get(2).unwrap().as_str() { "s" => Some(TimeUnit::Second), "m" => Some(TimeUnit::Minute), "h" => Some(TimeUnit::Hour), "D" => Some(TimeUnit::Day), "W" => Some(TimeUnit::Week), "M" => Some(TimeUnit::Month), "Y" => Some(TimeUnit::Year), "Wl" => Some(TimeUnit::LastWeek), "Wd" => Some(TimeUnit::LastWeekday), "" => None, _ => panic!("invalid time unit: {tu}"), }, ), None => panic!("invalid time unit: {tu}"), } } pub fn generate_dictionary() { let dict = util::read_dict(); let code_head = r#"// This file is automatically generated. DO NOT EDIT. // See codegen/gen_dictionary.rs for the generation code. #![allow(clippy::unreadable_literal)] //! The dictionary contains the information required to parse dates and numbers //! in all supported languages. use crate::{ model::AlbumType, param::Language, util::timeago::{TaToken, TimeUnit}, }; /// Dictionary entry containing language-specific parsing information pub(crate) struct Entry { /// Tokens for parsing timeago strings. /// /// Format: Parsed token -> \[Quantity\] Identifier /// /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay), /// `h`(our), `m`(inute), `s`(econd) pub timeago_tokens: phf::Map<&'static str, TaToken>, /// True if the month has to be parsed before the day /// /// Examples: /// /// - 03.01.2020 => DMY => false /// - 01/03/2020 => MDY => true pub month_before_day: bool, /// Tokens for parsing month names. /// /// Format: Parsed token -> Month number (starting from 1) pub months: phf::Map<&'static str, u8>, /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow) /// /// Format: Parsed token -> \[Quantity\] Identifier pub timeago_nd_tokens: phf::Map<&'static str, TaToken>, /// Are commas (instead of points) used as decimal separators? pub comma_decimal: bool, /// Tokens for parsing decimal prefixes (K, M, B, ...) /// /// Format: Parsed token -> decimal power pub number_tokens: phf::Map<&'static str, u8>, /// Tokens for parsing number strings with no digits (e.g. "No videos") /// /// Format: Parsed token -> value pub number_nd_tokens: phf::Map<&'static str, u8>, /// Names of album types (Album, Single, ...) /// /// Format: Parsed text -> Album type pub album_types: phf::Map<&'static str, AlbumType>, /// Channel name prefix on playlist pages (e.g. `by`) pub chan_prefix: &'static str, /// Channel name suffix on playlist pages pub chan_suffix: &'static str, /// "Other versions" title on album pages pub album_versions_title: &'static str, } "#; let mut code_timeago_tokens = r#"#[rustfmt::skip] pub(crate) fn entry(lang: Language) -> Entry { match lang { "# .to_owned(); for (lang, entry) in &dict { // Match selector let mut selector = format!("Language::{lang:?}"); entry.equivalent.iter().for_each(|eq| { write!(selector, " | Language::{eq:?}").unwrap(); }); // Timeago tokens let mut ta_tokens = phf_codegen::Map::<&str>::new(); entry.timeago_tokens.iter().for_each(|(txt, tu_str)| { let (n, unit) = parse_tu(tu_str); match unit { Some(unit) => ta_tokens.entry( txt, &format!("TaToken {{ n: {n}, unit: Some(TimeUnit::{unit:?}) }}"), ), None => ta_tokens.entry(txt, &format!("TaToken {{ n: {n}, unit: None }}")), }; }); // Months let mut months = phf_codegen::Map::<&str>::new(); entry.months.iter().for_each(|(txt, n_mon)| { months.entry(txt, &n_mon.to_string()); }); // Timeago(ND) tokens let mut ta_nd_tokens = phf_codegen::Map::<&str>::new(); entry.timeago_nd_tokens.iter().for_each(|(txt, tu_str)| { let (n, unit) = parse_tu(tu_str); match unit { Some(unit) => ta_nd_tokens.entry( txt, &format!("TaToken {{ n: {n}, unit: Some(TimeUnit::{unit:?}) }}"), ), None => ta_nd_tokens.entry(txt, &format!("TaToken {{ n: {n}, unit: None }}")), }; }); // Number tokens let mut number_tokens = phf_codegen::Map::<&str>::new(); entry.number_tokens.iter().for_each(|(txt, mag)| { number_tokens.entry(txt, &mag.to_string()); }); // Number nd tokens let mut number_nd_tokens = phf_codegen::Map::<&str>::new(); entry.number_nd_tokens.iter().for_each(|(txt, mag)| { number_nd_tokens.entry(txt, &mag.to_string()); }); // Album types let mut album_types = phf_codegen::Map::<&str>::new(); entry.album_types.iter().for_each(|(txt, album_type)| { album_types.entry(txt, &format!("AlbumType::{album_type:?}")); }); let code_ta_tokens = &ta_tokens .build() .to_string() .replace('\n', "\n "); let code_ta_nd_tokens = &ta_nd_tokens .build() .to_string() .replace('\n', "\n "); let code_months = &months.build().to_string().replace('\n', "\n "); let code_number_tokens = &number_tokens .build() .to_string() .replace('\n', "\n "); let code_number_nd_tokens = &number_nd_tokens .build() .to_string() .replace('\n', "\n "); let code_album_types = &album_types .build() .to_string() .replace('\n', "\n "); write!(code_timeago_tokens, "{} => Entry {{\n timeago_tokens: {},\n month_before_day: {:?},\n months: {},\n timeago_nd_tokens: {},\n comma_decimal: {:?},\n number_tokens: {},\n number_nd_tokens: {},\n album_types: {},\n chan_prefix: {:?},\n chan_suffix: {:?},\n album_versions_title: {:?},\n }},\n ", selector, code_ta_tokens, entry.month_before_day, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types, entry.chan_prefix, entry.chan_suffix, entry.album_versions_title).unwrap(); } code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n }\n}\n"; let code = format!("{code_head}\n{code_timeago_tokens}"); let target_path = path!(*SRC_DIR / "util" / "dictionary.rs"); std::fs::write(target_path, code).unwrap(); }