193 lines
		
	
	
	
		
			7.2 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			193 lines
		
	
	
	
		
			7.2 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
use std::fmt::Write;
 | 
						|
 | 
						|
use once_cell::sync::Lazy;
 | 
						|
use path_macro::path;
 | 
						|
use regex::Regex;
 | 
						|
 | 
						|
use crate::{
 | 
						|
    model::TimeUnit,
 | 
						|
    util::{self, SRC_DIR},
 | 
						|
};
 | 
						|
 | 
						|
fn parse_tu(tu: &str) -> (u8, Option<TimeUnit>) {
 | 
						|
    static TU_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new(r"^(\d*)(\w*)$").unwrap());
 | 
						|
    match TU_PATTERN.captures(tu) {
 | 
						|
        Some(cap) => (
 | 
						|
            cap.get(1).unwrap().as_str().parse().unwrap_or(1),
 | 
						|
            match cap.get(2).unwrap().as_str() {
 | 
						|
                "s" => Some(TimeUnit::Second),
 | 
						|
                "m" => Some(TimeUnit::Minute),
 | 
						|
                "h" => Some(TimeUnit::Hour),
 | 
						|
                "D" => Some(TimeUnit::Day),
 | 
						|
                "W" => Some(TimeUnit::Week),
 | 
						|
                "M" => Some(TimeUnit::Month),
 | 
						|
                "Y" => Some(TimeUnit::Year),
 | 
						|
                "Wl" => Some(TimeUnit::LastWeek),
 | 
						|
                "Wd" => Some(TimeUnit::LastWeekday),
 | 
						|
                "" => None,
 | 
						|
                _ => panic!("invalid time unit: {tu}"),
 | 
						|
            },
 | 
						|
        ),
 | 
						|
        None => panic!("invalid time unit: {tu}"),
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
pub fn generate_dictionary() {
 | 
						|
    let dict = util::read_dict();
 | 
						|
 | 
						|
    let code_head = r#"// This file is automatically generated. DO NOT EDIT.
 | 
						|
// See codegen/gen_dictionary.rs for the generation code.
 | 
						|
#![allow(clippy::unreadable_literal)]
 | 
						|
 | 
						|
//! The dictionary contains the information required to parse dates and numbers
 | 
						|
//! in all supported languages.
 | 
						|
 | 
						|
use crate::{
 | 
						|
    model::AlbumType,
 | 
						|
    param::Language,
 | 
						|
    util::timeago::{TaToken, TimeUnit},
 | 
						|
};
 | 
						|
 | 
						|
/// Dictionary entry containing language-specific parsing information
 | 
						|
pub(crate) struct Entry {
 | 
						|
    /// Tokens for parsing timeago strings.
 | 
						|
    ///
 | 
						|
    /// Format: Parsed token -> \[Quantity\] Identifier
 | 
						|
    ///
 | 
						|
    /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay),
 | 
						|
    /// `h`(our), `m`(inute), `s`(econd)
 | 
						|
    pub timeago_tokens: phf::Map<&'static str, TaToken>,
 | 
						|
    /// True if the month has to be parsed before the day
 | 
						|
    ///
 | 
						|
    /// Examples:
 | 
						|
    ///
 | 
						|
    /// - 03.01.2020 => DMY => false
 | 
						|
    /// - 01/03/2020 => MDY => true
 | 
						|
    pub month_before_day: bool,
 | 
						|
    /// Tokens for parsing month names.
 | 
						|
    ///
 | 
						|
    /// Format: Parsed token -> Month number (starting from 1)
 | 
						|
    pub months: phf::Map<&'static str, u8>,
 | 
						|
    /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow)
 | 
						|
    ///
 | 
						|
    /// Format: Parsed token -> \[Quantity\] Identifier
 | 
						|
    pub timeago_nd_tokens: phf::Map<&'static str, TaToken>,
 | 
						|
    /// Are commas (instead of points) used as decimal separators?
 | 
						|
    pub comma_decimal: bool,
 | 
						|
    /// Tokens for parsing decimal prefixes (K, M, B, ...)
 | 
						|
    ///
 | 
						|
    /// Format: Parsed token -> decimal power
 | 
						|
    pub number_tokens: phf::Map<&'static str, u8>,
 | 
						|
    /// Tokens for parsing number strings with no digits (e.g. "No videos")
 | 
						|
    ///
 | 
						|
    /// Format: Parsed token -> value
 | 
						|
    pub number_nd_tokens: phf::Map<&'static str, u8>,
 | 
						|
    /// Names of album types (Album, Single, ...)
 | 
						|
    ///
 | 
						|
    /// Format: Parsed text -> Album type
 | 
						|
    pub album_types: phf::Map<&'static str, AlbumType>,
 | 
						|
    /// Channel name prefix on playlist pages (e.g. `by`)
 | 
						|
    pub chan_prefix: &'static str,
 | 
						|
    /// Channel name suffix on playlist pages
 | 
						|
    pub chan_suffix: &'static str,
 | 
						|
    /// "Other versions" title on album pages
 | 
						|
    pub album_versions_title: &'static str,
 | 
						|
}
 | 
						|
"#;
 | 
						|
 | 
						|
    let mut code_timeago_tokens = r#"#[rustfmt::skip]
 | 
						|
pub(crate) fn entry(lang: Language) -> Entry {
 | 
						|
    match lang {
 | 
						|
        "#
 | 
						|
    .to_owned();
 | 
						|
 | 
						|
    for (lang, entry) in &dict {
 | 
						|
        // Match selector
 | 
						|
        let mut selector = format!("Language::{lang:?}");
 | 
						|
        entry.equivalent.iter().for_each(|eq| {
 | 
						|
            write!(selector, " | Language::{eq:?}").unwrap();
 | 
						|
        });
 | 
						|
 | 
						|
        // Timeago tokens
 | 
						|
        let mut ta_tokens = phf_codegen::Map::<&str>::new();
 | 
						|
        entry.timeago_tokens.iter().for_each(|(txt, tu_str)| {
 | 
						|
            let (n, unit) = parse_tu(tu_str);
 | 
						|
            match unit {
 | 
						|
                Some(unit) => ta_tokens.entry(
 | 
						|
                    txt,
 | 
						|
                    &format!("TaToken {{ n: {n}, unit: Some(TimeUnit::{unit:?}) }}"),
 | 
						|
                ),
 | 
						|
                None => ta_tokens.entry(txt, &format!("TaToken {{ n: {n}, unit: None }}")),
 | 
						|
            };
 | 
						|
        });
 | 
						|
 | 
						|
        // Months
 | 
						|
        let mut months = phf_codegen::Map::<&str>::new();
 | 
						|
        entry.months.iter().for_each(|(txt, n_mon)| {
 | 
						|
            months.entry(txt, &n_mon.to_string());
 | 
						|
        });
 | 
						|
 | 
						|
        // Timeago(ND) tokens
 | 
						|
        let mut ta_nd_tokens = phf_codegen::Map::<&str>::new();
 | 
						|
        entry.timeago_nd_tokens.iter().for_each(|(txt, tu_str)| {
 | 
						|
            let (n, unit) = parse_tu(tu_str);
 | 
						|
            match unit {
 | 
						|
                Some(unit) => ta_nd_tokens.entry(
 | 
						|
                    txt,
 | 
						|
                    &format!("TaToken {{ n: {n}, unit: Some(TimeUnit::{unit:?}) }}"),
 | 
						|
                ),
 | 
						|
                None => ta_nd_tokens.entry(txt, &format!("TaToken {{ n: {n}, unit: None }}")),
 | 
						|
            };
 | 
						|
        });
 | 
						|
 | 
						|
        // Number tokens
 | 
						|
        let mut number_tokens = phf_codegen::Map::<&str>::new();
 | 
						|
        entry.number_tokens.iter().for_each(|(txt, mag)| {
 | 
						|
            number_tokens.entry(txt, &mag.to_string());
 | 
						|
        });
 | 
						|
 | 
						|
        // Number nd tokens
 | 
						|
        let mut number_nd_tokens = phf_codegen::Map::<&str>::new();
 | 
						|
        entry.number_nd_tokens.iter().for_each(|(txt, mag)| {
 | 
						|
            number_nd_tokens.entry(txt, &mag.to_string());
 | 
						|
        });
 | 
						|
 | 
						|
        // Album types
 | 
						|
        let mut album_types = phf_codegen::Map::<&str>::new();
 | 
						|
        entry.album_types.iter().for_each(|(txt, album_type)| {
 | 
						|
            album_types.entry(txt, &format!("AlbumType::{album_type:?}"));
 | 
						|
        });
 | 
						|
 | 
						|
        let code_ta_tokens = &ta_tokens
 | 
						|
            .build()
 | 
						|
            .to_string()
 | 
						|
            .replace('\n', "\n            ");
 | 
						|
        let code_ta_nd_tokens = &ta_nd_tokens
 | 
						|
            .build()
 | 
						|
            .to_string()
 | 
						|
            .replace('\n', "\n            ");
 | 
						|
        let code_months = &months.build().to_string().replace('\n', "\n            ");
 | 
						|
        let code_number_tokens = &number_tokens
 | 
						|
            .build()
 | 
						|
            .to_string()
 | 
						|
            .replace('\n', "\n            ");
 | 
						|
        let code_number_nd_tokens = &number_nd_tokens
 | 
						|
            .build()
 | 
						|
            .to_string()
 | 
						|
            .replace('\n', "\n            ");
 | 
						|
        let code_album_types = &album_types
 | 
						|
            .build()
 | 
						|
            .to_string()
 | 
						|
            .replace('\n', "\n            ");
 | 
						|
 | 
						|
        write!(code_timeago_tokens, "{} => Entry {{\n            timeago_tokens: {},\n            month_before_day: {:?},\n            months: {},\n            timeago_nd_tokens: {},\n            comma_decimal: {:?},\n            number_tokens: {},\n            number_nd_tokens: {},\n            album_types: {},\n            chan_prefix: {:?},\n            chan_suffix: {:?},\n            album_versions_title: {:?},\n        }},\n        ",
 | 
						|
        selector, code_ta_tokens, entry.month_before_day, code_months, code_ta_nd_tokens, entry.comma_decimal, code_number_tokens, code_number_nd_tokens, code_album_types, entry.chan_prefix, entry.chan_suffix, entry.album_versions_title).unwrap();
 | 
						|
    }
 | 
						|
 | 
						|
    code_timeago_tokens = code_timeago_tokens.trim_end().to_owned() + "\n    }\n}\n";
 | 
						|
 | 
						|
    let code = format!("{code_head}\n{code_timeago_tokens}");
 | 
						|
 | 
						|
    let target_path = path!(*SRC_DIR / "util" / "dictionary.rs");
 | 
						|
    std::fs::write(target_path, code).unwrap();
 | 
						|
}
 |