use std::{ collections::BTreeMap, fs::File, io::BufReader, path::{Path, PathBuf}, str::FromStr, }; use once_cell::sync::Lazy; use path_macro::path; use rustypipe::{model::AlbumType, param::Language}; use serde::{Deserialize, Serialize}; static DICT_PATH: Lazy = Lazy::new(|| path!("testfiles" / "dict" / "dictionary.json")); type Dictionary = BTreeMap; #[derive(Debug, Default, Serialize, Deserialize)] #[serde(default)] pub struct DictEntry { /// List of languages that should be treated equally (e.g. EnUs/EnGb/EnIn) pub equivalent: Vec, /// Should the language be parsed by character instead of by word? /// (e.g. Chinese/Japanese) pub by_char: bool, /// Tokens for parsing timeago strings. /// /// Format: Parsed token -> \[Quantity\] Identifier /// /// Identifiers: `Y`(ear), `M`(month), `W`(eek), `D`(ay), /// `h`(our), `m`(inute), `s`(econd) pub timeago_tokens: BTreeMap, /// Order in which to parse numeric date components. Formatted as /// a string of date identifiers (Y, M, D). /// /// Examples: /// /// - 03.01.2020 => `"DMY"` /// - Jan 3, 2020 => `"DY"` pub date_order: String, /// Order in which to parse datetimes. Formatted as a string of /// date/time identifiers (Y, y, M, D, H, h, m). /// /// Examples: /// /// - 2023-04-14 15:00 => `"YMDHm"` /// - 4/14/23, 3:00 PM => `"MDyhm"` pub datetime_order: String, /// Tokens for parsing month names. /// /// Format: Parsed token -> Month number (starting from 1) pub months: BTreeMap, /// Tokens for parsing date strings with no digits (e.g. Today, Tomorrow) /// /// Format: Parsed token -> \[Quantity\] Identifier pub timeago_nd_tokens: BTreeMap, /// Are commas (instead of points) used as decimal separators? pub comma_decimal: bool, /// Tokens for parsing decimal prefixes (K, M, B, ...) /// /// Format: Parsed token -> decimal power pub number_tokens: BTreeMap, /// Names of album types (Album, Single, ...) /// /// Format: Parsed text -> Album type pub album_types: BTreeMap, } #[derive(Clone, Debug, Deserialize)] pub struct TextRuns { pub runs: Vec, } #[derive(Clone, Debug, Deserialize)] pub struct Text { #[serde(alias = "simpleText")] pub text: String, } pub fn read_dict(project_root: &Path) -> Dictionary { let json_path = path!(project_root / *DICT_PATH); let json_file = File::open(json_path).unwrap(); serde_json::from_reader(BufReader::new(json_file)).unwrap() } pub fn write_dict(project_root: &Path, dict: &Dictionary) { let json_path = path!(project_root / *DICT_PATH); let json_file = File::create(json_path).unwrap(); serde_json::to_writer_pretty(json_file, dict).unwrap(); } pub fn filter_datestr(string: &str) -> String { string .to_lowercase() .chars() .filter_map(|c| { if c == '\u{200b}' || c.is_ascii_digit() { None } else if c == '-' { Some(' ') } else { Some(c) } }) .collect() } pub fn filter_largenumstr(string: &str) -> String { string .chars() .filter(|c| !matches!(c, '\u{200b}' | '.' | ',') && !c.is_ascii_digit()) .collect() } /// Parse a string after removing all non-numeric characters pub fn parse_numeric(string: &str) -> Result where F: FromStr, { let mut buf = String::new(); for c in string.chars() { if c.is_ascii_digit() { buf.push(c); } } buf.parse() } /// Parse all numbers occurring in a string and reurn them as a vec pub fn parse_numeric_vec(string: &str) -> Vec where F: FromStr, { let mut numbers = vec![]; let mut buf = String::new(); for c in string.chars() { if c.is_ascii_digit() { buf.push(c); } else if !buf.is_empty() { buf.parse::().map_or((), |n| numbers.push(n)); buf.clear(); } } if !buf.is_empty() { buf.parse::().map_or((), |n| numbers.push(n)); } numbers }