84 lines
		
	
	
	
		
			2.2 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			84 lines
		
	
	
	
		
			2.2 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
use std::{
 | 
						|
    collections::{BTreeMap, HashSet},
 | 
						|
    fs::File,
 | 
						|
};
 | 
						|
 | 
						|
use futures_util::{stream, StreamExt};
 | 
						|
use path_macro::path;
 | 
						|
use rustypipe::{
 | 
						|
    client::{RustyPipe, RustyPipeQuery},
 | 
						|
    param::{Language, LANGUAGES},
 | 
						|
};
 | 
						|
 | 
						|
use crate::util::DICT_DIR;
 | 
						|
 | 
						|
pub async fn collect_video_dates(concurrency: usize) {
 | 
						|
    let json_path = path!(*DICT_DIR / "timeago_samples_short.json");
 | 
						|
    let rp = RustyPipe::builder()
 | 
						|
        .visitor_data("Cgtwel9tMkh2eHh0USiyzc6jBg%3D%3D")
 | 
						|
        .build()
 | 
						|
        .unwrap();
 | 
						|
 | 
						|
    let channels = [
 | 
						|
        "UCeY0bbntWzzVIaj2z3QigXg",
 | 
						|
        "UCcmpeVbSSQlZRvHfdC-CRwg",
 | 
						|
        "UC65afEgL62PGFWXY7n6CUbA",
 | 
						|
        "UCEOXxzW2vU0P-0THehuIIeg",
 | 
						|
    ];
 | 
						|
 | 
						|
    let mut lang_strings: BTreeMap<Language, Vec<String>> = BTreeMap::new();
 | 
						|
    for lang in LANGUAGES {
 | 
						|
        println!("{lang}");
 | 
						|
        let query = rp.query().lang(lang);
 | 
						|
        let strings = stream::iter(channels)
 | 
						|
            .map(|id| get_channel_datestrings(&query, id))
 | 
						|
            .buffered(concurrency)
 | 
						|
            .collect::<Vec<_>>()
 | 
						|
            .await
 | 
						|
            .into_iter()
 | 
						|
            .flatten()
 | 
						|
            .collect::<Vec<_>>();
 | 
						|
        lang_strings.insert(lang, strings);
 | 
						|
    }
 | 
						|
 | 
						|
    let mut en_strings_uniq: HashSet<&str> = HashSet::new();
 | 
						|
    let mut uniq_ids: HashSet<usize> = HashSet::new();
 | 
						|
 | 
						|
    lang_strings[&Language::En]
 | 
						|
        .iter()
 | 
						|
        .enumerate()
 | 
						|
        .for_each(|(n, s)| {
 | 
						|
            if en_strings_uniq.insert(s) {
 | 
						|
                uniq_ids.insert(n);
 | 
						|
            }
 | 
						|
        });
 | 
						|
 | 
						|
    let strings_map = lang_strings
 | 
						|
        .iter()
 | 
						|
        .map(|(lang, strings)| {
 | 
						|
            (
 | 
						|
                lang,
 | 
						|
                strings
 | 
						|
                    .iter()
 | 
						|
                    .enumerate()
 | 
						|
                    .filter(|(n, _)| uniq_ids.contains(n))
 | 
						|
                    .map(|(_, s)| s)
 | 
						|
                    .collect::<Vec<_>>(),
 | 
						|
            )
 | 
						|
        })
 | 
						|
        .collect::<BTreeMap<_, _>>();
 | 
						|
 | 
						|
    let file = File::create(json_path).unwrap();
 | 
						|
    serde_json::to_writer_pretty(file, &strings_map).unwrap();
 | 
						|
}
 | 
						|
 | 
						|
async fn get_channel_datestrings(rp: &RustyPipeQuery, id: &str) -> Vec<String> {
 | 
						|
    let channel = rp.channel_videos(id).await.unwrap();
 | 
						|
 | 
						|
    channel
 | 
						|
        .content
 | 
						|
        .items
 | 
						|
        .into_iter()
 | 
						|
        .filter_map(|itm| itm.publish_date_txt)
 | 
						|
        .collect()
 | 
						|
}
 |