From 2c4d70cc0d79ca5df32946833cefbf89f5a89c1e Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Mon, 22 May 2023 15:17:05 +0200 Subject: [PATCH 1/2] fix tests --- tests/youtube.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/youtube.rs b/tests/youtube.rs index 95baa2d..bd5e1e3 100644 --- a/tests/youtube.rs +++ b/tests/youtube.rs @@ -1001,7 +1001,7 @@ fn channel_order( )) .unwrap(); // Upload dates should be in descending order - if tab != ChannelVideoTab::Shorts { + if tab == ChannelVideoTab::Videos { let mut latest_items = latest.items.iter().peekable(); while let (Some(v), Some(next_v)) = (latest_items.next(), latest_items.peek()) { if !v.is_upcoming && !v.is_live && !next_v.is_upcoming && !next_v.is_live { From da8b2a27fceeab07257f2f9c1da18c34a8da5d5f Mon Sep 17 00:00:00 2001 From: ThetaDev Date: Mon, 22 May 2023 17:44:14 +0200 Subject: [PATCH 2/2] fix: Swahili duration text parsing --- src/client/response/video_item.rs | 11 +++++++++-- src/util/timeago.rs | 20 +++++++++++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/src/client/response/video_item.rs b/src/client/response/video_item.rs index fa0577c..d030ed4 100644 --- a/src/client/response/video_item.rs +++ b/src/client/response/video_item.rs @@ -531,8 +531,15 @@ impl YouTubeListMapper { }); let length = video.accessibility.and_then(|acc| { - let parts = ACCESSIBILITY_SEP_REGEX.split(&acc).collect::<Vec<_>>(); - if parts.len() > 2 { + // The video title has to be stripped from the beginning because in Swahili + // the duration follows the title with no separator (probably a bug). + // Example: `what I do with leftoversdakika 1 - cheza video` + let parts = ACCESSIBILITY_SEP_REGEX + .split(acc.trim_start_matches(&video.headline)) + .collect::<Vec<_>>(); + if parts.len() > 1 { + // In Russian, the duration is the last part + // Example: `Воспроизвести видео – \"hangover food\". 
Его продолжительность – 58 секунд.` let i = match self.lang { Language::Ru => 1, _ => 2, diff --git a/src/util/timeago.rs b/src/util/timeago.rs index be490b6..8f9cebd 100644 --- a/src/util/timeago.rs +++ b/src/util/timeago.rs @@ -344,7 +344,21 @@ struct DurationTxtSegment { word: String, } -fn split_duration_txt(txt: &str, start_c: bool) -> Vec<DurationTxtSegment> { +/// Split a video duration string into its segments. +/// +/// Each segment consists of a word and a string of digits (one of them may be empty). +/// +/// The `start_word` parameter determines whether the segments should start with a word +/// instead of a number. This is the case in Swahili and Singhalese. +/// +/// Example (start_word=false): +/// - `1 minute, 13 seconds` -> `{1;minute} {13;seconds}` +/// - `foo 1 minute, 13 seconds bar` -> `{foo} {1;minute} {13;seconds bar}` +/// +/// Example (start_word=true): +/// - `dakika 1 na sekunde 1` -> `{1;dakika} {1;na sekunde}` +/// - `foo dakika 1 na sekunde 1 bar` -> `{1;foo dakika} {1;na sekunde} {bar}` +fn split_duration_txt(txt: &str, start_word: bool) -> Vec<DurationTxtSegment> { let mut segments = Vec::new(); // 1: parse digits, 2: parse word @@ -353,14 +367,14 @@ fn split_duration_txt(txt: &str, start_c: bool) -> Vec<DurationTxtSegment> { for c in txt.trim().chars() { if c.is_ascii_digit() { - if state == 2 && (!seg.digits.is_empty() || (!start_c && segments.is_empty())) { + if state == 2 && (!seg.digits.is_empty() || (!start_word && segments.is_empty())) { segments.push(seg); seg = DurationTxtSegment::default(); } seg.digits.push(c); state = 1; } else { - if (state == 1) && (!seg.word.is_empty() || (start_c && segments.is_empty())) { + if (state == 1) && (!seg.word.is_empty() || (start_word && segments.is_empty())) { segments.push(seg); seg = DurationTxtSegment::default(); }