Compare commits

...

2 commits

Author SHA1 Message Date
235017ba29 feat: add ogg postprocessor to downloader
All checks were successful
ci/woodpecker/push/woodpecker Pipeline was successful
2023-05-15 12:11:38 +02:00
d9c45bb2e0 feat: add ogg_from_webm postprocessor 2023-05-15 12:11:38 +02:00
14 changed files with 1253 additions and 9 deletions

View file

@ -11,7 +11,7 @@ categories = ["api-bindings", "multimedia"]
include = ["/src", "README.md", "LICENSE", "!snapshots"]
[workspace]
members = [".", "codegen", "downloader", "cli"]
members = [".", "codegen", "downloader", "postprocessor", "cli"]
[features]
default = ["default-tls"]

View file

@ -30,6 +30,7 @@ rustls-tls-native-roots = [
[dependencies]
rustypipe = { path = "..", default-features = false }
rustypipe-postprocessor = { path = "../postprocessor" }
once_cell = "1.12.0"
regex = "1.6.0"
thiserror = "1.0.36"

View file

@ -14,7 +14,7 @@ use rand::Rng;
use regex::Regex;
use reqwest::{header, Client};
use rustypipe::{
model::{traits::FileFormat, AudioCodec, VideoCodec, VideoPlayer},
model::{traits::FileFormat, AudioCodec, AudioFormat, VideoCodec, VideoPlayer},
param::StreamFilter,
};
use tokio::{
@ -346,13 +346,34 @@ pub async fn download_video(
// Downloading combined video/audio stream (no conversion)
(Some(video), None) => {
pb.set_message(format!("Downloading {title}"));
download_single_file(
&video.url,
download_dir.join(output_fname).with_extension(&format),
http,
pb.clone(),
)
.await?;
download_single_file(&video.url, output_path, http, pb.clone()).await?;
}
// Downloading audio only
(None, Some(audio)) => {
pb.set_message(format!("Downloading {title}"));
let (dl_path, postprocessor) = match (audio.format, audio.codec) {
(AudioFormat::M4a, AudioCodec::Mp4a) => (output_path.clone(), None),
(AudioFormat::Webm, AudioCodec::Opus) => (
download_dir.join(format!(
"{}.audio{}",
output_fname,
audio.format.extension()
)),
Some(AudioFormat::Webm),
),
_ => (output_path.clone(), None),
};
download_single_file(&audio.url, &dl_path, http, pb.clone()).await?;
if let Some(postprocessor) = postprocessor {
pb.set_message(format!("Converting {title}"));
if postprocessor == AudioFormat::Webm {
rustypipe_postprocessor::ogg_from_webm::process(&dl_path, &output_path)?;
}
fs::remove_file(&dl_path).await?;
}
}
// Downloading split video/audio streams (requires conversion with ffmpeg)
_ => {

View file

@ -20,6 +20,8 @@ pub enum DownloadError {
Input(Cow<'static, str>),
#[error("error: {0}")]
Other(Cow<'static, str>),
#[error("Postprocessing error: {0}")]
Postprocessing(#[from] rustypipe_postprocessor::PostprocessingError),
}
/// Split an URL into its base string and parameter map

12
postprocessor/Cargo.toml Normal file
View file

@ -0,0 +1,12 @@
[package]
name = "rustypipe-postprocessor"
version = "0.1.0"
edition = "2021"
[dependencies]
thiserror = "1.0.40"
[dev-dependencies]
path_macro = "1.0.0"
once_cell = "1.12.0"
temp_testdir = "0.2.3"

75
postprocessor/src/crc.rs Normal file
View file

@ -0,0 +1,75 @@
// Ogg decoder and encoder written in Rust
//
// Original source: https://github.com/RustAudio/ogg
//
// Copyright (c) 2016-2017 est31 <MTest31@outlook.com>
// and contributors. All rights reserved.
// Redistribution or use only under the terms
// specified in the LICENSE file attached to this
// source distribution.
/*!
Implementation of the CRC algorithm with the
vorbis specific parameters and setup
*/
// Lookup table to enable bytewise CRC32 calculation
static CRC_LOOKUP_ARRAY: &[u32] = &lookup_array();
const fn get_tbl_elem(idx: u32) -> u32 {
let mut r: u32 = idx << 24;
let mut i = 0;
while i < 8 {
r = (r << 1) ^ (-(((r >> 31) & 1) as i32) as u32 & 0x04c11db7);
i += 1;
}
r
}
const fn lookup_array() -> [u32; 0x100] {
let mut lup_arr: [u32; 0x100] = [0; 0x100];
let mut i = 0;
while i < 0x100 {
lup_arr[i] = get_tbl_elem(i as u32);
i += 1;
}
lup_arr
}
pub fn crc32(array: &[u8]) -> u32 {
crc32_update(0, array)
}
pub fn crc32_update(cur: u32, array: &[u8]) -> u32 {
let mut ret: u32 = cur;
for av in array {
ret = (ret << 8) ^ CRC_LOOKUP_ARRAY[(*av as u32 ^ (ret >> 24)) as usize];
}
ret
}
#[test]
fn test_crc32() {
// Test page taken from real Ogg file
let test_arr = &[
0x4f, 0x67, 0x67, 0x53, 0x00, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x74,
0xa3, 0x90, 0x5b, 0x00, 0x00, 0x00, 0x00,
// The spec requires us to zero out the CRC field
/*0x6d, 0x94, 0x4e, 0x3d,*/
0x00, 0x00, 0x00, 0x00, 0x01, 0x1e, 0x01, 0x76, 0x6f, 0x72, 0x62, 0x69, 0x73, 0x00, 0x00,
0x00, 0x00, 0x02, 0x44, 0xac, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0xb5, 0x01, 0x00,
0x00, 0x00, 0x00, 0x00, 0xb8, 0x01,
];
println!();
println!(
"CRC of \"==!\" calculated as 0x{:08x} (expected 0x9f858776)",
crc32(&[61, 61, 33])
);
println!(
"Test page CRC calculated as 0x{:08x} (expected 0x3d4e946d)",
crc32(test_arr)
);
assert_eq!(crc32(&[61, 61, 33]), 0x9f858776);
assert_eq!(crc32(test_arr), 0x3d4e946d);
assert_eq!(crc32(&test_arr[0..27]), 0x7b374db8);
}

73
postprocessor/src/lib.rs Normal file
View file

@ -0,0 +1,73 @@
#![allow(dead_code)]
pub mod ogg_from_webm;
mod crc;
mod ogg;
mod webm;
/// Error from the postprocessor
#[derive(thiserror::Error, Debug)]
#[non_exhaustive]
pub enum PostprocessingError {
/// File IO error
#[error(transparent)]
Io(#[from] std::io::Error),
#[error("file format not recognized: {0}")]
InvalidFormat(&'static str),
#[error("no such element: expected {0:#x}, got {1:#x}")]
NoSuchElement(u32, u32),
#[error("bad element: {0}")]
BadElement(String),
#[error("invalid encoded length")]
InvalidLength,
#[error("invalid string")]
InvalidString,
#[error("unexpected element size")]
UnexpectedSize,
#[error("unsupported op: {0}")]
Unsupported(&'static str),
#[error("int conversion: {0}")]
Conversion(#[from] std::num::TryFromIntError),
}
pub(crate) type Result<T> = std::result::Result<T, PostprocessingError>;
#[cfg(test)]
pub(crate) mod tests {
use std::{
fs::File,
io::{BufReader, Read},
os::unix::prelude::MetadataExt,
path::{Path, PathBuf},
};
use once_cell::sync::Lazy;
use path_macro::path;
pub static TESTFILES: Lazy<PathBuf> = Lazy::new(|| {
path!(env!("CARGO_MANIFEST_DIR") / ".." / "testfiles")
.canonicalize()
.unwrap()
});
pub fn assert_files_eq<P: AsRef<Path>, P2: AsRef<Path>>(p1: P, p2: P2) {
let f1 = File::open(p1).unwrap();
let f2 = File::open(p2).unwrap();
let size = f1.metadata().unwrap().size();
assert_eq!(size, f2.metadata().unwrap().size(), "File sizes dont match");
let mut r1 = BufReader::new(f1);
let mut r2 = BufReader::new(f2);
for i in 0..size {
let mut b1 = [0; 1];
let mut b2 = [0; 1];
r1.read_exact(&mut b1).unwrap();
r2.read_exact(&mut b2).unwrap();
assert_eq!(b1[0], b2[0], "Byte {i} does not match");
}
}
}

131
postprocessor/src/ogg.rs Normal file
View file

@ -0,0 +1,131 @@
use crate::{crc, PostprocessingError, Result};
pub struct OggWriter {
sequence_count: u32,
stream_id: u32,
packet_flag: u8,
segment_table: [u8; 255],
segment_table_size: u8,
pub segment_table_next_timestamp: u64,
}
pub const FLAG_UNSET: u8 = 0x00;
pub const FLAG_FIRST: u8 = 0x02;
pub const FLAG_LAST: u8 = 0x04;
const HEADER_CHECKSUM_OFFSET: usize = 22;
const HEADER_SIZE: usize = 27;
pub const TIME_SCALE_NS: u64 = 1000000000;
pub const METADATA_VORBIS: [u8; 15] = [
0x03, 0x76, 0x6f, 0x72, 0x62, 0x69, 0x73, // "vorbis" binary string
0x00, 0x00, 0x00, 0x00, // writing application string size (not present)
0x00, 0x00, 0x00, 0x00, // additional tags count (zero means no tags)
];
pub const METADATA_OPUS: [u8; 16] = [
0x4F, 0x70, 0x75, 0x73, 0x54, 0x61, 0x67, 0x73, // "OpusTags" binary string
0x00, 0x00, 0x00, 0x00, // writing application string size (not present)
0x00, 0x00, 0x00, 0x00, // additional tags count (zero means no tags)
];
impl OggWriter {
pub fn new() -> Self {
Self {
sequence_count: 0,
stream_id: 1,
packet_flag: FLAG_FIRST,
segment_table: [0; 255],
segment_table_size: 0,
segment_table_next_timestamp: TIME_SCALE_NS,
}
}
pub fn make_packet_header(&mut self, gran_pos: u64, page: &[u8], immediate: bool) -> Vec<u8> {
let mut header = Vec::new();
let mut length = HEADER_SIZE;
header.extend_from_slice(b"OggS");
header.push(0); // version
header.push(self.packet_flag); // type
header.extend_from_slice(&gran_pos.to_le_bytes()); // granulate position
header.extend_from_slice(&self.stream_id.to_le_bytes()); // bitstream serial number
header.extend_from_slice(&self.sequence_count.to_le_bytes()); // page sequence number
self.sequence_count += 1;
header.extend_from_slice(&[0, 0, 0, 0]); // page checksum
header.push(self.segment_table_size);
header.extend_from_slice(&self.segment_table[0..self.segment_table_size as usize]);
length += self.segment_table_size as usize;
self.clear_segment_table();
let checksum = crc::crc32(&header[0..length]);
let checksum = crc::crc32_update(checksum, page);
Self::header_put_checksum(&mut header, checksum);
if immediate {
self.segment_table_next_timestamp -= TIME_SCALE_NS;
}
header
}
fn header_put_checksum(header: &mut Vec<u8>, checksum: u32) {
let cs_bts = checksum.to_le_bytes();
let to_fill = (HEADER_CHECKSUM_OFFSET + cs_bts.len()).saturating_sub(header.len());
for _ in 0..to_fill {
header.push(0);
}
header[HEADER_CHECKSUM_OFFSET..(HEADER_CHECKSUM_OFFSET + cs_bts.len())]
.copy_from_slice(&cs_bts[..]);
}
pub fn clear_segment_table(&mut self) {
self.segment_table_next_timestamp += TIME_SCALE_NS;
self.packet_flag = FLAG_UNSET;
self.segment_table_size = 0;
}
pub fn add_packet_segment(&mut self, size: u32) -> Result<bool> {
if size > 65025 {
return Err(PostprocessingError::Unsupported(
"page size cannot be larger than 65025",
));
}
let mut available =
(self.segment_table.len() as u32 - self.segment_table_size as u32) * 255;
let extra = (size % 255) == 0;
if extra {
// add a zero byte entry in the table
// required to indicate the sample size is multiple of 255
available -= 255;
}
// check if possible add the segment, without overflow the table
if available < size {
return Ok(false); // not enough space on the page
}
let mut seg = size;
while seg > 0 {
self.segment_table[self.segment_table_size as usize] = seg.min(255) as u8;
self.segment_table_size += 1;
seg = seg.saturating_sub(255);
}
if extra {
self.segment_table[self.segment_table_size as usize] = 0;
self.segment_table_size += 1;
}
Ok(true)
}
pub fn set_packet_flag(&mut self, flag: u8) {
self.packet_flag = flag;
}
}

View file

@ -0,0 +1,179 @@
use std::{
fs::File,
io::{BufWriter, Read, Write},
path::Path,
};
use crate::{
ogg::{OggWriter, FLAG_LAST, METADATA_OPUS, METADATA_VORBIS, TIME_SCALE_NS},
webm::{TrackKind, WebmReader},
PostprocessingError,
};
pub fn test(mut source: impl Read) -> Result<bool, PostprocessingError> {
let mut buf = [0; 4];
source.read_exact(&mut buf)?;
match u32::from_be_bytes(buf) {
0x1a45dfa3 => Ok(true), // webm/mkv
0x4f676753 => Ok(false), // ogg
_ => Err(PostprocessingError::InvalidFormat("unknown magic number")),
}
}
pub fn process<P: AsRef<Path>, P2: AsRef<Path>>(
source: P,
dest: P2,
) -> Result<(), PostprocessingError> {
let mut webm = WebmReader::new(File::open(source)?)?;
webm.parse()?;
if !webm.get_next_segment()? {
return Err(PostprocessingError::InvalidFormat("no segment"));
}
let mut ogg = OggWriter::new();
let mut output = BufWriter::new(File::create(dest)?);
// Select track
let track = match &webm.tracks {
Some(tracks) => {
let track_id = tracks
.iter()
.enumerate()
.find(|(_, track)| {
track.kind == TrackKind::Audio
&& (track.codec_id == "A_OPUS" || track.codec_id == "A_VORBIS")
})
.ok_or(PostprocessingError::InvalidFormat("no audio tracks"))?
.0;
webm.select_track(track_id).unwrap().clone()
}
None => return Err(PostprocessingError::InvalidFormat("no tracks")),
};
// Get sample rate
let res = get_sample_freq_from_track(&track.b_metadata);
// Create packet with codec init data
if !track.codec_private.is_empty() {
ogg.add_packet_segment(track.codec_private.len().try_into()?)?;
let header = ogg.make_packet_header(0, &track.codec_private, true);
output.write_all(&header)?;
output.write_all(&track.codec_private)?;
}
// Create packet with metadata
let metadata = match track.codec_id.as_str() {
"A_OPUS" => METADATA_OPUS.as_slice(),
"A_VORBIS" => METADATA_VORBIS.as_slice(),
_ => unreachable!(),
};
ogg.add_packet_segment(metadata.len().try_into()?)?;
let header = ogg.make_packet_header(0, metadata, true);
output.write_all(&header)?;
output.write_all(metadata)?;
// Calculate amount of packets
let mut page = Vec::new();
let mut webm_block = None;
while !webm.is_done() {
let block = match webm_block {
Some(block) => {
webm_block = None;
Some(block)
}
None => webm.get_next_block()?,
};
if let Some(block) = block {
let timestamp = block.absolute_time_code_ns + track.codec_delay;
let added = if timestamp >= ogg.segment_table_next_timestamp {
false
} else {
ogg.add_packet_segment(block.data_size.try_into()?)?
};
if added {
webm.get_block_data(&block, &mut page)?;
continue;
}
}
let mut elapsed_ns = track.codec_delay;
match block {
Some(block) => {
elapsed_ns += block.absolute_time_code_ns;
}
None => {
// TODO: move to ogg
ogg.set_packet_flag(FLAG_LAST);
elapsed_ns += webm.webm_block_last_timecode();
if track.default_duration > 0 {
elapsed_ns += track.default_duration;
} else {
elapsed_ns += webm.webm_block_near_duration();
}
}
}
// get the sample count in the page
let sample_count: f64 = (elapsed_ns as f64 / TIME_SCALE_NS as f64) * res as f64;
let sample_count = sample_count.ceil() as u64;
let header = ogg.make_packet_header(sample_count, &page, false);
// dump data
output.write_all(&header)?;
output.write_all(&page)?;
page = Vec::new();
webm_block = block;
}
output.flush()?;
Ok(())
}
fn get_sample_freq_from_track(b_metadata: &[u8]) -> f32 {
let mut i = 0;
while i < b_metadata.len().saturating_sub(5) {
let id_bts: [u8; 2] = b_metadata[i..i + 2].try_into().unwrap();
let id = u16::from_be_bytes(id_bts);
if id == 0xB584 {
let freq_bts: [u8; 4] = b_metadata[i + 2..i + 6].try_into().unwrap();
return f32::from_be_bytes(freq_bts);
}
i += 2;
}
0.0
}
#[cfg(test)]
mod tests {
use path_macro::path;
use crate::tests::{assert_files_eq, TESTFILES};
use super::*;
#[test]
fn t_test() {
let path = path!(*TESTFILES / "postprocessor" / "audio1.webm");
let mut file = File::open(&path).unwrap();
assert!(test(&mut file).unwrap());
}
#[test]
fn t_process() {
let temp = temp_testdir::TempDir::default();
let source = path!(*TESTFILES / "postprocessor" / "audio1.webm");
let dest = path!(temp / "audio1.ogg");
let expect = path!(*TESTFILES / "postprocessor" / "conv" / "audio1.ogg");
process(&source, &dest).unwrap();
assert_files_eq(&dest, &expect);
}
}

736
postprocessor/src/webm.rs Normal file
View file

@ -0,0 +1,736 @@
use std::{
fs::File,
io::{BufReader, Read, Seek, SeekFrom},
os::unix::prelude::MetadataExt,
};
use crate::{PostprocessingError, Result};
#[derive(Debug)]
pub struct WebmReader {
stream: BufReader<File>,
len: u64,
segment: Option<Segment>,
cluster: Option<Cluster>,
pub tracks: Option<Vec<WebmTrack>>,
selected_track: Option<usize>,
done: bool,
first_segment: bool,
webm_block_near_duration: u64,
webm_block_last_timecode: u64,
}
#[derive(Debug, Copy, Clone)]
struct Element {
typ: u32,
offset: u64,
content_size: u64,
size: u64,
}
#[derive(Debug, Copy, Clone)]
pub struct Info {
pub timecode_scale: u64,
pub duration: u64,
}
#[derive(Debug, Clone)]
pub struct WebmTrack {
pub track_number: u64,
track_type: u32,
pub codec_id: String,
pub codec_private: Vec<u8>,
pub b_metadata: Vec<u8>,
pub kind: TrackKind,
pub default_duration: u64,
pub codec_delay: u64,
pub seek_pre_roll: u64,
}
#[derive(Debug, Clone)]
pub struct Segment {
pub info: Option<Info>,
tracks: Option<Vec<WebmTrack>>,
current_cluster: Option<Element>,
rf: Element,
first_cluster_in_segment: bool,
}
impl Segment {
fn new(rf: Element) -> Segment {
Segment {
info: None,
tracks: None,
current_cluster: None,
rf,
first_cluster_in_segment: true,
}
}
}
#[derive(Debug, Copy, Clone)]
pub struct SimpleBlock {
pub created_from_block: bool,
pub track_number: u64,
pub relative_time_code: u16,
pub absolute_time_code_ns: u64,
pub flags: u8,
pub offset: u64,
pub data_size: u64,
rf: Element,
}
impl SimpleBlock {
fn new(rf: Element) -> Self {
Self {
created_from_block: false,
track_number: 0,
relative_time_code: 0,
absolute_time_code_ns: 0,
flags: 0,
offset: 0,
data_size: 0,
rf,
}
}
pub fn is_keyframe(&self) -> bool {
(self.flags & 0x80) == 0x80
}
}
#[derive(Debug, Copy, Clone)]
pub struct Cluster {
rf: Element,
current_simple_block: Option<SimpleBlock>,
current_block_group: Option<Element>,
timecode: u64,
}
impl Cluster {
fn new(rf: Element) -> Self {
Self {
rf,
current_simple_block: None,
current_block_group: None,
timecode: 0,
}
}
}
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
pub enum TrackKind {
Audio,
Video,
Other,
}
const ID_EMBL: u32 = 0x0A45DFA3;
const ID_EMBL_READ_VERSION: u32 = 0x02F7;
const ID_EMBL_DOC_TYPE: u32 = 0x0282;
const ID_EMBL_DOC_TYPE_READ_VERSION: u32 = 0x0285;
const ID_SEGMENT: u32 = 0x08538067;
const ID_INFO: u32 = 0x0549A966;
const ID_TIMECODE_SCALE: u32 = 0x0AD7B1;
const ID_DURATION: u32 = 0x489;
const ID_TRACKS: u32 = 0x0654AE6B;
const ID_TRACK_ENTRY: u32 = 0x2E;
const ID_TRACK_NUMBER: u32 = 0x57;
const ID_TRACK_TYPE: u32 = 0x03;
const ID_CODEC_ID: u32 = 0x06;
const ID_CODEC_PRIVATE: u32 = 0x23A2;
const ID_VIDEO: u32 = 0x60;
const ID_AUDIO: u32 = 0x61;
const ID_DEFAULT_DURATION: u32 = 0x3E383;
const ID_FLAG_LACING: u32 = 0x1C;
const ID_CODEC_DELAY: u32 = 0x16AA;
const ID_SEEK_PRE_ROLL: u32 = 0x16BB;
const ID_CLUSTER: u32 = 0x0F43B675;
const ID_TIMECODE: u32 = 0x67;
const ID_SIMPLE_BLOCK: u32 = 0x23;
const ID_BLOCK: u32 = 0x21;
const ID_GROUP_BLOCK: u32 = 0x20;
impl WebmReader {
pub fn new(file: File) -> Result<Self> {
let md = file.metadata()?;
Ok(Self {
stream: BufReader::new(file),
len: md.size(),
segment: None,
cluster: None,
tracks: None,
selected_track: None,
done: false,
first_segment: false,
webm_block_near_duration: 0,
webm_block_last_timecode: 0,
})
}
/// Make sure the parser did not go beyond the current element and
/// skip to the start if the next element.
fn ensure(&mut self, rf: &Element) -> Result<()> {
let pos = self.stream.stream_position()?;
let elem_end = rf.offset + rf.size;
if pos > elem_end {
return Err(PostprocessingError::Io(std::io::Error::new(
std::io::ErrorKind::UnexpectedEof,
format!(
"parser go beyond limits of the Element. type={} offset={} size={} position={}",
rf.typ, rf.offset, rf.size, pos,
),
)));
}
self.stream.seek(SeekFrom::Start(elem_end))?;
Ok(())
}
fn read_byte(&mut self) -> Result<u8> {
let mut bt = [0; 1];
self.stream.read_exact(&mut bt)?;
Ok(bt[0])
}
fn read_u16(&mut self) -> Result<u16> {
let mut bt = [0; 2];
self.stream.read_exact(&mut bt)?;
Ok(u16::from_be_bytes(bt))
}
fn read_number(&mut self, parent: &Element) -> Result<u64> {
let mut value: u64 = 0;
for _ in 0..parent.content_size {
let rd = self.read_byte()?;
value = (value << 8) | u64::from(rd);
}
Ok(value)
}
fn read_string(&mut self, parent: &Element) -> Result<String> {
String::from_utf8(self.read_blob(parent)?).map_err(|_| PostprocessingError::InvalidString)
}
fn read_blob(&mut self, parent: &Element) -> Result<Vec<u8>> {
let length = parent.content_size as usize;
let mut buf = vec![0u8; length];
self.stream.read_exact(&mut buf)?;
Ok(buf)
}
fn read_encoded_number(&mut self) -> Result<u64> {
let mut value = u64::from(self.read_byte()?);
if value > 0 {
let mut mask: u64 = 0x80;
for size in 1..9 {
if (value & mask) == mask {
mask = 0xff;
mask >>= size;
let mut number = value & mask;
for _ in 1..size {
value = u64::from(self.read_byte()?);
number = (number << 8) | value;
}
return Ok(number);
}
mask >>= 1;
}
}
Err(PostprocessingError::InvalidLength)
}
fn read_element(&mut self) -> Result<Element> {
let offset = self.stream.stream_position()?;
let typ = self.read_encoded_number()? as u32;
let content_size = self.read_encoded_number()?;
let size = content_size + self.stream.stream_position()? - offset;
Ok(Element {
typ,
offset,
content_size,
size,
})
}
fn read_element_expected(&mut self, expected: u32) -> Result<Element> {
let elem = self.read_element()?;
if expected != 0 && elem.typ != expected {
return Err(PostprocessingError::NoSuchElement(expected, elem.typ));
}
Ok(elem)
}
fn until_element(&mut self, rf: Option<&Element>, expected: &[u32]) -> Result<Option<Element>> {
loop {
let brk = match rf {
Some(rf) => self.stream.stream_position()? >= (rf.offset + rf.size),
None => self.stream.stream_position()? >= self.len,
};
if brk {
break;
}
let elem = self.read_element()?;
if expected.is_empty() {
return Ok(Some(elem));
}
for t in expected {
if &elem.typ == t {
return Ok(Some(elem));
}
}
self.ensure(&elem)?;
}
Ok(None)
}
fn read_ebml(
&mut self,
rf: &Element,
min_read_version: u64,
min_doc_type_version: u64,
) -> Result<bool> {
let elem_v = self.until_element(Some(rf), &[ID_EMBL_READ_VERSION])?;
match elem_v {
Some(elem_v) => {
if self.read_number(&elem_v)? > min_read_version {
return Ok(false);
}
let elem_t = self.until_element(Some(rf), &[ID_EMBL_DOC_TYPE])?;
match elem_t {
Some(elem_t) => {
if self.read_string(&elem_t)? != "webm" {
return Ok(false);
}
let elem_tv =
self.until_element(Some(rf), &[ID_EMBL_DOC_TYPE_READ_VERSION])?;
match elem_tv {
Some(elem_tv) => {
Ok(self.read_number(&elem_tv)? <= min_doc_type_version)
}
None => Ok(false),
}
}
None => Ok(false),
}
}
None => Ok(false),
}
}
fn read_info(&mut self, rf: &Element) -> Result<Info> {
let mut info = Info {
timecode_scale: 0,
duration: 0,
};
while let Some(elem) = self.until_element(Some(rf), &[ID_TIMECODE_SCALE, ID_DURATION])? {
match elem.typ {
ID_TIMECODE_SCALE => {
info.timecode_scale = self.read_number(&elem)?;
}
ID_DURATION => {
info.duration = self.read_number(&elem)?;
}
_ => {}
}
self.ensure(&elem)?;
}
if info.timecode_scale == 0 {
return Err(PostprocessingError::BadElement(
"Element Timecode not found".to_owned(),
));
}
Ok(info)
}
fn read_segment(
&mut self,
rf: Element,
track_lacing_expected: u64,
metadata_expected: bool,
) -> Result<Segment> {
let mut seg = Segment::new(rf);
while let Some(elem) = self.until_element(Some(&rf), &[ID_INFO, ID_TRACKS, ID_CLUSTER])? {
match elem.typ {
ID_CLUSTER => {
seg.current_cluster = Some(elem);
break;
}
ID_INFO => {
seg.info = Some(self.read_info(&elem)?);
self.ensure(&elem)?;
}
ID_TRACKS => {
seg.tracks = Some(self.read_tracks(&elem, track_lacing_expected)?);
self.ensure(&elem)?;
}
_ => {}
}
}
if metadata_expected && (seg.info.is_none() || seg.tracks.is_none()) {
return Err(PostprocessingError::BadElement(format!(
"Cluster element found without Info and/or Tracks element at position {}",
rf.offset
)));
}
Ok(seg)
}
fn read_tracks(&mut self, rf: &Element, lacing_expected: u64) -> Result<Vec<WebmTrack>> {
let mut tracks = Vec::new();
while let Some(elem_te) = self.until_element(Some(rf), &[ID_TRACK_ENTRY])? {
let mut entry = WebmTrack {
track_number: 0,
track_type: 0,
codec_id: String::new(),
codec_private: Vec::new(),
b_metadata: Vec::new(),
kind: TrackKind::Other,
default_duration: 0,
codec_delay: 0,
seek_pre_roll: 0,
};
let mut drop = false;
while let Some(elem) = self.until_element(Some(&elem_te), &[])? {
match elem.typ {
ID_TRACK_NUMBER => {
entry.track_number = self.read_number(&elem)?;
}
ID_TRACK_TYPE => {
entry.track_type = self.read_number(&elem)? as u32;
}
ID_CODEC_ID => {
entry.codec_id = self.read_string(&elem)?;
}
ID_CODEC_PRIVATE => {
entry.codec_private = self.read_blob(&elem)?;
}
ID_AUDIO | ID_VIDEO => {
entry.b_metadata = self.read_blob(&elem)?;
}
ID_DEFAULT_DURATION => {
entry.default_duration = self.read_number(&elem)?;
}
ID_FLAG_LACING => {
drop = self.read_number(&elem)? != lacing_expected;
}
ID_CODEC_DELAY => {
entry.codec_delay = self.read_number(&elem)?;
}
ID_SEEK_PRE_ROLL => {
entry.seek_pre_roll = self.read_number(&elem)?;
}
_ => {}
}
self.ensure(&elem)?;
}
entry.kind = match entry.track_type {
1 => TrackKind::Video,
2 => TrackKind::Audio,
_ => TrackKind::Other,
};
if !drop {
tracks.push(entry);
}
self.ensure(&elem_te)?;
}
Ok(tracks)
}
fn read_simple_block(&mut self, rf: Element) -> Result<SimpleBlock> {
let mut sb = SimpleBlock::new(rf);
sb.track_number = self.read_encoded_number()?;
sb.relative_time_code = self.read_u16()?;
sb.flags = self.read_byte()?;
let pos = self.stream.stream_position()?;
sb.data_size = (rf.offset + rf.size)
.checked_sub(pos)
.ok_or(PostprocessingError::UnexpectedSize)?;
sb.offset = pos;
sb.created_from_block = rf.typ == ID_BLOCK;
Ok(sb)
}
fn read_cluster(&mut self, rf: Element) -> Result<Cluster> {
let elem = self.until_element(Some(&rf), &[ID_TIMECODE])?;
let mut cl = Cluster::new(rf);
match elem {
Some(elem) => {
cl.timecode = self.read_number(&elem)?;
}
None => {
return Err(PostprocessingError::BadElement(format!(
"Cluster at {} without Timecode element",
rf.offset
)))
}
}
Ok(cl)
}
fn inside_cluster_bounds(&mut self, cluster: &Cluster) -> Result<bool> {
Ok(self.stream.stream_position()? >= (cluster.rf.offset + cluster.rf.size))
}
pub fn get_next_cluster(&mut self) -> Result<Option<Cluster>> {
if self.done {
return Ok(None);
}
let (rf, cc) = if let Some(segment) = &mut self.segment {
if let Some(current_cluster) = segment.current_cluster {
if segment.first_cluster_in_segment {
segment.first_cluster_in_segment = false;
return Ok(Some(self.read_cluster(current_cluster)?));
}
(segment.rf, current_cluster)
} else {
return Ok(None);
}
} else {
return Ok(None);
};
self.ensure(&cc)?;
let elem = self.until_element(Some(&rf), &[ID_CLUSTER])?;
match elem {
Some(elem) => {
self.segment.as_mut().unwrap().current_cluster = Some(elem);
Ok(Some(self.read_cluster(elem)?))
}
None => Ok(None),
}
}
pub fn get_next_simple_block(&mut self, cluster: &mut Cluster) -> Result<Option<SimpleBlock>> {
if self.inside_cluster_bounds(cluster)? {
return Ok(None);
}
if let Some(current_block_group) = &cluster.current_block_group {
self.ensure(current_block_group)?;
cluster.current_block_group = None;
cluster.current_simple_block = None;
} else if let Some(current_simple_block) = &cluster.current_simple_block {
self.ensure(&current_simple_block.rf)?;
}
while !self.inside_cluster_bounds(cluster)? {
let elem = self.until_element(Some(&cluster.rf), &[ID_SIMPLE_BLOCK, ID_GROUP_BLOCK])?;
let nelem = match elem {
Some(elem) => {
if elem.typ == ID_GROUP_BLOCK {
cluster.current_block_group = Some(elem);
let block_elem = self.until_element(Some(&elem), &[ID_BLOCK])?;
match block_elem {
Some(block_elem) => block_elem,
None => {
self.ensure(&elem)?;
cluster.current_block_group = None;
continue;
}
}
} else {
elem
}
}
None => return Ok(None),
};
let mut csb = self.read_simple_block(nelem)?;
if self
.selected_track()
.map(|st| st.track_number == csb.track_number)
.unwrap_or_default()
{
csb.absolute_time_code_ns = (u64::from(csb.relative_time_code) + cluster.timecode)
* self
.segment
.as_ref()
.and_then(|seg| seg.info)
.map(|inf| inf.timecode_scale)
.unwrap_or(1);
cluster.current_simple_block = Some(csb);
return Ok(Some(csb));
}
cluster.current_simple_block = Some(csb);
self.ensure(&nelem)?;
}
Ok(None)
}
pub fn get_next_block(&mut self) -> Result<Option<SimpleBlock>> {
if self.segment.is_none() && !self.get_next_segment()? {
return Ok(None);
}
if self.cluster.is_none() {
self.cluster = self.get_next_cluster()?;
if self.cluster.is_none() {
self.segment = None;
return self.get_next_block();
}
}
let mut c = self.cluster.unwrap();
let res = self.get_next_simple_block(&mut c)?;
self.cluster = Some(c);
match res {
Some(res) => {
self.webm_block_near_duration =
res.absolute_time_code_ns - self.webm_block_last_timecode;
self.webm_block_last_timecode = res.absolute_time_code_ns;
Ok(Some(res))
}
None => {
self.cluster = None;
self.get_next_block()
}
}
}
pub fn selected_track(&self) -> Option<&WebmTrack> {
if let (Some(tracks), Some(st)) = (&self.tracks, self.selected_track) {
return tracks.get(st);
}
None
}
pub fn select_track(&mut self, index: usize) -> Option<&WebmTrack> {
if let Some(tracks) = &self.tracks {
match tracks.get(index) {
Some(track) => {
self.selected_track = Some(index);
Some(track)
}
None => None,
}
} else {
None
}
}
pub fn parse(&mut self) -> Result<()> {
let elem_ebml = self.read_element_expected(ID_EMBL)?;
if !self.read_ebml(&elem_ebml, 1, 2)? {
return Err(PostprocessingError::InvalidFormat(
"Unsupported EBML data (WebM)",
));
}
self.ensure(&elem_ebml)?;
let elem_seg = self.until_element(None, &[ID_SEGMENT])?;
match elem_seg {
Some(elem_seg) => {
let seg = self.read_segment(elem_seg, 0, true)?;
// TODO: avoid this clone
self.tracks = seg.tracks.clone();
self.segment = Some(seg);
self.selected_track = None;
self.done = false;
self.first_segment = true;
Ok(())
}
None => Err(PostprocessingError::InvalidFormat(
"Fragment element not found",
)),
}
}
pub fn get_next_segment(&mut self) -> Result<bool> {
if self.done {
return Ok(false);
}
if let Some(segment) = &self.segment {
if self.first_segment {
self.first_segment = false;
return Ok(true);
}
let srf = segment.rf;
self.ensure(&srf)?;
}
let elem = self.until_element(None, &[ID_SEGMENT])?;
match elem {
Some(elem) => {
self.segment = Some(self.read_segment(elem, 0, false)?);
Ok(true)
}
None => {
self.done = true;
Ok(false)
}
}
}
pub fn is_done(&self) -> bool {
self.done
}
pub fn webm_block_near_duration(&self) -> u64 {
self.webm_block_near_duration
}
pub fn webm_block_last_timecode(&self) -> u64 {
self.webm_block_last_timecode
}
pub fn get_block_data(&mut self, block: &SimpleBlock, bytes: &mut Vec<u8>) -> Result<()> {
let old_pos = self.stream.stream_position()?;
self.stream.seek(SeekFrom::Start(block.offset))?;
for _ in 0..block.data_size {
let mut bt = [0; 1];
self.stream.read_exact(&mut bt)?;
bytes.push(bt[0]);
}
self.stream.seek(SeekFrom::Start(old_pos))?;
Ok(())
}
}
#[cfg(test)]
mod tests {
use path_macro::path;
use crate::tests::TESTFILES;
use super::*;
#[test]
fn read() {
let path = path!(*TESTFILES / "postprocessor" / "audio1.webm");
let file = File::open(&path).unwrap();
let mut reader = WebmReader::new(file).unwrap();
reader.parse().unwrap();
reader.get_next_segment().unwrap();
// TODO: check types
reader.select_track(0).unwrap();
dbg!(&reader);
}
}

View file

@ -0,0 +1,14 @@
### `audio1`
This test file was downloaded from the [EffectsLibrary](https://www.youtube.com/@EffectsLibrary)
channel on YouTube
**Title:** Pen Clicking & Scribbling Sound Effects
**Source:** https://www.youtube.com/watch?v=cOtkrqo9Pjk
**License:** Creative Commons Attribution 3.0
This work is licensed under the Creative Commons Attribution 3.0 Unported License. To view a copy of
this license, visit http://creativecommons.org/licenses/by/3.0/ or send a letter to Creative
Commons, PO Box 1866, Mountain View, CA 94042, USA.

Binary file not shown.

Binary file not shown.

Binary file not shown.