diff --git a/Cargo.lock b/Cargo.lock index d8cbf64..7852659 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "alloc-no-stdlib" +version = "2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" + +[[package]] +name = "alloc-stdlib" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +dependencies = [ + "alloc-no-stdlib", +] + [[package]] name = "async-trait" version = "0.1.64" @@ -55,6 +70,27 @@ dependencies = [ "generic-array", ] +[[package]] +name = "brotli" +version = "3.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1a0b1dbcc8ae29329621f8d4f0d835787c1c38bb1401979b49d13b0b305ff68" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", + "brotli-decompressor", +] + +[[package]] +name = "brotli-decompressor" +version = "2.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4b6561fd3f895a11e8f72af2cb7d22e08366bebc2b6b57f7744c4bda27034744" +dependencies = [ + "alloc-no-stdlib", + "alloc-stdlib", +] + [[package]] name = "byteorder" version = "1.4.3" @@ -103,6 +139,15 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "compressible" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fe7853faa8a7c37cddc40823bf5463d368a5207ebb4e7d4d83846da656f493d3" +dependencies = [ + "mime", +] + [[package]] name = "console" version = "0.15.5" @@ -572,6 +617,16 @@ version = "0.3.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2a60c7ce501c71e03a9c9c0d35b861413ae925bd979cc7a4e30d060069aaac8d" +[[package]] +name = "mime_guess" +version = 
"2.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" +dependencies = [ + "mime", + "unicase", +] + [[package]] name = "miniz_oxide" version = "0.6.2" @@ -1027,12 +1082,17 @@ dependencies = [ name = "talon" version = "0.1.0" dependencies = [ + "brotli", + "compressible", "flate2", "hex", "hex-literal", "insta", + "log", + "mime_guess", "path_macro", "poem", + "regex", "rmp-serde", "rstest", "serde", @@ -1270,6 +1330,15 @@ dependencies = [ "version_check", ] +[[package]] +name = "unicase" +version = "2.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" +dependencies = [ + "version_check", +] + [[package]] name = "unicode-ident" version = "1.0.6" diff --git a/Cargo.toml b/Cargo.toml index 9a07518..648985d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ sled = "0.34.7" serde = "1.0.152" serde_json = "1.0.93" rmp-serde = "1.1.1" -toml = "0.7.2" +toml = { version = "0.7.2", default-features = false, features = ["parse"] } thiserror = "1.0.38" time = { version = "0.3.15", features = [ "macros", @@ -30,6 +30,11 @@ zip = { version = "0.6.4", default-features = false, features = [ ] } tar = "0.4.38" flate2 = "1.0.25" +brotli = "3.3.4" +mime_guess = { version = "2.0.4", default-features = false } +compressible = "0.2.0" +regex = "1.7.1" +log = "0.4.17" [dev-dependencies] rstest = "0.16.0" diff --git a/src/config.rs b/src/config.rs new file mode 100644 index 0000000..0e52b69 --- /dev/null +++ b/src/config.rs @@ -0,0 +1,177 @@ +use std::{collections::BTreeMap, ops::Deref, path::Path, sync::Arc}; + +use regex::Regex; +use serde::{Deserialize, Serialize}; + +#[derive(Clone, Default)] +pub struct Config { + i: Arc, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct ConfigInner { + pub server: ServerCfg, + pub compression: 
CompressionCfg, + pub keys: BTreeMap, +} + +#[derive(thiserror::Error, Debug)] +pub enum ConfigError { + #[error("io error: {0}")] + Io(#[from] std::io::Error), + #[error("parsing error: {0}")] + Parse(#[from] toml::de::Error), +} + +type Result = std::result::Result; + +impl Deref for Config { + type Target = ConfigInner; + + fn deref(&self) -> &Self::Target { + &self.i + } +} + +impl Serialize for Config { + fn serialize(&self, serializer: S) -> std::result::Result + where + S: serde::Serializer, + { + ConfigInner::serialize(self, serializer) + } +} + +impl<'de> Deserialize<'de> for Config { + fn deserialize(deserializer: D) -> std::result::Result + where + D: serde::Deserializer<'de>, + { + ConfigInner::deserialize(deserializer).map(|c| Self { i: c.into() }) + } +} + +impl Config { + pub fn new(cfg: ConfigInner) -> Self { + Self { i: cfg.into() } + } + + pub fn from_file>(path: P) -> Result { + let cfg_str = std::fs::read_to_string(path)?; + Ok(toml::from_str::(&cfg_str)?) + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct ServerCfg { + pub address: String, + pub port: u32, +} + +impl Default for ServerCfg { + fn default() -> Self { + Self { + address: "0.0.0.0".to_owned(), + port: 8080, + } + } +} + +#[derive(Debug, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct CompressionCfg { + /// Enable gzip compression + pub gzip_en: bool, + /// Gzip compression level (0-9) + pub gzip_level: u8, + /// Enable brotli compression + pub brotli_en: bool, + /// Brotli compression level (0-11) + pub brotli_level: u8, +} + +impl Default for CompressionCfg { + fn default() -> Self { + Self { + gzip_en: false, + gzip_level: 6, + brotli_en: false, + brotli_level: 7, + } + } +} + +impl CompressionCfg { + pub fn enabled(&self) -> bool { + self.gzip_en || self.brotli_en + } +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +#[serde(default)] +pub struct KeyCfg { + #[serde(skip_serializing_if = "Domains::is_none")] + pub 
domains: Domains, +} + +#[derive(Debug, Default, Clone, Serialize, Deserialize)] +#[serde(untagged)] +pub enum Domains { + #[default] + None, + Single(String), + Multiple(Vec), +} + +impl Domains { + fn is_none(&self) -> bool { + matches!(self, Domains::None) + } + + fn pattern_matches_domain(pattern: &str, domain: &str) -> bool { + if pattern == "*" { + true + } else if pattern.starts_with('/') && pattern.ends_with('/') { + let regex_str = &pattern[1..pattern.len() - 1]; + let re = match Regex::new(regex_str) { + Ok(re) => re, + Err(e) => { + log::error!("could not parse regex `{regex_str}`, error: {e}"); + return false; + } + }; + re.is_match(domain) + } else { + domain == pattern + } + } + + pub fn matches_domain(&self, domain: &str) -> bool { + match self { + Domains::None => false, + Domains::Single(pattern) => Self::pattern_matches_domain(pattern, domain), + Domains::Multiple(patterns) => patterns + .iter() + .any(|pattern| Self::pattern_matches_domain(pattern, domain)), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + use rstest::rstest; + + #[rstest] + #[case("*", "hello-world", true)] + #[case("hello-world", "hello-world", true)] + #[case("hello-world", "hello-world2", false)] + #[case("/^talon-\\d+/", "talon-1", true)] + #[case("/^talon-\\d+/", "talon-x", false)] + fn pattern_matches_domain(#[case] pattern: &str, #[case] domain: &str, #[case] expect: bool) { + assert_eq!(Domains::pattern_matches_domain(pattern, domain), expect); + } +} diff --git a/src/db/mod.rs b/src/db/mod.rs index 80552e2..6557366 100644 --- a/src/db/mod.rs +++ b/src/db/mod.rs @@ -378,11 +378,15 @@ impl Db { } fn file_key(version: u32, path: &str) -> String { - // Remove leading/trailing slashes from path - let path = path.trim_matches('/'); format!("{version}:{path}") } + /// Get the hash of a file in the database + pub fn get_file_opt(&self, version: u32, path: &str) -> Result>> { + let key = Self::file_key(version, path); + Ok(self.i.files.get(key)?.map(|hash| 
hash.to_vec())) + } + /// Get the hash of a file in the database pub fn get_file(&self, version: u32, path: &str) -> Result> { let key = Self::file_key(version, path); diff --git a/src/db/model.rs b/src/db/model.rs index 0ae94f2..a56e21b 100644 --- a/src/db/model.rs +++ b/src/db/model.rs @@ -60,8 +60,6 @@ pub struct WebsiteUpdate { pub source_url: Option>, /// Icon for the source link pub source_icon: Option>, - /// File hash of the fallback page (for single page applications) - pub spa_fallback: Option>, } /// Website version stored in the database diff --git a/src/lib.rs b/src/lib.rs index efe5a3d..e3d212c 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,3 +1,4 @@ +pub mod config; pub mod db; pub mod model; pub mod storage; diff --git a/src/storage.rs b/src/storage.rs index 49442b1..4b6cce6 100644 --- a/src/storage.rs +++ b/src/storage.rs @@ -1,17 +1,21 @@ use std::{ borrow::Cow, + collections::BTreeMap, fs, - io::{Read, Seek}, + io::{BufReader, Read, Seek}, path::{Path, PathBuf}, sync::Arc, }; -use flate2::read::GzDecoder; +use flate2::{read::GzDecoder, write::GzEncoder}; use hex::ToHex; +use mime_guess::Mime; +use poem::http::HeaderMap; use temp_dir::TempDir; use zip::ZipArchive; use crate::{ + config::Config, db::{Db, DbError}, util, }; @@ -23,6 +27,38 @@ pub struct Storage { struct StorageInner { path: PathBuf, db: Db, + cfg: Config, +} + +#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum CompressionAlg { + #[default] + None, + Gzip, + Brotli, +} + +impl CompressionAlg { + /// Get value of the http encoding header + fn encoding(&self) -> Option<&'static str> { + match self { + CompressionAlg::None => None, + CompressionAlg::Gzip => Some("gzip"), + CompressionAlg::Brotli => Some("br"), + } + } +} + +#[derive(Debug)] +pub struct GotFile { + /// File path on disk + pub file_path: PathBuf, + /// File encoding + pub encoding: Option<&'static str>, + /// MIME type + pub mime: Option, + /// Website path to redirect to + pub rd_path: 
Option, } #[derive(thiserror::Error, Debug)] @@ -35,6 +71,10 @@ pub enum StorageError { InvalidFile(PathBuf), #[error("zip archive error: {0}")] Zip(#[from] zip::result::ZipError), + #[error("page {0} not found")] + NotFound(String), + #[error("file {0} of page {1} missing from storage")] + MissingFile(String, String), } type Result = std::result::Result; @@ -43,11 +83,12 @@ const TMPDIR_PREFIX: &str = "talon"; impl Storage { /// Create a new file storage using the root folder and the database - pub fn new>(path: P, db: Db) -> Self { + pub fn new>(path: P, db: Db, cfg: Config) -> Self { Self { i: StorageInner { path: path.into(), db, + cfg, } .into(), } @@ -68,13 +109,36 @@ impl Storage { let file_path = file_path.as_ref(); let hash = util::hash_file(file_path)?; - let hash_str = hash.encode_hex::(); + let stored_file = self.file_path_mkdir(&hash)?; - let subdir = self.i.path.join(&hash_str[..2]); - if !subdir.is_dir() { - fs::create_dir(&subdir)?; + fs::copy(file_path, &stored_file)?; + + if self.i.cfg.compression.enabled() + && mime_guess::from_path(file_path) + .first() + .map(|t| compressible::is_compressible(t.essence_str())) + .unwrap_or_default() + { + if self.i.cfg.compression.gzip_en { + let mut encoder = GzEncoder::new( + fs::File::create(stored_file.with_extension("gz"))?, + flate2::Compression::new(self.i.cfg.compression.gzip_level.into()), + ); + let mut input = BufReader::new(fs::File::open(&stored_file)?); + std::io::copy(&mut input, &mut encoder)?; + } + + if self.i.cfg.compression.brotli_en { + let mut encoder = brotli::CompressorWriter::new( + fs::File::create(stored_file.with_extension("br"))?, + 4096, + self.i.cfg.compression.brotli_level.into(), + 20, + ); + let mut input = BufReader::new(fs::File::open(&stored_file)?); + std::io::copy(&mut input, &mut encoder)?; + } } - fs::copy(file_path, subdir.join(&hash_str))?; self.i.db.insert_file(version, site_path, &hash)?; @@ -158,4 +222,99 @@ impl Storage { let import_path = 
Self::fix_archive_path(temp.path())?; self.insert_dir(import_path, version) } + + fn file_path_mkdir(&self, hash: &[u8]) -> Result { + let hash_str = hash.encode_hex::(); + + let subdir = self.i.path.join(&hash_str[..2]); + if !subdir.is_dir() { + fs::create_dir(&subdir)?; + } + Ok(subdir.join(&hash_str)) + } + + fn file_path(&self, hash: &[u8]) -> PathBuf { + let hash_str = hash.encode_hex::(); + let subdir = self.i.path.join(&hash_str[..2]); + subdir.join(&hash_str) + } + + fn files_compressed(&self, hash: &[u8]) -> BTreeMap { + let path = self.file_path(hash); + let mut res = BTreeMap::new(); + + if self.i.cfg.compression.gzip_en { + let path_gz = path.with_extension("gz"); + if path_gz.is_file() { + res.insert(CompressionAlg::Gzip, path_gz); + } + } + if self.i.cfg.compression.brotli_en { + let path_br = path.with_extension("br"); + if path_br.is_file() { + res.insert(CompressionAlg::Brotli, path_br); + } + } + if path.is_file() { + res.insert(CompressionAlg::None, path); + } + res + } + + /// Get a file using the raw site path and the website version + /// + /// HTTP headers are used to determine if the compressed version of a file should be returned. + pub fn get_file(&self, version: u32, site_path: &str, headers: &HeaderMap) -> Result { + let site_path = util::trim_site_path(site_path); + let mut new_path: Cow = site_path.into(); + let mut rd_path = None; + let mut hash = None; + + if site_path.is_empty() { + // Special case, open index.html directly + new_path = "index.html".into(); + } else { + // Attempt to access the following pages + // 1. Site path directly + // 2. Site path + `/index.html` + match self.i.db.get_file_opt(version, site_path)? 
{ + Some(h) => { + hash = Some(h); + } + None => { + if util::site_path_ext(site_path).is_none() { + new_path = format!("{site_path}/index.html").into(); + rd_path = Some(format!("{site_path}/")); + } + } + } + } + + let hash = match hash { + Some(hash) => hash, + None => self + .i + .db + .get_file_opt(version, &new_path)? + .ok_or_else(|| StorageError::NotFound(site_path.to_owned()))?, + }; + + let mime = util::site_path_mime(&new_path); + + let files = self.files_compressed(&hash); + let file = util::parse_accept_encoding(headers, &files); + + match file { + Some((compression, file)) => Ok(GotFile { + file_path: file.to_owned(), + encoding: compression.encoding(), + mime, + rd_path, + }), + None => Err(StorageError::MissingFile( + hash.encode_hex::(), + new_path.into(), + )), + } + } } diff --git a/src/util.rs b/src/util.rs index 249c9d1..4bf1995 100644 --- a/src/util.rs +++ b/src/util.rs @@ -1,7 +1,11 @@ -use std::{fs::File, path::Path}; +use std::{collections::BTreeMap, fs::File, path::Path, str::FromStr}; +use mime_guess::Mime; +use poem::http::{header, HeaderMap}; use sha2::{Digest, Sha256}; +use crate::storage::CompressionAlg; + /// Get SHA256 hash of file pub fn hash_file>(file: P) -> Result<[u8; 32], std::io::Error> { let mut hasher = Sha256::new(); @@ -9,3 +13,130 @@ pub fn hash_file>(file: P) -> Result<[u8; 32], std::io::Error> { std::io::copy(&mut file, &mut hasher)?; Ok(hasher.finalize().into()) } + +/// Return the file extension of a website path +pub fn site_path_ext(path: &str) -> Option<&str> { + let mut parts = path.split('.').rev(); + parts + .next() + .filter(|ext| !ext.contains('/') && parts.next().is_some()) +} + +pub fn trim_site_path(path: &str) -> &str { + path.trim_matches('/') +} + +/// Get the MIME type of a website path +pub fn site_path_mime(path: &str) -> Option { + site_path_ext(path).and_then(|ext| mime_guess::from_ext(ext).first()) +} + +enum ContentCoding { + Brotli, + Gzip, + Star, +} + +impl FromStr for ContentCoding { + 
type Err = (); + + fn from_str(s: &str) -> Result { + if s.eq_ignore_ascii_case("gzip") { + Ok(ContentCoding::Gzip) + } else if s.eq_ignore_ascii_case("br") { + Ok(ContentCoding::Brotli) + } else if s == "*" { + Ok(ContentCoding::Star) + } else { + Err(()) + } + } +} + +/// Parse Accept-Encoding header and return the compressed file with the preferred algorithm +/// +/// Source: +pub fn parse_accept_encoding<'a, T>( + headers: &HeaderMap, + files: &'a BTreeMap, +) -> Option<(CompressionAlg, &'a T)> { + if files.is_empty() { + return None; + } + + headers + .get_all(header::ACCEPT_ENCODING) + .iter() + .filter_map(|hval| hval.to_str().ok()) + .flat_map(|s| s.split(',').map(str::trim)) + .filter_map(|v| { + let (e, q) = match v.split_once(";q=") { + Some((e, q)) => (e, (q.parse::().ok()? * 1000.0) as i32), + None => (v, 1000), + }; + let coding: ContentCoding = e.parse().ok()?; + let alg_file = match coding { + ContentCoding::Brotli => { + (CompressionAlg::Brotli, files.get(&CompressionAlg::Brotli)?) + } + ContentCoding::Gzip => (CompressionAlg::Gzip, files.get(&CompressionAlg::Gzip)?), + ContentCoding::Star => { + files.iter().max_by_key(|(a, _)| *a).map(|(a, f)| (*a, f))? 
+ } + }; + Some((alg_file, q)) + }) + .max_by_key(|((a, _), q)| (*q, *a)) + .map(|(x, _)| x) + .or_else(|| { + files + .get(&CompressionAlg::None) + .map(|f| (CompressionAlg::None, f)) + }) +} + +#[cfg(test)] +mod tests { + use super::*; + + use rstest::rstest; + + #[rstest] + #[case::html("index.html", Some("html"))] + #[case::none("hello.world/test", None)] + #[case::none("hello", None)] + fn t_site_path_ext(#[case] path: &str, #[case] expect: Option<&str>) { + let ext = site_path_ext(path); + assert_eq!(ext, expect) + } + + #[rstest] + #[case::html("index.html", Some("text/html"))] + #[case::none("hello.world/test", None)] + fn t_site_path_mime(#[case] path: &str, #[case] expect: Option<&str>) { + let mime = site_path_mime(path).map(|mime| mime.essence_str().to_owned()); + assert_eq!(mime.as_deref(), expect) + } + + #[rstest] + #[case::none("", CompressionAlg::None)] + #[case::foo("foo", CompressionAlg::None)] + #[case::gz("gzip", CompressionAlg::Gzip)] + #[case::br("br", CompressionAlg::Brotli)] + #[case::star("*", CompressionAlg::Brotli)] + #[case::gz_deflate_br("gzip, deflate, br", CompressionAlg::Brotli)] + #[case::preference("br;q=0.8, gzip;q=1.0, *;q=0.1", CompressionAlg::Gzip)] + fn t_parse_accept_encoding(#[case] accept: &str, #[case] expect: CompressionAlg) { + let mut headers = HeaderMap::new(); + headers.insert(header::ACCEPT_ENCODING, accept.parse().unwrap()); + + let mut files = BTreeMap::new(); + files.insert(CompressionAlg::None, 0); + files.insert(CompressionAlg::Gzip, 1); + files.insert(CompressionAlg::Brotli, 2); + + let (compression, file) = parse_accept_encoding(&headers, &files).unwrap(); + assert_eq!(compression, expect); + assert_eq!(file, files.get(&compression).unwrap()); + } +} diff --git a/tests/fixtures/mod.rs b/tests/fixtures/mod.rs index 9731d24..853e9f5 100644 --- a/tests/fixtures/mod.rs +++ b/tests/fixtures/mod.rs @@ -1,13 +1,18 @@ use std::{collections::BTreeMap, ops::Deref}; use hex_literal::hex; +use path_macro::path; use 
rstest::fixture; use temp_testdir::TempDir; use time::macros::datetime; -use talon::db::{ - model::{Version, Website}, - Db, +use talon::{ + config::{CompressionCfg, Config, ConfigInner}, + db::{ + model::{Version, Website}, + Db, + }, + storage::Storage, }; pub const SUBDOMAIN_1: &str = ""; @@ -24,13 +29,12 @@ pub const HASH_1_1_INDEX: [u8; 32] = pub const HASH_1_1_STYLE: [u8; 32] = hex!("356f131c825fbf604797c7e9c85352549d81db8af91fee834016d075110af026"); -pub struct DbWrap { +pub struct DbTest { db: Db, - #[allow(dead_code)] - temp: TempDir, + _temp: TempDir, } -impl Deref for DbWrap { +impl Deref for DbTest { type Target = Db; fn deref(&self) -> &Self::Target { @@ -39,34 +43,14 @@ impl Deref for DbWrap { } #[fixture] -pub fn db_sp() -> DbWrap { +pub fn db_empty() -> DbTest { let temp = temp_testdir::TempDir::default(); let db = Db::new(&temp).unwrap(); - db.insert_website( - SUBDOMAIN_1, - &Website { - name: "ThetaDev".to_owned(), - created_at: datetime!(2023-02-18 16:30 +0), - latest_version: Some(VERSION_1_2), - icon: Some( - hex!("9f7e7971b4bfdb75429e534dea461ed90340886925078cda252cada9aa0e25f7").to_vec(), - ), - color: Some(2068974), - visibility: talon::model::Visibility::Featured, - ..Default::default() - }, - ) - .unwrap(); - - DbWrap { db, temp } + DbTest { db, _temp: temp } } -#[fixture] -pub fn db() -> DbWrap { - let temp = temp_testdir::TempDir::default(); - let db = Db::new(&temp).unwrap(); - +fn insert_websites(db: &Db) { db.insert_website( SUBDOMAIN_1, &Website { @@ -169,6 +153,13 @@ pub fn db() -> DbWrap { }, ) .unwrap(); +} + +#[fixture] +pub fn db() -> DbTest { + let temp = temp_testdir::TempDir::default(); + let db = Db::new(&temp).unwrap(); + insert_websites(&db); db.insert_file(VERSION_1_1, "index.html", &HASH_1_1_INDEX) .unwrap(); @@ -232,5 +223,54 @@ pub fn db() -> DbWrap { ) .unwrap(); - DbWrap { db, temp } + DbTest { db, _temp: temp } +} + +pub struct StorageTest { + store: Storage, + _temp: TempDir, +} + +impl Deref for StorageTest { + 
type Target = Storage; + + fn deref(&self) -> &Self::Target { + &self.store + } +} + +#[fixture] +pub fn store() -> StorageTest { + let temp = temp_testdir::TempDir::default(); + let db_path = path!(temp / "db"); + std::fs::create_dir(&db_path).unwrap(); + + let cfg = Config::new(ConfigInner { + compression: CompressionCfg { + gzip_en: true, + brotli_en: true, + ..Default::default() + }, + ..Default::default() + }); + + let db = Db::new(&db_path).unwrap(); + insert_websites(&db); + + let store = Storage::new(temp.to_path_buf(), db, cfg); + + store + .insert_dir(path!("tests" / "testfiles" / "ThetaDev0"), VERSION_1_1) + .unwrap(); + store + .insert_dir(path!("tests" / "testfiles" / "ThetaDev1"), VERSION_1_2) + .unwrap(); + store + .insert_dir(path!("tests" / "testfiles" / "GenderEx"), VERSION_2_1) + .unwrap(); + store + .insert_dir(path!("tests" / "testfiles" / "RustyPipe"), VERSION_3_1) + .unwrap(); + + StorageTest { store, _temp: temp } } diff --git a/tests/snapshots/tests__config__default.snap b/tests/snapshots/tests__config__default.snap new file mode 100644 index 0000000..77b2c6f --- /dev/null +++ b/tests/snapshots/tests__config__default.snap @@ -0,0 +1,30 @@ +--- +source: tests/tests.rs +expression: "&cfg" +--- +ConfigInner( + server: ServerCfg( + address: "127.0.0.1", + port: 3000, + ), + compression: CompressionCfg( + gzip_en: true, + gzip_level: 6, + brotli_en: true, + brotli_level: 7, + ), + keys: { + "04e99561e3824f387a217d141d2a3b46375de6864afbedf9c9a2cc102bc946a4": KeyCfg( + domains: "/^talon-\\d+/", + ), + "21bdac19ffd22870d561b1d55b35eddd9029497107edb7b926aa3e7856bb409b": KeyCfg( + domains: [ + "spotify-gender-ex", + "rustypipe", + ], + ), + "c32ff286c8ac1c3102625badf38ffd251ae0c4a56079d8ba490f320af63f1f47": KeyCfg( + domains: "*", + ), + }, +) diff --git a/tests/snapshots/tests__config__sparse.snap b/tests/snapshots/tests__config__sparse.snap new file mode 100644 index 0000000..0c0aca2 --- /dev/null +++ b/tests/snapshots/tests__config__sparse.snap @@ 
-0,0 +1,28 @@ +--- +source: tests/tests.rs +expression: "&cfg" +--- +ConfigInner( + server: ServerCfg( + address: "0.0.0.0", + port: 8080, + ), + compression: CompressionCfg( + gzip_en: true, + gzip_level: 6, + brotli_en: false, + brotli_level: 7, + ), + keys: { + "04e99561e3824f387a217d141d2a3b46375de6864afbedf9c9a2cc102bc946a4": KeyCfg(), + "21bdac19ffd22870d561b1d55b35eddd9029497107edb7b926aa3e7856bb409b": KeyCfg( + domains: [ + "spotify-gender-ex", + "rustypipe", + ], + ), + "c32ff286c8ac1c3102625badf38ffd251ae0c4a56079d8ba490f320af63f1f47": KeyCfg( + domains: "*", + ), + }, +) diff --git a/tests/testfiles/RustyPipe/index.html b/tests/testfiles/RustyPipe/index.html index 8e5c0fb..452b963 100644 --- a/tests/testfiles/RustyPipe/index.html +++ b/tests/testfiles/RustyPipe/index.html @@ -17,6 +17,9 @@ Client for the public YouTube / YouTube Music API (Innertube), inspired by NewPipe.

+

+ Page 2 +

Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly diff --git a/tests/testfiles/RustyPipe/page2/index.html b/tests/testfiles/RustyPipe/page2/index.html new file mode 100644 index 0000000..697a387 --- /dev/null +++ b/tests/testfiles/RustyPipe/page2/index.html @@ -0,0 +1,109 @@ + + + + + + + + + + + RustyPipe #2 + + +

+

RustyPipe #2

+

+ Client for the public YouTube / YouTube Music API (Innertube), inspired by + NewPipe. +

+

+ Page 1 +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+

+ Carrot cake biscuit icing pudding danish topping powder. Croissant sugar plum + pudding halvah chocolate. Cotton candy tart cake bonbon tart. Shortbread jelly + fruitcake icing pastry. Dragée dessert cupcake cake sesame snaps toffee pie. + Sweet roll sweet roll chupa chups jelly-o gummies tootsie roll sweet halvah oat + cake. Carrot cake carrot cake muffin bonbon sesame snaps brownie. Bonbon candy + macaroon fruitcake candy canes. Cake pudding danish liquorice cupcake jelly-o + ice cream. Liquorice lollipop danish tootsie roll toffee. Gingerbread chocolate + candy canes donut lemon drops apple pie danish bear claw. Caramels cake jelly + jelly sweet chocolate bar gingerbread icing. Cake soufflé lollipop pudding + marshmallow candy canes tootsie roll danish. +

+
+ + diff --git a/tests/testfiles/config/config.toml b/tests/testfiles/config/config.toml new file mode 100644 index 0000000..c88ae29 --- /dev/null +++ b/tests/testfiles/config/config.toml @@ -0,0 +1,23 @@ +[server] +address = "127.0.0.1" +port = 3000 + +# Talon compresses files when they are uploaded +# Here you can configure compression algorithms and levels +[compression] +gzip_en = true +gzip_level = 6 +brotli_en = true +brotli_level = 7 + +# API keys for uploading websites +# You can configure the allowed domains per key (either a single string or a list of strings) +# Regexes can be used if they start and end with a slash +[keys.c32ff286c8ac1c3102625badf38ffd251ae0c4a56079d8ba490f320af63f1f47] +domains = "*" + +[keys.21bdac19ffd22870d561b1d55b35eddd9029497107edb7b926aa3e7856bb409b] +domains = ["spotify-gender-ex", "rustypipe"] + +[keys.04e99561e3824f387a217d141d2a3b46375de6864afbedf9c9a2cc102bc946a4] +domains = "/^talon-\\d+/" diff --git a/tests/testfiles/config/config_sparse.toml b/tests/testfiles/config/config_sparse.toml new file mode 100644 index 0000000..731bdfb --- /dev/null +++ b/tests/testfiles/config/config_sparse.toml @@ -0,0 +1,16 @@ +# Talon compresses files when they are uploaded +# Here you can configure compression algorithms and levels +[compression] +gzip_en = true +gzip_level = 6 + +# API keys for uploading websites +# You can configure the allowed domains per key (either a single string or a list of strings) +# Regexes can be used if they start and end with a slash +[keys.c32ff286c8ac1c3102625badf38ffd251ae0c4a56079d8ba490f320af63f1f47] +domains = "*" + +[keys.21bdac19ffd22870d561b1d55b35eddd9029497107edb7b926aa3e7856bb409b] +domains = ["spotify-gender-ex", "rustypipe"] + +[keys.04e99561e3824f387a217d141d2a3b46375de6864afbedf9c9a2cc102bc946a4] diff --git a/tests/tests.rs b/tests/tests.rs index f963f19..f6fd71e 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -6,12 +6,13 @@ use path_macro::path; use rstest::rstest; use fixtures::*; -use 
talon::db::{model::WebsiteUpdate, Db, DbError}; -use talon::storage::Storage; +use talon::db::{Db, DbError}; mod database { use super::*; + use talon::db::model::WebsiteUpdate; + fn get_export(db: &Db) -> String { let mut buf: Vec = Vec::new(); db.export(&mut buf).unwrap(); @@ -19,13 +20,13 @@ mod database { } #[rstest] - fn export(db: DbWrap) { + fn export(db: DbTest) { let data = get_export(&db); insta::assert_snapshot!("export", data); } #[rstest] - fn export_import(db: DbWrap) { + fn export_import(db: DbTest) { let td = temp_testdir::TempDir::default(); let p_export = td.join("export.jsonl"); let p_db2 = td.join("db2"); @@ -41,7 +42,7 @@ mod database { } #[rstest] - fn get_website(db: DbWrap) { + fn get_website(db: DbTest) { let ws1 = db.get_website("").unwrap(); let ws2 = db.get_website("spotify-gender-ex").unwrap(); let ws3 = db.get_website("rustypipe").unwrap(); @@ -49,7 +50,7 @@ mod database { } #[rstest] - fn delete_website(db: DbWrap) { + fn delete_website(db: DbTest) { db.delete_website("", true).unwrap(); assert!(matches!( @@ -67,7 +68,7 @@ mod database { } #[rstest] - fn update_website(db: DbWrap) { + fn update_website(db: DbTest) { db.update_website( "", WebsiteUpdate { @@ -87,19 +88,19 @@ mod database { } #[rstest] - fn get_websites(db: DbWrap) { + fn get_websites(db: DbTest) { let websites = db.get_websites().map(|w| w.unwrap()).collect::>(); insta::assert_ron_snapshot!(websites); } #[rstest] - fn get_version(db: DbWrap) { + fn get_version(db: DbTest) { let version = db.get_version("", VERSION_1_1).unwrap(); insta::assert_ron_snapshot!(version); } #[rstest] - fn delete_version(db: DbWrap) { + fn delete_version(db: DbTest) { db.delete_version("", VERSION_1_2, true).unwrap(); assert!(matches!( db.get_version("", VERSION_1_2).unwrap_err(), @@ -120,7 +121,7 @@ mod database { } #[rstest] - fn get_website_versions(db: DbWrap) { + fn get_website_versions(db: DbTest) { let versions = db .get_website_versions("") .map(|v| v.unwrap()) @@ -129,7 +130,7 @@ mod 
database { } #[rstest] - fn get_website_version_ids(db: DbWrap) { + fn get_website_version_ids(db: DbTest) { let ids = db .get_website_version_ids("") .map(|v| v.unwrap()) @@ -138,13 +139,13 @@ mod database { } #[rstest] - fn get_file(db: DbWrap) { + fn get_file(db: DbTest) { let hash = db.get_file(VERSION_1_1, "index.html").unwrap(); assert_eq!(hash, HASH_1_1_INDEX); } #[rstest] - fn delete_file(db: DbWrap) { + fn delete_file(db: DbTest) { db.delete_file(VERSION_1_1, "index.html", true).unwrap(); assert!(matches!( db.get_file(VERSION_1_1, "index.html").unwrap_err(), @@ -158,7 +159,7 @@ mod database { } #[rstest] - fn get_version_files(db: DbWrap) { + fn get_version_files(db: DbTest) { let files = db .get_version_files(VERSION_1_1) .map(|f| f.unwrap()) @@ -173,7 +174,7 @@ mod database { } #[rstest] - fn get_file_hashes(db: DbWrap) { + fn get_file_hashes(db: DbTest) { let hashes = db.get_file_hashes().unwrap(); assert_eq!(hashes.len(), 12) } @@ -181,18 +182,21 @@ mod database { mod storage { use hex::ToHex; + use poem::http::{header, HeaderMap}; + use talon::config::{CompressionCfg, Config, ConfigInner}; + use talon::storage::Storage; use super::*; #[rstest] - fn insert_files(db_sp: DbWrap) { + fn insert_files(db_empty: DbTest) { let dir = path!("tests" / "testfiles" / "ThetaDev1"); let temp = temp_testdir::TempDir::default(); - let store = Storage::new(temp.to_path_buf(), db_sp.clone()); + let store = Storage::new(temp.to_path_buf(), db_empty.clone(), Default::default()); store.insert_dir(dir, 1).unwrap(); - let files = db_sp + let files = db_empty .get_version_files(1) .map(|f| f.unwrap()) .collect::>(); @@ -206,16 +210,16 @@ mod storage { } #[rstest] - fn insert_zip_archive(db_sp: DbWrap) { + fn insert_zip_archive(db_empty: DbTest) { let archive = path!("tests" / "testfiles" / "archive" / "ThetaDev1.zip"); let temp = temp_testdir::TempDir::default(); - let store = Storage::new(temp.to_path_buf(), db_sp.clone()); + let store = Storage::new(temp.to_path_buf(), 
db_empty.clone(), Default::default()); store .insert_zip_archive(File::open(archive).unwrap(), 1) .unwrap(); - let files = db_sp + let files = db_empty .get_version_files(1) .map(|f| f.unwrap()) .collect::>(); @@ -229,16 +233,16 @@ mod storage { } #[rstest] - fn insert_tgz_archive(db_sp: DbWrap) { + fn insert_tgz_archive(db_empty: DbTest) { let archive = path!("tests" / "testfiles" / "archive" / "ThetaDev1.tar.gz"); let temp = temp_testdir::TempDir::default(); - let store = Storage::new(temp.to_path_buf(), db_sp.clone()); + let store = Storage::new(temp.to_path_buf(), db_empty.clone(), Default::default()); store .insert_tgz_archive(File::open(archive).unwrap(), 1) .unwrap(); - let files = db_sp + let files = db_empty .get_version_files(1) .map(|f| f.unwrap()) .collect::>(); @@ -250,4 +254,98 @@ mod storage { assert!(path.is_file()); } } + + #[rstest] + #[case::gzip(CompressionCfg {gzip_en: true, ..Default::default()}, "gz")] + #[case::brotli(CompressionCfg {brotli_en: true, ..Default::default()}, "br")] + fn insert_files_compressed( + db_empty: DbTest, + #[case] compression: CompressionCfg, + #[case] ext: &str, + ) { + let dir = path!("tests" / "testfiles" / "ThetaDev1"); + let temp = temp_testdir::TempDir::default(); + let cfg = Config::new(ConfigInner { + compression, + ..Default::default() + }); + + let store = Storage::new(temp.to_path_buf(), db_empty.clone(), cfg); + store.insert_dir(dir, 1).unwrap(); + + for f in db_empty.get_version_files(1) { + let hash = f.unwrap().1; + let hash_str = hash.encode_hex::(); + let path = temp.join(&hash_str[..2]).join(&hash_str); + let path_compressed = path.with_extension(ext); + assert!(path.is_file()); + + // Images should not be compressed + let expect = &hash_str + != "901d291a47a8a9b55c06f84e5e5f82fd2dcee65cac1406d6e878b805d45c1e93" + && &hash_str != "9f7e7971b4bfdb75429e534dea461ed90340886925078cda252cada9aa0e25f7"; + assert_eq!(path_compressed.is_file(), expect) + } + } + + #[rstest] + #[case::nocmp("", VERSION_1_2, 
"", true, "text/html", None)] + #[case::gzip("gzip", VERSION_1_2, "", true, "text/html", None)] + #[case::br("br", VERSION_1_2, "", true, "text/html", None)] + #[case::image("br", VERSION_1_2, "assets/image.jpg", false, "image/jpeg", None)] + #[case::subdir("br", VERSION_3_1, "page2", true, "text/html", Some("page2/"))] + fn get_file( + store: StorageTest, + #[case] encoding: &str, + #[case] version: u32, + #[case] path: &str, + #[case] compressible: bool, + #[case] mime: &str, + #[case] rd_path: Option<&str>, + ) { + let mut headers = HeaderMap::new(); + headers.insert(header::ACCEPT_ENCODING, encoding.parse().unwrap()); + + let expect_ext = if compressible { + match encoding { + "gzip" => Some("gz"), + "" => None, + e => Some(e), + } + } else { + None + }; + + let index_file = store.get_file(version, path, &headers).unwrap(); + assert!(index_file.file_path.is_file()); + assert_eq!( + index_file + .file_path + .extension() + .map(|s| s.to_str().unwrap()), + expect_ext + ); + assert_eq!( + index_file.encoding, + Some(encoding).filter(|s| compressible && !s.is_empty()) + ); + assert_eq!(index_file.mime.unwrap().essence_str(), mime); + assert_eq!(index_file.rd_path.as_deref(), rd_path); + } +} + +mod config { + use talon::config::Config; + + use super::*; + + #[rstest] + #[case::default("default", "config.toml")] + #[case::sparse("sparse", "config_sparse.toml")] + fn parse_config(#[case] name: &str, #[case] fname: &str) { + let p = path!("tests" / "testfiles" / "config" / fname); + let cfg = Config::from_file(p).unwrap(); + + insta::assert_ron_snapshot!(name, &cfg); + } }