diff --git a/.env.example b/.env.example deleted file mode 100644 index 4c3da3c..0000000 --- a/.env.example +++ /dev/null @@ -1,6 +0,0 @@ -CACHE_DIR=/tmp/artifactview -MAX_ARTIFACT_SIZE=100000000 -MAX_AGE_H=12 -# If you only want to access public repositories, -# create a fine-grained token with Public Repositories (read-only) access -GITHUB_TOKEN=github_pat_123456 diff --git a/.gitignore b/.gitignore index 4f83806..ea8c4bf 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1 @@ /target -/.env diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml deleted file mode 100644 index c77c173..0000000 --- a/.pre-commit-config.yaml +++ /dev/null @@ -1,12 +0,0 @@ -repos: - - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.3.0 - hooks: - - id: end-of-file-fixer - - - repo: https://github.com/cathiele/pre-commit-rust - rev: v0.1.0 - hooks: - - id: cargo-fmt - - id: cargo-clippy - args: ["--all", "--tests", "--", "-D", "warnings"] diff --git a/Cargo.lock b/Cargo.lock index 0af50cd..d885295 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,30 +17,6 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe" -[[package]] -name = "aes" -version = "0.8.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b169f7a6d4742236a0a00c541b845991d0ac43e546831af1249753ab4c3aa3a0" -dependencies = [ - "cfg-if", - "cipher", - "cpufeatures", -] - -[[package]] -name = "ahash" -version = "0.8.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e89da841a80418a9b391ebaea17f5c112ffaaa96f621d2c285b5174da76b9011" -dependencies = [ - "cfg-if", - "getrandom", - "once_cell", - "version_check", - "zerocopy", -] - [[package]] name = "aho-corasick" version = "1.1.3" @@ -50,80 +26,6 @@ dependencies = [ "memchr", ] -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - -[[package]] -name = "android_system_properties" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" -dependencies = [ - "libc", -] - -[[package]] -name = "annotate-snippets" -version = "0.9.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ccaf7e9dfbb6ab22c82e473cd1a8a7bd313c19a5b7e40970f3d89ef5a5c9e81e" -dependencies = [ - "unicode-width", - "yansi-term", -] - -[[package]] -name = "anstream" -version = "0.6.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "418c75fa768af9c03be99d17643f93f79bbba589895012a80e3452a19ddda15b" -dependencies = [ - "anstyle", - "anstyle-parse", - "anstyle-query", - "anstyle-wincon", - "colorchoice", - "is_terminal_polyfill", - "utf8parse", -] - -[[package]] -name = "anstyle" -version = "1.0.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "038dfcf04a5feb68e9c60b21c9625a54c2c0616e79b72b0fd87075a056ae1d1b" - -[[package]] -name = "anstyle-parse" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c03a11a9034d92058ceb6ee011ce58af4a9bf61491aa7e1e59ecd24bd40d22d4" -dependencies = [ - "utf8parse", -] - -[[package]] -name = "anstyle-query" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a64c907d4e79225ac72e2a354c9ce84d50ebb4586dee56c82b3ee73004f537f5" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "anstyle-wincon" -version = "3.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "61a38449feb7068f52bb06c12759005cf459ee52bb4adc1d5a7c4322d716fb19" -dependencies = [ - "anstyle", - "windows-sys 0.52.0", -] - [[package]] name = "anyhow" version = "1.0.86" @@ -131,99 +33,25 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b3d1d046238990b9cf5bcde22a3fb3584ee5cf65fb2765f454ed428c7a0063da" [[package]] -name = "array-init" -version = "0.0.4" +name = "arc-swap" +version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23589ecb866b460d3a0f1278834750268c607e8e28a1b982c907219f3178cd72" -dependencies = [ - "nodrop", -] +checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" [[package]] name = "artifactview" version = "0.1.0" dependencies = [ - "async_zip", - "axum", - "axum-extra", - "dotenvy", - "envy", - "flate2", - "futures-lite", - "headers", - "hex", - "http", - "mime", - "mime_guess", + "anyhow", + "arc-swap", "once_cell", - "path_macro", - "percent-encoding", - "pin-project", "proptest", - "quick_cache", - "rand", "regex", "reqwest", "rstest", "serde", - "serde-env", - "serde-hex", "serde_json", - "siphasher", - "thiserror", "tokio", - "tokio-util", - "tower-http", - "tracing", - "tracing-subscriber", - "url", - "yarte", - "yarte_helpers", -] - -[[package]] -name = "async-compression" -version = "0.4.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9c90a406b4495d129f00461241616194cb8a032c8d1c53c657f0961d5f8e0498" -dependencies = [ - "bzip2", - "deflate64", - "flate2", - "futures-core", - "futures-io", - "memchr", - "pin-project-lite", - "xz2", - "zstd 0.13.1", - "zstd-safe 7.1.0", -] - -[[package]] -name = "async-trait" -version = "0.1.80" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6fa2087f2753a7da8cc1c0dbfcf89579dd57458e36769de5ac750b4671737ca" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "async_zip" -version = "0.0.17" -dependencies = [ - "async-compression", - "chrono", - "crc32fast", - "env_logger", - "futures-lite", - "pin-project", - "thiserror", - "tokio", - "tokio-util", - "zip", ] [[package]] @@ -238,84 +66,6 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" -[[package]] -name = "axum" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3a6c9af12842a67734c9a2e355436e5d03b22383ed60cf13cd0c18fbfe3dcbcf" -dependencies = [ - "async-trait", - "axum-core", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "hyper", - "hyper-util", - "itoa", - "matchit", - "memchr", - "mime", - "percent-encoding", - "pin-project-lite", - "rustversion", - "serde", - "serde_json", - "serde_path_to_error", - "serde_urlencoded", - "sync_wrapper 1.0.1", - "tokio", - "tower", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "axum-core" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a15c63fd72d41492dc4f497196f5da1fb04fb7529e631d73630d1b491e47a2e3" -dependencies = [ - "async-trait", - "bytes", - "futures-util", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - 
"rustversion", - "sync_wrapper 0.1.2", - "tower-layer", - "tower-service", - "tracing", -] - -[[package]] -name = "axum-extra" -version = "0.9.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0be6ea09c9b96cb5076af0de2e383bd2bc0c18f827cf1967bdd353e0b910d733" -dependencies = [ - "axum", - "axum-core", - "bytes", - "futures-util", - "headers", - "http", - "http-body", - "http-body-util", - "mime", - "pin-project-lite", - "serde", - "tower", - "tower-layer", - "tower-service", - "tracing", -] - [[package]] name = "backtrace" version = "0.3.71" @@ -331,24 +81,12 @@ dependencies = [ "rustc-demangle", ] -[[package]] -name = "base64" -version = "0.21.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" - [[package]] name = "base64" version = "0.22.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" -[[package]] -name = "base64ct" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c3c1a368f70d6cf7302d78f8f7093da241fb8e8807c05cc9e51a125895a6d5b" - [[package]] name = "bit-set" version = "0.5.3" @@ -376,64 +114,23 @@ version = "2.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf4b9d6a944f767f8e5e0db018570623c85f3d925ac718db4e06d0187adb21c1" -[[package]] -name = "block-buffer" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3078c7629b62d3f0439517fa394996acacc5cbc91c5a20d8c658e77abd503a71" -dependencies = [ - "generic-array", -] - [[package]] name = "bumpalo" version = "3.16.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "79296716171880943b8470b5f8d03aa55eb2e645a4874bdbb28adb49162e012c" -[[package]] -name = "byteorder" -version = "1.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" - [[package]] name = "bytes" version = "1.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "514de17de45fdb8dc022b1a7975556c53c86f9f0aa5f534b98977b171857c2c9" -[[package]] -name = "bzip2" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bdb116a6ef3f6c3698828873ad02c3014b3c85cadb88496095628e3ef1e347f8" -dependencies = [ - "bzip2-sys", - "libc", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.11+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "736a955f3fa7875102d57c82b8cac37ec45224a07fd32d58f9f7a186b6cd4cdc" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - [[package]] name = "cc" version = "1.0.98" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "41c270e7540d725e65ac7f1b212ac8ce349719624d7bcff99f8e2e488e8cf03f" -dependencies = [ - "jobserver", - "libc", - "once_cell", -] [[package]] name = "cfg-if" @@ -441,46 +138,6 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" -[[package]] -name = "chrono" -version = "0.4.38" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" -dependencies = [ - "android-tzdata", - "iana-time-zone", - "num-traits", - "windows-targets 0.52.5", -] - -[[package]] -name = 
"cipher" -version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "773f3b9af64447d2ce9850330c473515014aa235e6a783b02db81ff39e4a3dad" -dependencies = [ - "crypto-common", - "inout", -] - -[[package]] -name = "colorchoice" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0b6a852b24ab71dffc585bcb46eaf7959d175cb865a7152e35b348d1b2960422" - -[[package]] -name = "constant_time_eq" -version = "0.1.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" - -[[package]] -name = "convert_case" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6245d59a3e82a7fc217c5828a6692dbc6dfb63a0c8c90495621f7b9d79704a0e" - [[package]] name = "core-foundation" version = "0.9.4" @@ -497,91 +154,6 @@ version = "0.8.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "06ea2b9bc92be3c2baa9334a323ebca2d6f074ff852cd1d7b11064035cd3868f" -[[package]] -name = "cpufeatures" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53fe5e26ff1b7aef8bca9c6080520cfb8d9333c7568e1829cef191a9723e5504" -dependencies = [ - "libc", -] - -[[package]] -name = "crc32fast" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "crossbeam-utils" -version = "0.8.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80" - -[[package]] -name = "crypto-common" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bfb12502f3fc46cca1bb51ac28df9d618d813cdc3d2f25b9fe775a34af26bb3" -dependencies = [ - "generic-array", - "typenum", -] - -[[package]] -name = "deflate64" -version = "0.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83ace6c86376be0b6cdcf3fb41882e81d94b31587573d1cfa9d01cd06bba210d" - -[[package]] -name = "deranged" -version = "0.3.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b42b6fa04a440b495c8b04d0e71b707c585f83cb9cb28cf8cd0d976c315e31b4" -dependencies = [ - "powerfmt", -] - -[[package]] -name = "derive_more" -version = "0.99.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4fb810d30a7c1953f91334de7244731fc3f3c10d7fe163338a35b9f640960321" -dependencies = [ - "convert_case", - "proc-macro2", - "quote", - "rustc_version", - "syn 1.0.109", -] - -[[package]] -name = "digest" -version = "0.10.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "dotenvy" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1aaf95b3e5c8f23aa320147307562d361db0ae0d51242340f558153b4eb2439b" - -[[package]] -name = "dtoa" -version = "1.0.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dcbb2bf8e87535c23f7a8a321e364ce21462d0ff10cb6407820e8e96dfff6653" - [[package]] name = "encoding_rs" version = "0.8.34" @@ -591,36 +163,6 @@ dependencies = [ "cfg-if", ] -[[package]] -name = "env_filter" -version = "0.1.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea" -dependencies = [ - "log", - "regex", -] - -[[package]] -name = "env_logger" -version = "0.11.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9" -dependencies = [ - "anstream", - "anstyle", - "env_filter", - "humantime", - "log", -] - -[[package]] -name = "envy" -version = "0.4.2" -dependencies = [ - "serde", -] - [[package]] name = "equivalent" version = "1.0.1" @@ -643,16 +185,6 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" -[[package]] -name = "flate2" -version = "1.0.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae" -dependencies = [ - "crc32fast", - "miniz_oxide", -] - [[package]] name = "fnv" version = "1.0.7" @@ -698,25 +230,6 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" -[[package]] -name = "futures-io" -version = "0.3.30" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a44623e20b9681a318efdd71c299b6b222ed6f231972bfe2f224ebad6311f0c1" - -[[package]] -name = "futures-lite" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52527eb5074e35e9339c6b4e8d12600c7128b68fb25dcb9fa9dec18f7c25f3a5" -dependencies = [ - "fastrand", - "futures-core", - "futures-io", - "parking", - "pin-project-lite", -] - [[package]] name = "futures-sink" version = "0.3.30" @@ -741,16 +254,6 @@ dependencies = [ "pin-utils", ] -[[package]] -name = "generic-array" -version = "0.14.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "85649ca51fd72272d7821adaf274ad91c288277713d9c18820d8499a7ff69e9a" -dependencies = [ - "typenum", - "version_check", -] - [[package]] name = "getrandom" version = "0.2.15" @@ -799,51 +302,6 @@ version = "0.14.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e5274423e17b7c9fc20b6e7e208532f9b19825d82dfd615708b70edd83df41f1" -[[package]] -name = "headers" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "322106e6bd0cba2d5ead589ddb8150a13d7c4217cf80d7c4f682ca994ccc6aa9" -dependencies = [ - "base64 0.21.7", - "bytes", - "headers-core", - "http", - "httpdate", - "mime", - "sha1", -] - -[[package]] -name = "headers-core" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54b4a22553d4242c49fddb9ba998a99962b5cc6f22cb5a3482bec22522403ce4" -dependencies = [ - "http", -] - -[[package]] -name = "hermit-abi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" - -[[package]] -name = "hex" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7f24254aa9a54b5c858eaee2f5bccdb46aaf0e486a595ed5fd8f86ba55232a70" - -[[package]] -name = "hmac" -version = "0.12.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c49c37c09c17a53d937dfbb742eb3a961d65a994e6bcdcf37e7399d0cc8ab5e" -dependencies = [ - "digest", -] - [[package]] name = 
"http" version = "1.1.0" @@ -884,18 +342,6 @@ version = "1.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d897f394bad6a705d5f4104762e116a75639e470d80901eed05a860a95cb1904" -[[package]] -name = "httpdate" -version = "1.0.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" - -[[package]] -name = "humantime" -version = "2.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" - [[package]] name = "hyper" version = "1.3.1" @@ -909,10 +355,9 @@ dependencies = [ "http", "http-body", "httparse", - "httpdate", "itoa", "pin-project-lite", - "smallvec 1.13.2", + "smallvec", "tokio", "want", ] @@ -953,29 +398,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "iana-time-zone" -version = "0.1.60" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" -dependencies = [ - "android_system_properties", - "core-foundation-sys", - "iana-time-zone-haiku", - "js-sys", - "wasm-bindgen", - "windows-core", -] - -[[package]] -name = "iana-time-zone-haiku" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" -dependencies = [ - "cc", -] - [[package]] name = "idna" version = "0.5.0" @@ -996,42 +418,18 @@ dependencies = [ "hashbrown", ] -[[package]] -name = "inout" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a0c10553d664a4d0bcff9f4215d0aac67a639cc68ef660840afe309b807bc9f5" -dependencies = [ - "generic-array", -] - [[package]] name = "ipnet" version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" -[[package]] -name = "is_terminal_polyfill" -version = "1.70.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8478577c03552c21db0e2724ffb8986a5ce7af88107e6be5d2ee6e158c12800" - [[package]] name = "itoa" version = "1.0.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" -[[package]] -name = "jobserver" -version = "0.1.31" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d2b099aaa34a9751c5bf0878add70444e1ed2dd73f347be99003d4577277de6e" -dependencies = [ - "libc", -] - [[package]] name = "js-sys" version = "0.3.69" @@ -1065,45 +463,12 @@ version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "78b3ae25bc7c8c38cec158d1f2757ee79e9b3740fbc7ccf0e59e4b08d793fa89" -[[package]] -name = "lock_api" -version = "0.4.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07af8b9cdd281b7915f413fa73f29ebd5d55d0d3f0155584dade1ff18cea1b17" -dependencies = [ - "autocfg", - "scopeguard", -] - [[package]] name = "log" version = "0.4.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" -[[package]] -name = "lzma-sys" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] - -[[package]] -name 
= "matchit" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" - -[[package]] -name = "maybe-uninit" -version = "2.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" - [[package]] name = "memchr" version = "2.7.2" @@ -1116,16 +481,6 @@ version = "0.3.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6877bb514081ee2a7ff5ef9de3281f14a4dd4bceac4c09388074a6b5df8a139a" -[[package]] -name = "mime_guess" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4192263c238a5f0d0c6bfd21f336a313a4ce1c450542449ca191bb657b4642ef" -dependencies = [ - "mime", - "unicase", -] - [[package]] name = "miniz_oxide" version = "0.7.3" @@ -1164,28 +519,6 @@ dependencies = [ "tempfile", ] -[[package]] -name = "nodrop" -version = "0.1.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" - -[[package]] -name = "nu-ansi-term" -version = "0.46.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" -dependencies = [ - "overload", - "winapi", -] - -[[package]] -name = "num-conv" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" - [[package]] name = "num-traits" version = "0.2.19" @@ -1196,16 +529,6 @@ dependencies = [ "libm", ] -[[package]] -name = "num_cpus" -version = "1.16.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4161fcb6d602d4d2081af7c3a45852d875a03dd337a6bfdd6e06407b61342a43" -dependencies = [ - "hermit-abi", - "libc", -] - [[package]] name = "object" version = "0.32.2" @@ -1244,7 +567,7 @@ checksum = "a948666b637a0f465e8564c73e89d4dde00d72d4d473cc972f390fc3dcee7d9c" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1265,70 +588,6 @@ dependencies = [ "vcpkg", ] -[[package]] -name = "overload" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" - -[[package]] -name = "parking" -version = "2.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bb813b8af86854136c6922af0598d719255ecb2179515e6e7730d468f05c9cae" - -[[package]] -name = "parking_lot" -version = "0.12.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f1bf18183cf54e8d6059647fc3063646a1801cf30896933ec2311622cc4b9a27" -dependencies = [ - "lock_api", - "parking_lot_core", -] - -[[package]] -name = "parking_lot_core" -version = "0.9.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e401f977ab385c9e4e3ab30627d6f26d00e2c73eef317493c4ec6d468726cf8" -dependencies = [ - "cfg-if", - "libc", - "redox_syscall", - "smallvec 1.13.2", - "windows-targets 0.52.5", -] - -[[package]] -name = "password-hash" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7676374caaee8a325c9e7a2ae557f216c5563a171d6997b0ef8a65af35147700" -dependencies = [ - "base64ct", - "rand_core", - "subtle", -] - -[[package]] -name = "path_macro" -version = "1.0.0" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6e819bbd49d5939f682638fa54826bf1650abddcd65d000923de8ad63cc7d15" - -[[package]] -name = "pbkdf2" -version = "0.11.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83a0692ec44e4cf1ef28ca317f14f8f07da2d95ec3fa01f86e4467b725e60917" -dependencies = [ - "digest", - "hmac", - "password-hash", - "sha2", -] - [[package]] name = "percent-encoding" version = "2.3.1" @@ -1352,7 +611,7 @@ checksum = "2f38a4412a78282e09a2cf38d195ea5420d15ba0602cb375210efbc877243965" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1373,28 +632,12 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" -[[package]] -name = "prettyplease" -version = "0.1.25" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c8646e95016a7a6c4adea95bafa8a16baab64b583356217f2c85db4a39d9a86" -dependencies = [ - "proc-macro2", - "syn 1.0.109", -] - [[package]] name = "proc-macro2" version = "1.0.84" @@ -1430,18 +673,6 @@ version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a1d01941d82fa2ab50be1e79e6714289dd7cde78eba4c074bc5a4374f650dfe0" -[[package]] -name = "quick_cache" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "347e1a588d1de074eeb3c00eadff93db4db65aeb62aee852b1efd0949fe65b6c" -dependencies = [ - "ahash", - "equivalent", - "hashbrown", - "parking_lot", -] - [[package]] name = "quote" version = "1.0.36" @@ -1490,15 +721,6 @@ dependencies = [ "rand_core", ] -[[package]] -name = "redox_syscall" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "469052894dcb553421e483e4209ee581a45100d31b4018de03e5a7ad86374a7e" -dependencies = [ - "bitflags 2.5.0", -] - [[package]] name = "regex" version = "1.10.4" @@ -1540,7 +762,7 @@ version = "0.12.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "566cafdd92868e0939d3fb961bd0dc25fcfaaed179291093b3d43e6b3150ea10" dependencies = [ - "base64 0.22.1", + "base64", "bytes", "encoding_rs", "futures-core", @@ -1564,7 +786,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "sync_wrapper 0.1.2", + "sync_wrapper", "system-configuration", "tokio", "tokio-native-tls", @@ -1599,7 +821,7 @@ dependencies = [ "regex", "relative-path", "rustc_version", - "syn 2.0.66", + "syn", "unicode-ident", ] @@ -1637,7 +859,7 @@ version = "2.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "29993a25686778eb88d4189742cd713c9bce943bc54251a33509dc63cbacf73d" dependencies = [ - "base64 0.22.1", + "base64", "rustls-pki-types", ] @@ -1647,12 +869,6 @@ version = "1.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "976295e77ce332211c0d24d92c0e83e50f5c5f046d11082cea19f3df13a3562d" -[[package]] -name = "rustversion" -version = "1.0.17" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"955d28af4278de8121b7ebeb796b6a45735dc01436d898801014aced2773a3d6" - [[package]] name = "rusty-fork" version = "0.3.0" @@ -1680,12 +896,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "scopeguard" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" - [[package]] name = "security-framework" version = "2.11.0" @@ -1724,28 +934,6 @@ dependencies = [ "serde_derive", ] -[[package]] -name = "serde-env" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c68119a0846249fd6f4b38561b4b4727dbc4fd9fea074f1253bca7d50440ce58" -dependencies = [ - "anyhow", - "log", - "serde", -] - -[[package]] -name = "serde-hex" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca37e3e4d1b39afd7ff11ee4e947efae85adfddf4841787bfa47c470e96dc26d" -dependencies = [ - "array-init", - "serde", - "smallvec 0.6.14", -] - [[package]] name = "serde_derive" version = "1.0.203" @@ -1754,7 +942,7 @@ checksum = "500cbc0ebeb6f46627f50f3f5811ccf6bf00643be300b4c3eabc0ef55dc5b5ba" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -1768,16 +956,6 @@ dependencies = [ "serde", ] -[[package]] -name = "serde_path_to_error" -version = "0.1.16" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "af99884400da37c88f5e9146b7f1fd0fbcae8f6eec4e9da38b67d05486f814a6" -dependencies = [ - "itoa", - "serde", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -1790,52 +968,6 @@ dependencies = [ "serde", ] -[[package]] -name = "sha1" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3bf829a2d51ab4a5ddf1352d8470c140cadc8301b2ae1789db023f01cedd6ba" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sha2" -version = "0.10.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "793db75ad2bcafc3ffa7c68b215fee268f537982cd901d132f89c6343f3a3dc8" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" -dependencies = [ - "lazy_static", -] - -[[package]] -name = "signal-hook-registry" -version = "1.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9e9e0b4211b72e7b8b6e85c807d36c212bdb33ea8587f7569562a84df5465b1" -dependencies = [ - "libc", -] - -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "slab" version = "0.4.9" @@ -1845,15 +977,6 @@ dependencies = [ "autocfg", ] -[[package]] -name = "smallvec" -version = "0.6.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b97fcaeba89edba30f044a10c6a3cc39df9c3f17d7cd829dd1446cab35f890e0" -dependencies = [ - "maybe-uninit", -] - [[package]] name = "smallvec" version = "1.13.2" @@ -1870,23 +993,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "subtle" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "81cdd64d312baedb58e21336b31bc043b77e01cc99033ce76ef539f78e965ebc" - -[[package]] -name = "syn" -version = "1.0.109" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" -dependencies = [ - "proc-macro2", - "quote", - "unicode-ident", -] - [[package]] name = "syn" version = "2.0.66" @@ -1904,12 +1010,6 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" -[[package]] -name = "sync_wrapper" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a7065abeca94b6a8a577f9bd45aa0867a2238b74e8eb67cf10d492bc39351394" - [[package]] name = "system-configuration" version = "0.5.1" @@ -1943,55 +1043,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "thiserror" -version = "1.0.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c546c80d6be4bc6a00c0f01730c08df82eaa7a7a61f11d656526506112cc1709" -dependencies = [ - "thiserror-impl", -] - -[[package]] -name = "thiserror-impl" -version = "1.0.61" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46c3384250002a6d5af4d114f2845d37b57521033f30d5c3f46c4d70e1197533" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "thread_local" -version = "1.1.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" -dependencies = [ - "cfg-if", - "once_cell", -] - -[[package]] -name = "time" -version = "0.3.36" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5dfd88e563464686c916c7e46e623e520ddc6d79fa6641390f2e3fa86e83e885" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde", - "time-core", -] - -[[package]] -name = "time-core" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef927ca75afb808a4d64dd374f00a2adf8d0fcff8e7b184af886c3c87ec4a3f3" - [[package]] name = "tinyvec" version = "1.6.0" @@ -2017,10 +1068,7 @@ dependencies = [ "bytes", "libc", "mio", - "num_cpus", - "parking_lot", "pin-project-lite", - "signal-hook-registry", "socket2", "tokio-macros", "windows-sys 0.48.0", @@ -2034,7 +1082,7 @@ checksum = "5b8a1e28f2deaa14e508979454cb3a223b10b938b45af148bc0986de36f1923b" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", ] [[package]] @@ -2055,21 +1103,11 @@ checksum = "9cf6b47b3771c49ac75ad09a6162f53ad4b8088b76ac60e8ec1455b31a189fe1" dependencies = [ "bytes", "futures-core", - "futures-io", "futures-sink", "pin-project-lite", "tokio", ] -[[package]] -name = "toml" -version = "0.5.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4f7f0dd8d50a853a531c426359045b1998f04219d88799810762cd4ad314234" -dependencies = [ - "serde", -] - [[package]] name = "tower" version = "0.4.13" @@ -2083,24 +1121,6 @@ dependencies = [ "tokio", "tower-layer", "tower-service", - "tracing", -] - -[[package]] -name = "tower-http" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" -dependencies = [ - "bitflags 2.5.0", - "bytes", - "http", - "http-body", - "http-body-util", - "pin-project-lite", - "tower-layer", - "tower-service", - "tracing", ] [[package]] @@ -2121,23 +1141,10 @@ version = "0.1.40" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ - "log", "pin-project-lite", - "tracing-attributes", "tracing-core", ] -[[package]] -name = "tracing-attributes" -version = "0.1.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - [[package]] name = "tracing-core" version = "0.1.32" @@ -2145,32 +1152,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" dependencies = [ "once_cell", - "valuable", -] - -[[package]] -name = "tracing-log" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] - -[[package]] -name = "tracing-subscriber" -version = "0.3.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" -dependencies = [ - "nu-ansi-term", - "sharded-slab", - "smallvec 1.13.2", - "thread_local", - "tracing-core", - "tracing-log", ] [[package]] @@ -2179,27 +1160,12 @@ version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" -[[package]] -name = "typenum" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "42ff0bf0c66b8238c6f3b578df37d0b7848e55df8577b3f74f92a69acceeb825" - [[package]] name = "unarray" version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eaea85b334db583fe3274d12b4cd1880032beab409c0d774be044d4480ab9a94" -[[package]] -name = "unicase" -version = "2.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7d2d4dafb69621809a81864c9c1b864479e1235c0dd4e199924b9742439ed89" -dependencies = [ - "version_check", -] - [[package]] name = "unicode-bidi" version = "0.3.15" @@ -2221,18 +1187,6 @@ dependencies = [ "tinyvec", ] -[[package]] -name = "unicode-width" -version = "0.1.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "68f5e5f3158ecfd4b8ff6fe086db7c8467a2dfdac97fe420f2b7c4aa97af66d6" - -[[package]] -name = "unicode-xid" -version = "0.2.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" - [[package]] name = "url" version = "2.5.0" @@ -2244,46 +1198,12 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "utf8parse" -version = "0.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" - -[[package]] -name = "v_eval" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0dd8b599d797eb038d0dde9a3860aacb6bbba3bffa4ac64f807c8673820cc9d9" -dependencies = [ - "regex", - "syn 1.0.109", -] - -[[package]] -name = "v_htmlescape" -version = "0.15.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e8257fbc510f0a46eb602c10215901938b5c2a7d5e70fc11483b1d3c9b5b18c" - -[[package]] -name = "valuable" -version = "0.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" - [[package]] name = "vcpkg" version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" -[[package]] -name = "version_check" -version = "0.9.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" - [[package]] name = "wait-timeout" version = "0.2.0" @@ -2329,7 +1249,7 @@ dependencies = [ "once_cell", "proc-macro2", "quote", - "syn 2.0.66", + "syn", "wasm-bindgen-shared", ] @@ -2363,7 +1283,7 @@ checksum = "e94f17b526d0a461a191c78ea52bbce64071ed5c04c9ffe424dcb38f74171bb7" dependencies = [ "proc-macro2", "quote", - "syn 2.0.66", + "syn", "wasm-bindgen-backend", "wasm-bindgen-shared", ] @@ -2384,37 +1304,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "winapi" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" -dependencies = [ - "winapi-i686-pc-windows-gnu", - "winapi-x86_64-pc-windows-gnu", -] - -[[package]] -name = "winapi-i686-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" - -[[package]] -name = "winapi-x86_64-pc-windows-gnu" -version = "0.4.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" - -[[package]] -name = "windows-core" -version = "0.52.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" -dependencies = [ - "windows-targets 0.52.5", -] - [[package]] name = "windows-sys" version = "0.48.0" @@ -2563,193 +1452,3 @@ dependencies = [ "cfg-if", "windows-sys 0.48.0", ] - -[[package]] -name = "xz2" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] - -[[package]] -name = "yansi-term" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fe5c30ade05e61656247b2e334a031dfd0cc466fadef865bdcdea8d537951bf1" -dependencies = [ - "winapi", -] - -[[package]] -name = "yarte" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfce1df93f3b16e5272221a559e60bbbaaa71dbc042a43996d223e51a690aab2" -dependencies = [ - "yarte_derive", - "yarte_helpers", -] - -[[package]] -name = "yarte_codegen" -version = "0.15.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a79312078b97a195de91a8c1457c2e0d7abd97e6e605f3cdeb01b3c105d2cff" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "yarte_helpers", - "yarte_hir", -] - -[[package]] -name = "yarte_derive" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b296edd7e1a81717b6f794baa2de8dfe89646050847161550b2d963b3ca6fe80" -dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", - "yarte_codegen", - "yarte_helpers", - "yarte_hir", - "yarte_parser", -] - -[[package]] -name = "yarte_helpers" -version = "0.15.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"e0d1076f8cee9541ea5ffbecd9102f751252c91f085e7d30a18a3ce805ebd3ee" -dependencies = [ - "dtoa", - "itoa", - "prettyplease", - "serde", - "syn 1.0.109", - "toml", - "v_htmlescape", -] - -[[package]] -name = "yarte_hir" -version = "0.15.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dee42d2f704a3b1d8bc111d47a705d1302a0943d85e4c230f4e8300ee0dde4a6" -dependencies = [ - "derive_more", - "proc-macro2", - "quote", - "syn 1.0.109", - "v_eval", - "v_htmlescape", - "yarte_helpers", - "yarte_parser", -] - -[[package]] -name = "yarte_parser" -version = "0.15.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "538f72049cf7104e12d5c444048d112cb8fc788a31308afd912442a381ba860c" -dependencies = [ - "annotate-snippets", - "derive_more", - "proc-macro2", - "quote", - "serde", - "syn 1.0.109", - "unicode-xid", - "yarte_helpers", -] - -[[package]] -name = "zerocopy" -version = "0.7.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ae87e3fcd617500e5d106f0380cf7b77f3c6092aae37191433159dda23cfb087" -dependencies = [ - "zerocopy-derive", -] - -[[package]] -name = "zerocopy-derive" -version = "0.7.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "15e934569e47891f7d9411f1a451d947a60e000ab3bd24fbb970f000387d1b3b" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.66", -] - -[[package]] -name = "zip" -version = "0.6.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "760394e246e4c28189f19d488c058bf16f564016aefac5d32bb1f3b51d5e9261" -dependencies = [ - "aes", - "byteorder", - "bzip2", - "constant_time_eq", - "crc32fast", - "crossbeam-utils", - "flate2", - "hmac", - "pbkdf2", - "sha1", - "time", - "zstd 0.11.2+zstd.1.5.2", -] - -[[package]] -name = "zstd" -version = "0.11.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "20cc960326ece64f010d2d2107537f26dc589a6573a316bd5b1dba685fa5fde4" -dependencies = [ - "zstd-safe 5.0.2+zstd.1.5.2", -] - -[[package]] -name = "zstd" -version = "0.13.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d789b1514203a1120ad2429eae43a7bd32b90976a7bb8a05f7ec02fa88cc23a" -dependencies = [ - "zstd-safe 7.1.0", -] - -[[package]] -name = "zstd-safe" -version = "5.0.2+zstd.1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d2a5585e04f9eea4b2a3d1eca508c4dee9592a89ef6f450c11719da0726f4db" -dependencies = [ - "libc", - "zstd-sys", -] - -[[package]] -name = "zstd-safe" -version = "7.1.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cd99b45c6bc03a018c8b8a86025678c87e55526064e38f9df301989dce7ec0a" -dependencies = [ - "zstd-sys", -] - -[[package]] -name = "zstd-sys" -version = "2.0.10+zstd.1.5.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c253a4914af5bafc8fa8c86ee400827e83cf6ec01195ec1f1ed8441bf00d65aa" -dependencies = [ - "cc", - "pkg-config", -] diff --git a/Cargo.toml b/Cargo.toml index 1f8173d..c181bb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -4,47 +4,15 @@ version = "0.1.0" edition = "2021" [dependencies] -async_zip = { path = "crates/async_zip", features = ["tokio", "tokio-fs", "deflate"] } -axum = { version = "0.7.5", features = ["http2"] } -axum-extra = { version = "0.9.3", features = ["typed-header"] } -dotenvy = "0.15.7" -envy = { path = "crates/envy" } -flate2 = "1.0.30" -futures-lite = "2.3.0" -headers = "0.4.0" -hex = "0.4.3" -http = "1.1.0" -mime 
= "0.3.17" -mime_guess = "2.0.4" +anyhow = "1.0.86" +arc-swap = "1.7.1" once_cell = "1.19.0" -path_macro = "1.0.0" -percent-encoding = "2.3.1" -pin-project = "1.1.5" -quick_cache = "0.5.1" -rand = "0.8.5" regex = "1.10.4" reqwest = { version = "0.12.4", features = ["json"] } serde = { version = "1.0.203", features = ["derive"] } -serde-env = "0.1.1" -serde-hex = "0.1.0" serde_json = "1.0.117" -siphasher = "1.0.1" -thiserror = "1.0.61" -tokio = { version = "1.37.0", features = ["macros", "fs", "rt-multi-thread"] } -tokio-util = { version = "0.7.11", features = ["io"] } -tower-http = { version = "0.5.2", features = ["trace"] } -tracing = "0.1.40" -tracing-subscriber = "0.3.18" -url = "2.5.0" -yarte = "0.15.7" - -[build-dependencies] -yarte_helpers = "0.15.8" +tokio = {version = "1.37.0", features = ["macros"]} [dev-dependencies] proptest = "1.4.0" rstest = { version = "0.19.0", default-features = false } - -[workspace] -members = [".", "crates/*"] -resolver = "2" diff --git a/Justfile b/Justfile deleted file mode 100644 index 7e99822..0000000 --- a/Justfile +++ /dev/null @@ -1,30 +0,0 @@ -test: - cargo test - -release: - #!/usr/bin/env bash - set -e - - CRATE="artifactview" - CHANGELOG="CHANGELOG.md" - - VERSION=$(cargo pkgid --package "$CRATE" | tr '#@' '\n' | tail -n 1) - TAG="v${VERSION}" - echo "Releasing $TAG:" - - if git rev-parse "$TAG" >/dev/null 2>&1; then echo "version tag $TAG already exists"; exit 1; fi - - CLIFF_ARGS="--tag '${TAG}' --unreleased" - echo "git-cliff $CLIFF_ARGS" - if [ -f "$CHANGELOG" ]; then - eval "git-cliff $CLIFF_ARGS --prepend '$CHANGELOG'" - else - eval "git-cliff $CLIFF_ARGS --output '$CHANGELOG'" - fi - - git add "$CHANGELOG" - git commit -m "chore(release): release $CRATE v$VERSION" - - awk 'BEGIN{RS="(^|\n)## [^\n]+\n*"} NR==2 { print }' "$CHANGELOG" | git tag -as -F - --cleanup whitespace "$TAG" - - echo "🚀 Run 'git push origin $TAG' to publish" diff --git a/README.md b/README.md index 6d078a3..69cc74b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# Artifactview +# artifactview View CI build artifacts from Forgejo/Github using your web browser. @@ -20,21 +20,4 @@ status code 404 if no file was found. Artifactview accepts URLs in the given format: `-------.example.com` -Example: `https://github-com--theta-dev--example-project--4-11.example.com` - -## Security considerations - -It is recommended to use the whitelist feature to limit Artifactview to access only trusted -servers, users and organizations. - -Since many -[well-known URIs](https://www.iana.org/assignments/well-known-uris/well-known-uris.xhtml) -are used to configure security-relevant properties of a website or are used to attest -ownership of a website (like `.well-known/acme-challenge` for issuing TLS certificates), -Artifactview will serve no files from the `.well-known` folder. - -There is a configurable limit for both the maximum downloaded artifact size and the -maximum size of individual files to be served (100MB by default). -Additionally there is a configurable timeout for the zip file indexing operation. -These measures should protect the server againt denial-of-service attacks like -overfilling the server drive or uploading zip bombs. 
+Example: `github-com--theta-dev--example-project--4-11.example.com` diff --git a/build.rs b/build.rs deleted file mode 100644 index 7745f71..0000000 --- a/build.rs +++ /dev/null @@ -1,3 +0,0 @@ -fn main() { - yarte_helpers::recompile::when_changed(); -} diff --git a/crates/async_zip/.cargo-ok b/crates/async_zip/.cargo-ok deleted file mode 100644 index 59cae28..0000000 --- a/crates/async_zip/.cargo-ok +++ /dev/null @@ -1 +0,0 @@ -{"v":1} diff --git a/crates/async_zip/.cargo_vcs_info.json b/crates/async_zip/.cargo_vcs_info.json deleted file mode 100644 index 5f86548..0000000 --- a/crates/async_zip/.cargo_vcs_info.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "git": { - "sha1": "e4ee7a521f624aea3c2c3eef6b78fb1ec057504b" - }, - "path_in_vcs": "" -} diff --git a/crates/async_zip/.github/dependabot.yml b/crates/async_zip/.github/dependabot.yml deleted file mode 100644 index 60ab683..0000000 --- a/crates/async_zip/.github/dependabot.yml +++ /dev/null @@ -1,12 +0,0 @@ -version: 2 -updates: - - package-ecosystem: "github-actions" - # Workflow files stored in the - # default location of `.github/workflows` - directory: "/" - schedule: - interval: "daily" - - package-ecosystem: "cargo" - directory: "/" - schedule: - interval: "daily" diff --git a/crates/async_zip/.github/workflows/ci-clippy.yml b/crates/async_zip/.github/workflows/ci-clippy.yml deleted file mode 100644 index d3b12e7..0000000 --- a/crates/async_zip/.github/workflows/ci-clippy.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: clippy (Linux) - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Run clippy - run: cargo clippy --all-features -- -D clippy::all diff --git a/crates/async_zip/.github/workflows/ci-fmt.yml b/crates/async_zip/.github/workflows/ci-fmt.yml deleted file mode 100644 index ea6d7e4..0000000 --- a/crates/async_zip/.github/workflows/ci-fmt.yml +++ /dev/null @@ -1,20 +0,0 @@ -name: rustfmt (Linux) - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - name: Run rustfmt - run: cargo fmt --check diff --git a/crates/async_zip/.github/workflows/ci-linux.yml b/crates/async_zip/.github/workflows/ci-linux.yml deleted file mode 100644 index 6d81998..0000000 --- a/crates/async_zip/.github/workflows/ci-linux.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: Test (Linux) - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Test [no features] - run: cargo test --verbose - - - name: Test ['chrono' feature] - run: cargo test --verbose --features chrono - - - name: Test ['tokio' feature] - run: cargo test --verbose --features tokio - - - name: Test ['tokio-fs' feature] - run: cargo test --verbose --features tokio-fs - - - name: Test ['deflate' feature] - run: cargo test --verbose --features deflate - - - name: Test ['bzip2' feature] - run: cargo test --verbose --features bzip2 - - - name: Test ['lzma' feature] - run: cargo test --verbose --features lzma - - - name: Test ['zstd' feature] - run: cargo test --verbose --features zstd - - - name: Test ['xz' feature] - run: cargo test --verbose --features xz - - - name: Test ['deflate64' feature] - run: cargo test --verbose --features deflate64 - - - name: Test ['full' 
feature] - run: cargo test --verbose --features full diff --git a/crates/async_zip/.github/workflows/ci-typos.yml b/crates/async_zip/.github/workflows/ci-typos.yml deleted file mode 100644 index 9e60d51..0000000 --- a/crates/async_zip/.github/workflows/ci-typos.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: typos (Linux) - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - - name: Install typos - run: cargo install typos-cli - - - name: Run typos - run: typos --format brief diff --git a/crates/async_zip/.github/workflows/ci-wasm.yml b/crates/async_zip/.github/workflows/ci-wasm.yml deleted file mode 100644 index 3214a73..0000000 --- a/crates/async_zip/.github/workflows/ci-wasm.yml +++ /dev/null @@ -1,24 +0,0 @@ -name: Build (WASM) - -on: - push: - branches: [ main ] - pull_request: - branches: [ main ] - -env: - CARGO_TERM_COLOR: always - -jobs: - build: - name: Build ['full-wasm' feature] on ${{ matrix.target }} - runs-on: ubuntu-latest - strategy: - matrix: - target: - - wasm32-wasi - - wasm32-unknown-unknown - steps: - - uses: actions/checkout@v4 - - run: rustup target add ${{ matrix.target }} - - run: cargo build --verbose --target ${{ matrix.target }} --features full-wasm diff --git a/crates/async_zip/.gitignore b/crates/async_zip/.gitignore deleted file mode 100644 index a08f02c..0000000 --- a/crates/async_zip/.gitignore +++ /dev/null @@ -1,15 +0,0 @@ -# Generated by Cargo -# will have compiled files and executables -/target/ -/examples/**/target/ - -# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries -# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html -/Cargo.lock - -# These are backup files generated by rustfmt -**/*.rs.bk -/examples/**/*.rs.bk - -# Ignore generated zip test file that is large -/src/tests/read/zip64/zip64many.zip diff --git a/crates/async_zip/Cargo.toml b/crates/async_zip/Cargo.toml deleted file mode 100644 index 2f32d76..0000000 --- a/crates/async_zip/Cargo.toml +++ /dev/null @@ -1,63 +0,0 @@ -[package] -name = "async_zip" -version = "0.0.17" -edition = "2021" -authors = ["Harry [hello@majored.pw]"] -repository = "https://github.com/Majored/rs-async-zip" -description = "An asynchronous ZIP archive reading/writing crate." 
-readme = "README.md" -license = "MIT" -documentation = "https://docs.rs/async_zip/" -homepage = "https://github.com/Majored/rs-async-zip" -keywords = ["async", "zip", "archive", "tokio"] -categories = ["asynchronous", "compression"] - -[features] -full = ["chrono", "tokio-fs", "deflate", "bzip2", "lzma", "zstd", "xz", "deflate64"] - -# All features that are compatible with WASM -full-wasm = ["chrono", "deflate", "zstd"] - -tokio = ["dep:tokio", "tokio-util", "tokio/io-util"] -tokio-fs = ["tokio/fs"] - -deflate = ["async-compression/deflate"] -bzip2 = ["async-compression/bzip2"] -lzma = ["async-compression/lzma"] -zstd = ["async-compression/zstd"] -xz = ["async-compression/xz"] -deflate64 = ["async-compression/deflate64"] - -[package.metadata.docs.rs] -all-features = true -# defines the configuration attribute `docsrs` -rustdoc-args = ["--cfg", "docsrs"] - -[dependencies] -crc32fast = "1" -futures-lite = { version = "2.1.0", default-features = false, features = ["std"] } -pin-project = "1" -thiserror = "1" - -async-compression = { version = "0.4.2", default-features = false, features = ["futures-io"], optional = true } -chrono = { version = "0.4", default-features = false, features = ["clock"], optional = true } -tokio = { version = "1", default-features = false, optional = true } -tokio-util = { version = "0.7", features = ["compat"], optional = true } - -[dev-dependencies] -# tests -tokio = { version = "1", features = ["full"] } -tokio-util = { version = "0.7", features = ["compat"] } -env_logger = "0.11.2" -zip = "0.6.3" - -# shared across multiple examples -# anyhow = "1" -# sanitize-filename = "0.5" - -# actix_multipart -# actix-web = "4" -# actix-multipart = "0.6" -# futures = "0.3" -# derive_more = "0.99" -# uuid = { version = "1", features = ["v4", "serde"] } diff --git a/crates/async_zip/LICENSE b/crates/async_zip/LICENSE deleted file mode 100644 index ea2b727..0000000 --- a/crates/async_zip/LICENSE +++ /dev/null @@ -1,22 +0,0 @@ -MIT License - -Copyright (c) 2021 Harry -Copyright (c) 2023 Cognite AS - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
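An aside on the `crates/async_zip/Cargo.toml` manifest deleted just above: it exposes each compression method (`deflate`, `bzip2`, `lzma`, `zstd`, `xz`, `deflate64`) as an opt-in cargo feature forwarding to the matching `async-compression` feature, with `full` aggregating them all; the root `Cargo.toml` removed earlier in this diff consumed the crate with `features = ["tokio", "tokio-fs", "deflate"]`. A minimal sketch of how such an opt-in flag surfaces in downstream code (illustrative only, not code from async_zip itself):

```rust
// Minimal sketch: probing a cargo feature flag at compile time.
// `deflate` is one of the features defined in the manifest above;
// this constant pattern is illustrative, not async_zip's API.
#[cfg(feature = "deflate")]
const DEFLATE_SUPPORTED: bool = true;
#[cfg(not(feature = "deflate"))]
const DEFLATE_SUPPORTED: bool = false;

fn main() {
    if DEFLATE_SUPPORTED {
        println!("deflate-compressed entries can be decoded");
    } else {
        println!("rebuild with `--features deflate` to decode deflate entries");
    }
}
```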
diff --git a/crates/async_zip/README.md b/crates/async_zip/README.md deleted file mode 100644 index 6e6a32a..0000000 --- a/crates/async_zip/README.md +++ /dev/null @@ -1,81 +0,0 @@ -# async_zip -[![Crates.io](https://img.shields.io/crates/v/async_zip?style=flat-square)](https://crates.io/crates/async_zip) -[![Crates.io](https://img.shields.io/crates/d/async_zip?style=flat-square)](https://crates.io/crates/async_zip) -[![docs.rs](https://img.shields.io/docsrs/async_zip?style=flat-square)](https://docs.rs/async_zip/) -[![GitHub Workflow Status (branch)](https://img.shields.io/github/actions/workflow/status/Majored/rs-async-zip/ci-linux.yml?branch=main&style=flat-square)](https://github.com/Majored/rs-async-zip/actions?query=branch%3Amain) -[![GitHub](https://img.shields.io/github/license/Majored/rs-async-zip?style=flat-square)](https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -An asynchronous ZIP archive reading/writing crate. - -## Features -- A base implementation atop `futures`'s IO traits. -- An extended implementation atop `tokio`'s IO traits. -- Support for Stored, Deflate, bzip2, LZMA, zstd, and xz compression methods. -- Various different reading approaches (seek, stream, filesystem, in-memory buffer, etc). -- Support for writing complete data (u8 slices) or streams using data descriptors. -- Initial support for ZIP64 reading and writing. -- Aims for reasonable [specification](https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md) compliance. - -## Installation & Basic Usage - -```toml -[dependencies] -async_zip = { version = "0.0.17", features = ["full"] } -``` - -A (soon to be) extensive list of [examples](https://github.com/Majored/rs-async-zip/tree/main/examples) can be found under the `/examples` directory. - -### Feature Flags -- `full` - Enables all below features. -- `full-wasm` - Enables all below features that are compatible with WASM. -- `chrono` - Enables support for parsing dates via `chrono`. -- `tokio` - Enables support for the `tokio` implementation module. -- `tokio-fs` - Enables support for the `tokio::fs` reading module. -- `deflate` - Enables support for the Deflate compression method. -- `bzip2` - Enables support for the bzip2 compression method. -- `lzma` - Enables support for the LZMA compression method. -- `zstd` - Enables support for the zstd compression method. -- `xz` - Enables support for the xz compression method. - -### Reading -```rust -use tokio::{io::BufReader, fs::File}; -use async_zip::tokio::read::seek::ZipFileReader; -... - -let mut file = BufReader::new(File::open("./Archive.zip").await?); -let mut zip = ZipFileReader::with_tokio(&mut file).await?; - -let mut string = String::new(); -let mut reader = zip.reader_with_entry(0).await?; -reader.read_to_string_checked(&mut string).await?; - -println!("{}", string); -``` - -### Writing -```rust -use async_zip::tokio::write::ZipFileWriter; -use async_zip::{Compression, ZipEntryBuilder}; -use tokio::fs::File; -... - -let mut file = File::create("foo.zip").await?; -let mut writer = ZipFileWriter::with_tokio(&mut file); - -let data = b"This is an example file."; -let builder = ZipEntryBuilder::new("bar.txt".into(), Compression::Deflate); - -writer.write_entry_whole(builder, data).await?; -writer.close().await?; -``` - -## Contributions -Whilst I will be continuing to maintain this crate myself, reasonable specification compliance is a huge undertaking for a single individual. As such, contributions will always be encouraged and appreciated. 
- -No contribution guidelines exist but additions should be developed with readability in mind, with appropriate comments, and make use of `rustfmt`. - -## Issues & Support -Whether you're wanting to report a bug you've come across during use of this crate or are seeking general help/assistance, please utilise the [issues tracker](https://github.com/Majored/rs-async-zip/issues) and provide as much detail as possible (eg. recreation steps). - -I try to respond to issues within a reasonable timeframe. diff --git a/crates/async_zip/SPECIFICATION.md b/crates/async_zip/SPECIFICATION.md deleted file mode 100644 index 707311e..0000000 --- a/crates/async_zip/SPECIFICATION.md +++ /dev/null @@ -1,3996 +0,0 @@ -File: APPNOTE.TXT - .ZIP File Format Specification -Version: 6.3.9 -Status: FINAL - replaces version 6.3.8 -Revised: July 15, 2020 -Copyright (c) 1989 - 2014, 2018, 2019, 2020 PKWARE Inc., All Rights Reserved. - -## 1.0 Introduction - -## 1.1 Purpose - - ### 1.1.1 - This specification is intended to define a cross-platform, - interoperable file storage and transfer format. Since its - first publication in 1989, PKWARE, Inc. ("PKWARE") has remained - committed to ensuring the interoperability of the .ZIP file - format through periodic publication and maintenance of this - specification. We trust that all .ZIP compatible vendors and - application developers that use and benefit from this format - will share and support this commitment to interoperability. - -## 1.2 Scope - - ### 1.2.1 - ZIP is one of the most widely used compressed file formats. It is - universally used to aggregate, compress, and encrypt files into a single - interoperable container. No specific use or application need is - defined by this format and no specific implementation guidance is - provided. This document provides details on the storage format for - creating ZIP files. Information is provided on the records and - fields that describe what a ZIP file is. - -## 1.3 Trademarks - - ### 1.3.1 - PKWARE, PKZIP, Smartcrypt, SecureZIP, and PKSFX are registered - trademarks of PKWARE, Inc. in the United States and elsewhere. - PKPatchMaker, Deflate64, and ZIP64 are trademarks of PKWARE, Inc. - Other marks referenced within this document appear for identification - purposes only and are the property of their respective owners. - - -## 1.4 Permitted Use - - ### 1.4.1 - This document, "APPNOTE.TXT - .ZIP File Format Specification" is the - exclusive property of PKWARE. Use of the information contained in this - document is permitted solely for the purpose of creating products, - programs and processes that read and write files in the ZIP format - subject to the terms and conditions herein. - - ### 1.4.2 - Use of the content of this document within other publications is - permitted only through reference to this document. Any reproduction - or distribution of this document in whole or in part without prior - written permission from PKWARE is strictly prohibited. - - ### 1.4.3 - Certain technological components provided in this document are the - patented proprietary technology of PKWARE and as such require a - separate, executed license agreement from PKWARE. Applicable - components are marked with the following, or similar, statement: - 'Refer to the section in this document entitled "Incorporating - PKWARE Proprietary Technology into Your Product" for more information'. 
- -## 1.5 Contacting PKWARE - - ### 1.5.1 - If you have questions on this format, its use, or licensing, or if you - wish to report defects, request changes or additions, please contact: - - PKWARE, Inc. - 201 E. Pittsburgh Avenue, Suite 400 - Milwaukee, WI 53204 - +1-414-289-9788 - +1-414-289-9789 FAX - zipformat@pkware.com - - ### 1.5.2 - Information about this format and a reference copy of this document - is publicly available at: - - http://www.pkware.com/appnote - -## 1.6 Disclaimer - - ### 1.6.1 - Although PKWARE will attempt to supply current and accurate - information relating to its file formats, algorithms, and the - subject programs, the possibility of error or omission cannot - be eliminated. PKWARE therefore expressly disclaims any warranty - that the information contained in the associated materials relating - to the subject programs and/or the format of the files created or - accessed by the subject programs and/or the algorithms used by - the subject programs, or any other matter, is current, correct or - accurate as delivered. Any risk of damage due to any possible - inaccurate information is assumed by the user of the information. - Furthermore, the information relating to the subject programs - and/or the file formats created or accessed by the subject - programs and/or the algorithms used by the subject programs is - subject to change without notice. - -## 2.0 Revisions - -## 2.1 Document Status - - ### 2.1.1 - If the STATUS of this file is marked as DRAFT, the content - defines proposed revisions to this specification which may consist - of changes to the ZIP format itself, or that may consist of other - content changes to this document. Versions of this document and - the format in DRAFT form may be subject to modification prior to - publication STATUS of FINAL. DRAFT versions are published periodically - to provide notification to the ZIP community of pending changes and to - provide opportunity for review and comment. - - ### 2.1.2 - Versions of this document having a STATUS of FINAL are - considered to be in the final form for that version of the document - and are not subject to further change until a new, higher version - numbered document is published. Newer versions of this format - specification are intended to remain interoperable with all prior - versions whenever technically possible. 
- -## 2.2 Change Log - - Version Change Description Date - ------- ------------------ ---------- - 5.2 -Single Password Symmetric Encryption 07/16/2003 - storage - - 6.1.0 - -Smartcard compatibility 01/20/2004 - -Documentation on certificate storage - - 6.2.0 - -Introduction of Central Directory 04/26/2004 - Encryption for encrypting metadata - -Added OS X to Version Made By values - - 6.2.1 - -Added Extra Field placeholder for 04/01/2005 - POSZIP using ID 0x4690 - - -Clarified size field on - "zip64 end of central directory record" - - 6.2.2 - -Documented Final Feature Specification 01/06/2006 - for Strong Encryption - - -Clarifications and typographical - corrections - - 6.3.0 - -Added tape positioning storage 09/29/2006 - parameters - - -Expanded list of supported hash algorithms - - -Expanded list of supported compression - algorithms - - -Expanded list of supported encryption - algorithms - - -Added option for Unicode filename - storage - - -Clarifications for consistent use - of Data Descriptor records - - -Added additional "Extra Field" - definitions - - 6.3.1 - -Corrected standard hash values for 04/11/2007 - SHA-256/384/512 - - 6.3.2 - -Added compression method 97 09/28/2007 - - -Documented InfoZIP "Extra Field" - values for UTF-8 file name and - file comment storage - - 6.3.3 - -Formatting changes to support 09/01/2012 - easier referencing of this APPNOTE - from other documents and standards - - 6.3.4 - -Address change 10/01/2014 - - 6.3.5 - -Documented compression methods 16 11/31/2018 - and 99 (4.4.5, 4.6.1, 5.11, 5.17, - APPENDIX E) - - -Corrected several typographical - errors (2.1.2, 3.2, 4.1.1, 10.2) - - -Marked legacy algorithms as no - longer suitable for use (4.4.5.1) - - -Added clarity on MS DOS time format - (4.4.6) - - -Assign extrafield ID for Timestamps - (4.5.2) - - -Field code description correction (A.2) - - -More consistent use of MAY/SHOULD/MUST - - -Expanded 0x0065 record attribute codes (B.2) - - -Initial information on 0x0022 Extra Data - - 6.3.6 - -Corrected typographical error 04/26/2019 - (4.4.1.3) - - 6.3.7 - -Added Zstandard compression method ID - (4.4.5) - - -Corrected several reported typos - - -Marked intended use for general purpose bit 14 - - -Added Data Stream Alignment Extra Data info - (4.6.11) - - 6.3.8 - -Resolved Zstandard compression method ID conflict - (4.4.5) - - -Added additional compression method ID values in use - - 6.3.9 - -Corrected a typo in Data Stream Alignment description - (4.6.11) - - - - -## 3.0 Notations - - 3.1 Use of the term MUST or SHALL indicates a required element. - - 3.2 MUST NOT or SHALL NOT indicates an element is prohibited from use. - - 3.3 SHOULD indicates a RECOMMENDED element. - - 3.4 SHOULD NOT indicates an element NOT RECOMMENDED for use. - - 3.5 MAY indicates an OPTIONAL element. - - -## 4.0 ZIP Files - -## 4.1 What is a ZIP file - - ### 4.1.1 - ZIP files MAY be identified by the standard .ZIP file extension - although use of a file extension is not required. Use of the - extension .ZIPX is also recognized and MAY be used for ZIP files. - Other common file extensions using the ZIP format include .JAR, .WAR, - .DOCX, .XLSX, .PPTX, .ODT, .ODS, .ODP and others. Programs reading or - writing ZIP files SHOULD rely on internal record signatures described - in this document to identify files in this format. - - ### 4.1.2 - ZIP files SHOULD contain at least one file and MAY contain - multiple files. 
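As an illustration of the guidance in 4.1.1 above, the following is a minimal Rust sketch (not part of the specification; the function name is this example's own) of identifying a candidate ZIP file by its internal record signatures rather than by file extension. A robust reader would instead locate the "end of central directory record" (4.3.16) by searching from the end of the file, since self-extracting archives (4.1.9) begin with extraction code rather than a local file header.

```rust
use std::{
    fs::File,
    io::{ErrorKind, Read},
    path::Path,
};

/// Illustrative sketch, not part of APPNOTE: sniff the first four bytes
/// for a record signature instead of trusting the file extension.
/// 0x04034b50 ("PK\x03\x04") opens a local file header (4.3.7);
/// 0x06054b50 ("PK\x05\x06") at offset zero is the end of central
/// directory record of an empty archive (4.3.1, 4.3.16).
fn starts_with_zip_signature(path: &Path) -> std::io::Result<bool> {
    let mut magic = [0u8; 4];
    match File::open(path)?.read_exact(&mut magic) {
        Ok(()) => Ok(matches!(&magic, b"PK\x03\x04" | b"PK\x05\x06")),
        // Fewer than four bytes cannot hold any ZIP record signature.
        Err(e) if e.kind() == ErrorKind::UnexpectedEof => Ok(false),
        Err(e) => Err(e),
    }
}
```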
- - ### 4.1.3 - Data compression MAY be used to reduce the size of files - placed into a ZIP file, but is not required. This format supports the - use of multiple data compression algorithms. When compression is used, - one of the documented compression algorithms MUST be used. Implementors - are advised to experiment with their data to determine which of the - available algorithms provides the best compression for their needs. - Compression method 8 (Deflate) is the method used by default by most - ZIP compatible application programs. - - - ### 4.1.4 - Data encryption MAY be used to protect files within a ZIP file. - Keying methods supported for encryption within this format include - passwords and public/private keys. Either MAY be used individually - or in combination. Encryption MAY be applied to individual files. - Additional security MAY be used through the encryption of ZIP file - metadata stored within the Central Directory. See the section on the - Strong Encryption Specification for information. Refer to the section - in this document entitled "Incorporating PKWARE Proprietary Technology - into Your Product" for more information. - - ### 4.1.5 - Data integrity MUST be provided for each file using CRC32. - - ### 4.1.6 - Additional data integrity MAY be included through the use of - digital signatures. Individual files MAY be signed with one or more - digital signatures. The Central Directory, if signed, MUST use a - single signature. - - ### 4.1.7 - Files MAY be placed within a ZIP file uncompressed or stored. - The term "stored" as used in the context of this document means the file - is copied into the ZIP file uncompressed. - - ### 4.1.8 - Each data file placed into a ZIP file MAY be compressed, stored, - encrypted or digitally signed independent of how other data files in the - same ZIP file are archived. - - ### 4.1.9 - ZIP files MAY be streamed, split into segments (on fixed or on - removable media) or "self-extracting". Self-extracting ZIP - files MUST include extraction code for a target platform within - the ZIP file. - - ### 4.1.10 - Extensibility is provided for platform or application specific - needs through extra data fields that MAY be defined for custom - purposes. Extra data definitions MUST NOT conflict with existing - documented record definitions. - - ### 4.1.11 - Common uses for ZIP MAY also include the use of manifest files. - Manifest files store application specific information within a file stored - within the ZIP file. This manifest file SHOULD be the first file in the - ZIP file. This specification does not provide any information or guidance on - the use of manifest files within ZIP files. Refer to the application developer - for information on using manifest files and for any additional profile - information on using ZIP within an application. - - ### 4.1.12 - ZIP files MAY be placed within other ZIP files. - -## 4.2 ZIP Metadata - - ### 4.2.1 - ZIP files are identified by metadata consisting of defined record types - containing the storage information necessary for maintaining the files - placed into a ZIP file. Each record type MUST be identified using a header - signature that identifies the record type. Signature values begin with the - two byte constant marker of 0x4b50, representing the characters "PK". - - -## 4.3 General Format of a .ZIP file - - ### 4.3.1 - A ZIP file MUST contain an "end of central directory record". A ZIP - file containing only an "end of central directory record" is considered an - empty ZIP file. 
Files MAY be added or replaced within a ZIP file, or deleted. - A ZIP file MUST have only one "end of central directory record". Other - records defined in this specification MAY be used as needed to support - storage requirements for individual ZIP files. - - ### 4.3.2 - Each file placed into a ZIP file MUST be preceded by a "local - file header" record for that file. Each "local file header" MUST be - accompanied by a corresponding "central directory header" record within - the central directory section of the ZIP file. - - ### 4.3.3 - Files MAY be stored in arbitrary order within a ZIP file. A ZIP - file MAY span multiple volumes or it MAY be split into user-defined - segment sizes. All values MUST be stored in little-endian byte order unless - otherwise specified in this document for a specific data element. - - ### 4.3.4 - Compression MUST NOT be applied to a "local file header", an "encryption - header", or an "end of central directory record". Individual "central - directory records" MUST NOT be compressed, but the aggregate of all central - directory records MAY be compressed. - - ### 4.3.5 - File data MAY be followed by a "data descriptor" for the file. Data - descriptors are used to facilitate ZIP file streaming. - - - ### 4.3.6 - Overall .ZIP file format: - - [local file header 1] - [encryption header 1] - [file data 1] - [data descriptor 1] - . - . - . - [local file header n] - [encryption header n] - [file data n] - [data descriptor n] - [archive decryption header] - [archive extra data record] - [central directory header 1] - . - . - . - [central directory header n] - [zip64 end of central directory record] - [zip64 end of central directory locator] - [end of central directory record] - - - ### 4.3.7 - Local file header: - - local file header signature 4 bytes (0x04034b50) - version needed to extract 2 bytes - general purpose bit flag 2 bytes - compression method 2 bytes - last mod file time 2 bytes - last mod file date 2 bytes - crc-32 4 bytes - compressed size 4 bytes - uncompressed size 4 bytes - file name length 2 bytes - extra field length 2 bytes - - file name (variable size) - extra field (variable size) - - ### 4.3.8 - File data - - Immediately following the local header for a file - SHOULD be placed the compressed or stored data for the file. - If the file is encrypted, the encryption header for the file - SHOULD be placed after the local header and before the file - data. The series of [local file header][encryption header] - [file data][data descriptor] repeats for each file in the - .ZIP archive. - - Zero-byte files, directories, and other file types that - contain no content MUST NOT include file data. - - ### 4.3.9 - Data descriptor: - - crc-32 4 bytes - compressed size 4 bytes - uncompressed size 4 bytes - -### 4.3.9.1 -This descriptor MUST exist if bit 3 of the general - purpose bit flag is set (see below). It is byte aligned - and immediately follows the last byte of compressed data. - This descriptor SHOULD be used only when it was not possible to - seek in the output .ZIP file, e.g., when the output .ZIP file - was standard output or a non-seekable device. For ZIP64(tm) format - archives, the compressed and uncompressed sizes are 8 bytes each. - -### 4.3.9.2 -When compressing files, compressed and uncompressed sizes - SHOULD be stored in ZIP64 format (as 8 byte values) when a - file's size exceeds 0xFFFFFFFF. However ZIP64 format MAY be - used regardless of the size of a file. 
When extracting, if - the zip64 extended information extra field is present for - the file the compressed and uncompressed sizes will be 8 - byte values. - -### 4.3.9.3 -Although not originally assigned a signature, the value - 0x08074b50 has commonly been adopted as a signature value - for the data descriptor record. Implementers SHOULD be - aware that ZIP files MAY be encountered with or without this - signature marking data descriptors and SHOULD account for - either case when reading ZIP files to ensure compatibility. - -### 4.3.9.4 -When writing ZIP files, implementors SHOULD include the - signature value marking the data descriptor record. When - the signature is used, the fields currently defined for - the data descriptor record will immediately follow the - signature. - -### 4.3.9.5 -An extensible data descriptor will be released in a - future version of this APPNOTE. This new record is intended to - resolve conflicts with the use of this record going forward, - and to provide better support for streamed file processing. - -### 4.3.9.6 -When the Central Directory Encryption method is used, - the data descriptor record is not required, but MAY be used. - If present, and bit 3 of the general purpose bit field is set to - indicate its presence, the values in fields of the data descriptor - record MUST be set to binary zeros. See the section on the Strong - Encryption Specification for information. Refer to the section in - this document entitled "Incorporating PKWARE Proprietary Technology - into Your Product" for more information. - - - ### 4.3.10 - Archive decryption header: - -### 4.3.10.1 -The Archive Decryption Header is introduced in version 6.2 - of the ZIP format specification. This record exists in support - of the Central Directory Encryption Feature implemented as part of - the Strong Encryption Specification as described in this document. - When the Central Directory Structure is encrypted, this decryption - header MUST precede the encrypted data segment. - -### 4.3.10.2 -The encrypted data segment SHALL consist of the Archive - extra data record (if present) and the encrypted Central Directory - Structure data. The format of this data record is identical to the - Decryption header record preceding compressed file data. If the - central directory structure is encrypted, the location of the start of - this data record is determined using the Start of Central Directory - field in the Zip64 End of Central Directory record. See the - section on the Strong Encryption Specification for information - on the fields used in the Archive Decryption Header record. - Refer to the section in this document entitled "Incorporating - PKWARE Proprietary Technology into Your Product" for more information. - - - ### 4.3.11 - Archive extra data record: - - archive extra data signature 4 bytes (0x08064b50) - extra field length 4 bytes - extra field data (variable size) - -### 4.3.11.1 -The Archive Extra Data Record is introduced in version 6.2 - of the ZIP format specification. This record MAY be used in support - of the Central Directory Encryption Feature implemented as part of - the Strong Encryption Specification as described in this document. - When present, this record MUST immediately precede the central - directory data structure. - -### 4.3.11.2 -The size of this data record SHALL be included in the - Size of the Central Directory field in the End of Central - Directory record. 
If the central directory structure is compressed, - but not encrypted, the location of the start of this data record is - determined using the Start of Central Directory field in the Zip64 - End of Central Directory record. Refer to the section in this document - entitled "Incorporating PKWARE Proprietary Technology into Your - Product" for more information. - - ### 4.3.12 - Central directory structure: - - [central directory header 1] - . - . - . - [central directory header n] - [digital signature] - - File header: - - central file header signature 4 bytes (0x02014b50) - version made by 2 bytes - version needed to extract 2 bytes - general purpose bit flag 2 bytes - compression method 2 bytes - last mod file time 2 bytes - last mod file date 2 bytes - crc-32 4 bytes - compressed size 4 bytes - uncompressed size 4 bytes - file name length 2 bytes - extra field length 2 bytes - file comment length 2 bytes - disk number start 2 bytes - internal file attributes 2 bytes - external file attributes 4 bytes - relative offset of local header 4 bytes - - file name (variable size) - extra field (variable size) - file comment (variable size) - - ### 4.3.13 - Digital signature: - - header signature 4 bytes (0x05054b50) - size of data 2 bytes - signature data (variable size) - - With the introduction of the Central Directory Encryption - feature in version 6.2 of this specification, the Central - Directory Structure MAY be stored both compressed and encrypted. - Although not required, it is assumed when encrypting the - Central Directory Structure, that it will be compressed - for greater storage efficiency. Information on the - Central Directory Encryption feature can be found in the section - describing the Strong Encryption Specification. The Digital - Signature record will be neither compressed nor encrypted. - - ### 4.3.14 - Zip64 end of central directory record - - zip64 end of central dir - signature 4 bytes (0x06064b50) - size of zip64 end of central - directory record 8 bytes - version made by 2 bytes - version needed to extract 2 bytes - number of this disk 4 bytes - number of the disk with the - start of the central directory 4 bytes - total number of entries in the - central directory on this disk 8 bytes - total number of entries in the - central directory 8 bytes - size of the central directory 8 bytes - offset of start of central - directory with respect to - the starting disk number 8 bytes - zip64 extensible data sector (variable size) - -### 4.3.14.1 -The value stored into the "size of zip64 end of central - directory record" SHOULD be the size of the remaining - record and SHOULD NOT include the leading 12 bytes. - - Size = SizeOfFixedFields + SizeOfVariableData - 12. - -### 4.3.14.2 -The above record structure defines Version 1 of the - zip64 end of central directory record. Version 1 was - implemented in versions of this specification preceding - 6.2 in support of the ZIP64 large file feature. The - introduction of the Central Directory Encryption feature - implemented in version 6.2 as part of the Strong Encryption - Specification defines Version 2 of this record structure. - Refer to the section describing the Strong Encryption - Specification for details on the version 2 format for - this record. Refer to the section in this document entitled - "Incorporating PKWARE Proprietary Technology into Your Product" - for more information applicable to use of Version 2 of this - record. 
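To make the fixed portion of the record in 4.3.14 concrete, here is a minimal Rust sketch; the struct, constant, and function names are this example's assumptions, and the zip64 extensible data sector that may follow the fixed fields is left unparsed. All fields are read as unsigned little-endian values per 4.3.3 and 4.4.1.1.

```rust
/// Illustrative sketch, not part of APPNOTE: the fixed 56-byte prefix
/// of a Version 1 zip64 end of central directory record (4.3.14).
#[derive(Debug)]
struct Zip64EndOfCentralDirectory {
    size_of_record: u64, // remaining size, excluding the leading 12 bytes (4.3.14.1)
    version_made_by: u16,
    version_needed_to_extract: u16,
    disk_number: u32,
    central_directory_start_disk: u32,
    entries_on_this_disk: u64,
    total_entries: u64,
    central_directory_size: u64,
    central_directory_offset: u64,
}

const ZIP64_EOCD_SIGNATURE: u32 = 0x0606_4b50;

fn parse_zip64_eocd(buf: &[u8; 56]) -> Option<Zip64EndOfCentralDirectory> {
    // All fields are unsigned and stored little-endian (4.3.3, 4.4.1.1).
    let u16_at = |o: usize| u16::from_le_bytes(buf[o..o + 2].try_into().unwrap());
    let u32_at = |o: usize| u32::from_le_bytes(buf[o..o + 4].try_into().unwrap());
    let u64_at = |o: usize| u64::from_le_bytes(buf[o..o + 8].try_into().unwrap());

    if u32_at(0) != ZIP64_EOCD_SIGNATURE {
        return None;
    }
    Some(Zip64EndOfCentralDirectory {
        size_of_record: u64_at(4),
        version_made_by: u16_at(12),
        version_needed_to_extract: u16_at(14),
        disk_number: u32_at(16),
        central_directory_start_disk: u32_at(20),
        entries_on_this_disk: u64_at(24),
        total_entries: u64_at(32),
        central_directory_size: u64_at(40),
        central_directory_offset: u64_at(48),
    })
}
```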
- -### 4.3.14.3 -Special purpose data MAY reside in the zip64 extensible - data sector field following either a V1 or V2 version of this - record. To ensure identification of this special purpose data - it MUST include an identifying header block consisting of the - following: - - Header ID - 2 bytes - Data Size - 4 bytes - - The Header ID field indicates the type of data that is in the - data block that follows. - - Data Size identifies the number of bytes that follow for this - data block type. - -### 4.3.14.4 -Multiple special purpose data blocks MAY be present. - Each MUST be preceded by a Header ID and Data Size field. Current - mappings of Header ID values supported in this field are as - defined in APPENDIX C. - - ### 4.3.15 - Zip64 end of central directory locator - - zip64 end of central dir locator - signature 4 bytes (0x07064b50) - number of the disk with the - start of the zip64 end of - central directory 4 bytes - relative offset of the zip64 - end of central directory record 8 bytes - total number of disks 4 bytes - - ### 4.3.16 - End of central directory record: - - end of central dir signature 4 bytes (0x06054b50) - number of this disk 2 bytes - number of the disk with the - start of the central directory 2 bytes - total number of entries in the - central directory on this disk 2 bytes - total number of entries in - the central directory 2 bytes - size of the central directory 4 bytes - offset of start of central - directory with respect to - the starting disk number 4 bytes - .ZIP file comment length 2 bytes - .ZIP file comment (variable size) - -## 4.4 Explanation of fields - - ### 4.4.1 - General notes on fields - -### 4.4.1.1 - All fields unless otherwise noted are unsigned and stored - in Intel low-byte:high-byte, low-word:high-word order. - -### 4.4.1.2 - String fields are not null terminated, since the length - is given explicitly. - -### 4.4.1.3 - The entries in the central directory MAY NOT necessarily - be in the same order that files appear in the .ZIP file. - -### 4.4.1.4 - If one of the fields in the end of central directory - record is too small to hold required data, the field SHOULD be - set to -1 (0xFFFF or 0xFFFFFFFF) and the ZIP64 format record - SHOULD be created. - -### 4.4.1.5 - The end of central directory record and the Zip64 end - of central directory locator record MUST reside on the same - disk when splitting or spanning an archive. - - ### 4.4.2 - version made by (2 bytes) - - ### 4.4.2.1 -The upper byte indicates the compatibility of the file - attribute information. If the external file attributes - are compatible with MS-DOS and can be read by PKZIP for - DOS version 2.04g then this value will be zero. If these - attributes are not compatible, then this value will - identify the host system on which the attributes are - compatible. Software can use this information to determine - the line record format for text files etc. - - ### 4.4.2.2 -The current mappings are: - - 0 - MS-DOS and OS/2 (FAT / VFAT / FAT32 file systems) - 1 - Amiga 2 - OpenVMS - 3 - UNIX 4 - VM/CMS - 5 - Atari ST 6 - OS/2 H.P.F.S. - 7 - Macintosh 8 - Z-System - 9 - CP/M 10 - Windows NTFS - 11 - MVS (OS/390 - Z/OS) 12 - VSE - 13 - Acorn Risc 14 - VFAT - 15 - alternate MVS 16 - BeOS - 17 - Tandem 18 - OS/400 - 19 - OS X (Darwin) 20 thru 255 - unused - - ### 4.4.2.3 -The lower byte indicates the ZIP specification version - (the version of this document) supported by the software - used to encode the file. 
The value/10 indicates the major - version number, and the value mod 10 is the minor version - number. - - ### 4.4.3 - version needed to extract (2 bytes) - - ### 4.4.3.1 -The minimum supported ZIP specification version needed - to extract the file, mapped as above. This value is based on - the specific format features a ZIP program MUST support to - be able to extract the file. If multiple features are - applied to a file, the minimum version MUST be set to the - feature having the highest value. New features or feature - changes affecting the published format specification will be - implemented using higher version numbers than the last - published value to avoid conflict. - - ### 4.4.3.2 -Current minimum feature versions are as defined below: - - 1.0 - Default value - 1.1 - File is a volume label - 2.0 - File is a folder (directory) - 2.0 - File is compressed using Deflate compression - 2.0 - File is encrypted using traditional PKWARE encryption - 2.1 - File is compressed using Deflate64(tm) - 2.5 - File is compressed using PKWARE DCL Implode - 2.7 - File is a patch data set - 4.5 - File uses ZIP64 format extensions - 4.6 - File is compressed using BZIP2 compression* - 5.0 - File is encrypted using DES - 5.0 - File is encrypted using 3DES - 5.0 - File is encrypted using original RC2 encryption - 5.0 - File is encrypted using RC4 encryption - 5.1 - File is encrypted using AES encryption - 5.1 - File is encrypted using corrected RC2 encryption** - 5.2 - File is encrypted using corrected RC2-64 encryption** - 6.1 - File is encrypted using non-OAEP key wrapping*** - 6.2 - Central directory encryption - 6.3 - File is compressed using LZMA - 6.3 - File is compressed using PPMd+ - 6.3 - File is encrypted using Blowfish - 6.3 - File is encrypted using Twofish - - ### 4.4.3.3 -Notes on version needed to extract - - * Early 7.x (pre-7.2) versions of PKZIP incorrectly set the - version needed to extract for BZIP2 compression to be 50 - when it SHOULD have been 46. - - ** Refer to the section on Strong Encryption Specification - for additional information regarding RC2 corrections. - - *** Certificate encryption using non-OAEP key wrapping is the - intended mode of operation for all versions beginning with 6.1. - Support for OAEP key wrapping MUST only be used for - backward compatibility when sending ZIP files to be opened by - versions of PKZIP older than 6.1 (5.0 or 6.0). - - + Files compressed using PPMd MUST set the version - needed to extract field to 6.3, however, not all ZIP - programs enforce this and MAY be unable to decompress - data files compressed using PPMd if this value is set. - - When using ZIP64 extensions, the corresponding value in the - zip64 end of central directory record MUST also be set. - This field SHOULD be set appropriately to indicate whether - Version 1 or Version 2 format is in use. - - - ### 4.4.4 - general purpose bit flag: (2 bytes) - - Bit 0: If set, indicates that the file is encrypted. - - (For Method 6 - Imploding) - Bit 1: If the compression method used was type 6, - Imploding, then this bit, if set, indicates - an 8K sliding dictionary was used. If clear, - then a 4K sliding dictionary was used. - - Bit 2: If the compression method used was type 6, - Imploding, then this bit, if set, indicates - 3 Shannon-Fano trees were used to encode the - sliding dictionary output. If clear, then 2 - Shannon-Fano trees were used. - - (For Methods 8 and 9 - Deflating) - Bit 2 Bit 1 - 0 0 Normal (-en) compression option was used. 
- 0 1 Maximum (-exx/-ex) compression option was used. - 1 0 Fast (-ef) compression option was used. - 1 1 Super Fast (-es) compression option was used. - - (For Method 14 - LZMA) - Bit 1: If the compression method used was type 14, - LZMA, then this bit, if set, indicates - an end-of-stream (EOS) marker is used to - mark the end of the compressed data stream. - If clear, then an EOS marker is not present - and the compressed data size must be known - to extract. - - Note: Bits 1 and 2 are undefined if the compression - method is any other. - - Bit 3: If this bit is set, the fields crc-32, compressed - size and uncompressed size are set to zero in the - local header. The correct values are put in the - data descriptor immediately following the compressed - data. (Note: PKZIP version 2.04g for DOS only - recognizes this bit for method 8 compression, newer - versions of PKZIP recognize this bit for any - compression method.) - - Bit 4: Reserved for use with method 8, for enhanced - deflating. - - Bit 5: If this bit is set, this indicates that the file is - compressed patched data. (Note: Requires PKZIP - version 2.70 or greater) - - Bit 6: Strong encryption. If this bit is set, you MUST - set the version needed to extract value to at least - 50 and you MUST also set bit 0. If AES encryption - is used, the version needed to extract value MUST - be at least 51. See the section describing the Strong - Encryption Specification for details. Refer to the - section in this document entitled "Incorporating PKWARE - Proprietary Technology into Your Product" for more - information. - - Bit 7: Currently unused. - - Bit 8: Currently unused. - - Bit 9: Currently unused. - - Bit 10: Currently unused. - - Bit 11: Language encoding flag (EFS). If this bit is set, - the filename and comment fields for this file - MUST be encoded using UTF-8. (see APPENDIX D) - - Bit 12: Reserved by PKWARE for enhanced compression. - - Bit 13: Set when encrypting the Central Directory to indicate - selected data values in the Local Header are masked to - hide their actual values. See the section describing - the Strong Encryption Specification for details. Refer - to the section in this document entitled "Incorporating - PKWARE Proprietary Technology into Your Product" for - more information. - - Bit 14: Reserved by PKWARE for alternate streams. - - Bit 15: Reserved by PKWARE. 
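The general purpose bit flag packs independent indicators into a single 16-bit word. A minimal Rust sketch of decoding the method-independent bits follows; the struct and field names are this example's own, and the method-specific bits 1 and 2 are deliberately omitted.

```rust
/// Illustrative sketch, not part of APPNOTE: method-independent bits
/// of the general purpose bit flag (4.4.4).
#[derive(Debug)]
struct GeneralPurposeFlags {
    encrypted: bool,           // bit 0
    has_data_descriptor: bool, // bit 3: CRC and sizes follow the file data (4.3.9)
    strong_encryption: bool,   // bit 6: also requires bit 0 and version needed >= 50
    utf8_names: bool,          // bit 11 (EFS): file name and comment are UTF-8
    masked_local_header: bool, // bit 13: local header values masked for CD encryption
}

fn decode_general_purpose_flags(flags: u16) -> GeneralPurposeFlags {
    let bit = |n: u16| (flags >> n) & 1 == 1;
    GeneralPurposeFlags {
        encrypted: bit(0),
        has_data_descriptor: bit(3),
        strong_encryption: bit(6),
        utf8_names: bit(11),
        masked_local_header: bit(13),
    }
}
```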
- - ### 4.4.5 - compression method: (2 bytes) - - 0 - The file is stored (no compression) - 1 - The file is Shrunk - 2 - The file is Reduced with compression factor 1 - 3 - The file is Reduced with compression factor 2 - 4 - The file is Reduced with compression factor 3 - 5 - The file is Reduced with compression factor 4 - 6 - The file is Imploded - 7 - Reserved for Tokenizing compression algorithm - 8 - The file is Deflated - 9 - Enhanced Deflating using Deflate64(tm) - 10 - PKWARE Data Compression Library Imploding (old IBM TERSE) - 11 - Reserved by PKWARE - 12 - File is compressed using BZIP2 algorithm - 13 - Reserved by PKWARE - 14 - LZMA - 15 - Reserved by PKWARE - 16 - IBM z/OS CMPSC Compression - 17 - Reserved by PKWARE - 18 - File is compressed using IBM TERSE (new) - 19 - IBM LZ77 z Architecture - 20 - deprecated (use method 93 for zstd) - 93 - Zstandard (zstd) Compression - 94 - MP3 Compression - 95 - XZ Compression - 96 - JPEG variant - 97 - WavPack compressed data - 98 - PPMd version I, Rev 1 - 99 - AE-x encryption marker (see APPENDIX E) - - ### 4.4.5.1 -Methods 1-6 are legacy algorithms and are no longer - recommended for use when compressing files. - - ### 4.4.6 - date and time fields: (2 bytes each) - - The date and time are encoded in standard MS-DOS format. - If input came from standard input, the date and time are - those at which compression was started for this data. - If encrypting the central directory and general purpose bit - flag 13 is set indicating masking, the value stored in the - Local Header will be zero. MS-DOS time format is different - from more commonly used computer time formats such as - UTC. For example, MS-DOS uses year values relative to 1980 - and 2 second precision. - - ### 4.4.7 - CRC-32: (4 bytes) - - The CRC-32 algorithm was generously contributed by - David Schwaderer and can be found in his excellent - book "C Programmers Guide to NetBIOS" published by - Howard W. Sams & Co. Inc. The 'magic number' for - the CRC is 0xdebb20e3. The proper CRC pre and post - conditioning is used, meaning that the CRC register - is pre-conditioned with all ones (a starting value - of 0xffffffff) and the value is post-conditioned by - taking the one's complement of the CRC residual. - If bit 3 of the general purpose flag is set, this - field is set to zero in the local header and the correct - value is put in the data descriptor and in the central - directory. When encrypting the central directory, if the - local header is not in ZIP64 format and general purpose - bit flag 13 is set indicating masking, the value stored - in the Local Header will be zero. - - ### 4.4.8 - compressed size: (4 bytes) - ### 4.4.9 - uncompressed size: (4 bytes) - - The size of the file compressed (4.4.8) and uncompressed, - (4.4.9) respectively. When a decryption header is present it - will be placed in front of the file data and the value of the - compressed file size will include the bytes of the decryption - header. If bit 3 of the general purpose bit flag is set, - these fields are set to zero in the local header and the - correct values are put in the data descriptor and - in the central directory. If an archive is in ZIP64 format - and the value in this field is 0xFFFFFFFF, the size will be - in the corresponding 8 byte ZIP64 extended information - extra field. 
When encrypting the central directory, if the - local header is not in ZIP64 format and general purpose bit - flag 13 is set indicating masking, the value stored for the - uncompressed size in the Local Header will be zero. - - ### 4.4.10 - file name length: (2 bytes) - ### 4.4.11 - extra field length: (2 bytes) - ### 4.4.12 - file comment length: (2 bytes) - - The length of the file name, extra field, and comment - fields respectively. The combined length of any - directory record and these three fields SHOULD NOT - generally exceed 65,535 bytes. If input came from standard - input, the file name length is set to zero. - - - ### 4.4.13 - disk number start: (2 bytes) - - The number of the disk on which this file begins. If an - archive is in ZIP64 format and the value in this field is - 0xFFFF, the size will be in the corresponding 4 byte zip64 - extended information extra field. - - ### 4.4.14 - internal file attributes: (2 bytes) - - Bits 1 and 2 are reserved for use by PKWARE. - - ### 4.4.14.1 -The lowest bit of this field indicates, if set, - that the file is apparently an ASCII or text file. If not - set, that the file apparently contains binary data. - The remaining bits are unused in version 1.0. - - ### 4.4.14.2 -The 0x0002 bit of this field indicates, if set, that - a 4 byte variable record length control field precedes each - logical record indicating the length of the record. The - record length control field is stored in little-endian byte - order. This flag is independent of text control characters, - and if used in conjunction with text data, includes any - control characters in the total length of the record. This - value is provided for mainframe data transfer support. - - ### 4.4.15 - external file attributes: (4 bytes) - - The mapping of the external attributes is - host-system dependent (see 'version made by'). For - MS-DOS, the low order byte is the MS-DOS directory - attribute byte. If input came from standard input, this - field is set to zero. - - ### 4.4.16 - relative offset of local header: (4 bytes) - - This is the offset from the start of the first disk on - which this file appears, to where the local header SHOULD - be found. If an archive is in ZIP64 format and the value - in this field is 0xFFFFFFFF, the size will be in the - corresponding 8 byte zip64 extended information extra field. - - ### 4.4.17 - file name: (Variable) - - ### 4.4.17.1 -The name of the file, with optional relative path. - The path stored MUST NOT contain a drive or - device letter, or a leading slash. All slashes - MUST be forward slashes '/' as opposed to - backwards slashes '\' for compatibility with Amiga - and UNIX file systems etc. If input came from standard - input, there is no file name field. - - ### 4.4.17.2 -If using the Central Directory Encryption Feature and - general purpose bit flag 13 is set indicating masking, the file - name stored in the Local Header will not be the actual file name. - A masking value consisting of a unique hexadecimal value will - be stored. This value will be sequentially incremented for each - file in the archive. See the section on the Strong Encryption - Specification for details on retrieving the encrypted file name. - Refer to the section in this document entitled "Incorporating PKWARE - Proprietary Technology into Your Product" for more information. - - - ### 4.4.18 - file comment: (Variable) - - The comment for this file. - - ### 4.4.19 - number of this disk: (2 bytes) - - The number of this disk, which contains central - directory end record. 
If an archive is in ZIP64 format - and the value in this field is 0xFFFF, the size will - be in the corresponding 4 byte zip64 end of central - directory field. - - - ### 4.4.20 - number of the disk with the start of the central - directory: (2 bytes) - - The number of the disk on which the central - directory starts. If an archive is in ZIP64 format - and the value in this field is 0xFFFF, the size will - be in the corresponding 4 byte zip64 end of central - directory field. - - ### 4.4.21 - total number of entries in the central dir on - this disk: (2 bytes) - - The number of central directory entries on this disk. - If an archive is in ZIP64 format and the value in - this field is 0xFFFF, the size will be in the - corresponding 8 byte zip64 end of central - directory field. - - ### 4.4.22 - total number of entries in the central dir: (2 bytes) - - The total number of files in the .ZIP file. If an - archive is in ZIP64 format and the value in this field - is 0xFFFF, the size will be in the corresponding 8 byte - zip64 end of central directory field. - - ### 4.4.23 - size of the central directory: (4 bytes) - - The size (in bytes) of the entire central directory. - If an archive is in ZIP64 format and the value in - this field is 0xFFFFFFFF, the size will be in the - corresponding 8 byte zip64 end of central - directory field. - - ### 4.4.24 - offset of start of central directory with respect to - the starting disk number: (4 bytes) - - Offset of the start of the central directory on the - disk on which the central directory starts. If an - archive is in ZIP64 format and the value in this - field is 0xFFFFFFFF, the size will be in the - corresponding 8 byte zip64 end of central - directory field. - - ### 4.4.25 - .ZIP file comment length: (2 bytes) - - The length of the comment for this .ZIP file. - - ### 4.4.26 - .ZIP file comment: (Variable) - - The comment for this .ZIP file. ZIP file comment data - is stored unsecured. No encryption or data authentication - is applied to this area at this time. Confidential information - SHOULD NOT be stored in this section. - - ### 4.4.27 - zip64 extensible data sector (variable size) - - (currently reserved for use by PKWARE) - - - ### 4.4.28 - extra field: (Variable) - - This SHOULD be used for storage expansion. If additional - information needs to be stored within a ZIP file for special - application or platform needs, it SHOULD be stored here. - Programs supporting earlier versions of this specification can - then safely skip the file, and find the next file or header. - This field will be 0 length in version 1.0. - - Existing extra fields are defined in the section - Extensible data fields that follows. - -## 4.5 Extensible data fields - - ### 4.5.1 - In order to allow different programs and different types - of information to be stored in the 'extra' field in .ZIP - files, the following structure MUST be used for all - programs storing data in this field: - - header1+data1 + header2+data2 . . . - - Each header MUST consist of: - - Header ID - 2 bytes - Data Size - 2 bytes - - Note: all fields stored in Intel low-byte/high-byte order. - - The Header ID field indicates the type of data that is in - the following data block. - - Header IDs of 0 thru 31 are reserved for use by PKWARE. - The remaining IDs can be used by third party vendors for - proprietary usage. 
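Because every block in the extra field carries its own Data Size, a reader can walk the field generically and skip blocks it does not understand. A minimal Rust sketch follows; the function name and the choice to stop at a truncated trailing block are this example's assumptions. Callers can then look up, say, Header ID 0x0001 to find the Zip64 extended information block defined in 4.5.3 below.

```rust
/// Illustrative sketch, not part of APPNOTE: split an extra field
/// (4.5.1) into (Header ID, data) blocks. Both header values are
/// 2-byte little-endian integers, followed by `size` bytes of data.
fn extra_field_blocks(mut extra: &[u8]) -> Vec<(u16, &[u8])> {
    let mut blocks = Vec::new();
    while extra.len() >= 4 {
        let id = u16::from_le_bytes([extra[0], extra[1]]);
        let size = u16::from_le_bytes([extra[2], extra[3]]) as usize;
        if extra.len() < 4 + size {
            break; // declared size overruns the field; stop rather than misparse
        }
        blocks.push((id, &extra[4..4 + size]));
        extra = &extra[4 + size..];
    }
    blocks
}
```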
- - ### 4.5.2 - The current Header ID mappings defined by PKWARE are: - - 0x0001 Zip64 extended information extra field - 0x0007 AV Info - 0x0008 Reserved for extended language encoding data (PFS) - (see APPENDIX D) - 0x0009 OS/2 - 0x000a NTFS - 0x000c OpenVMS - 0x000d UNIX - 0x000e Reserved for file stream and fork descriptors - 0x000f Patch Descriptor - 0x0014 PKCS#7 Store for X.509 Certificates - 0x0015 X.509 Certificate ID and Signature for - individual file - 0x0016 X.509 Certificate ID for Central Directory - 0x0017 Strong Encryption Header - 0x0018 Record Management Controls - 0x0019 PKCS#7 Encryption Recipient Certificate List - 0x0020 Reserved for Timestamp record - 0x0021 Policy Decryption Key Record - 0x0022 Smartcrypt Key Provider Record - 0x0023 Smartcrypt Policy Key Data Record - 0x0065 IBM S/390 (Z390), AS/400 (I400) attributes - - uncompressed - 0x0066 Reserved for IBM S/390 (Z390), AS/400 (I400) - attributes - compressed - 0x4690 POSZIP 4690 (reserved) - - - ### 4.5.3 - -Zip64 Extended Information Extra Field (0x0001): - - The following is the layout of the zip64 extended - information "extra" block. If one of the size or - offset fields in the Local or Central directory - record is too small to hold the required data, - a Zip64 extended information record is created. - The order of the fields in the zip64 extended - information record is fixed, but the fields MUST - only appear if the corresponding Local or Central - directory record field is set to 0xFFFF or 0xFFFFFFFF. - - Note: all fields stored in Intel low-byte/high-byte order. - - Value Size Description - ----- ---- ----------- -(ZIP64) 0x0001 2 bytes Tag for this "extra" block type - Size 2 bytes Size of this "extra" block - Original - Size 8 bytes Original uncompressed file size - Compressed - Size 8 bytes Size of compressed data - Relative Header - Offset 8 bytes Offset of local header record - Disk Start - Number 4 bytes Number of the disk on which - this file starts - - This entry in the Local header MUST include BOTH original - and compressed file size fields. If encrypting the - central directory and bit 13 of the general purpose bit - flag is set indicating masking, the value stored in the - Local Header for the original file size will be zero. - - - ### 4.5.4 - -OS/2 Extra Field (0x0009): - - The following is the layout of the OS/2 attributes "extra" - block. (Last Revision 09/05/95) - - Note: all fields stored in Intel low-byte/high-byte order. - - Value Size Description - ----- ---- ----------- -(OS/2) 0x0009 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - BSize 4 bytes Uncompressed Block Size - CType 2 bytes Compression type - EACRC 4 bytes CRC value for uncompressed block - (var) variable Compressed block - - The OS/2 extended attribute structure (FEA2LIST) is - compressed and then stored in its entirety within this - structure. There will only ever be one "block" of data in - VarFields[]. - - ### 4.5.5 - -NTFS Extra Field (0x000a): - - The following is the layout of the NTFS attributes - "extra" block. (Note: At this time the Mtime, Atime - and Ctime values MAY be used on any WIN32 system.) - - Note: all fields stored in Intel low-byte/high-byte order.
- - Value Size Description - ----- ---- ----------- -(NTFS) 0x000a 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of the total "extra" block - Reserved 4 bytes Reserved for future use - Tag1 2 bytes NTFS attribute tag value #1 - Size1 2 bytes Size of attribute #1, in bytes - (var) Size1 Attribute #1 data - . - . - . - TagN 2 bytes NTFS attribute tag value #N - SizeN 2 bytes Size of attribute #N, in bytes - (var) SizeN Attribute #N data - - For NTFS, values for Tag1 through TagN are as follows: - (currently only one set of attributes is defined for NTFS) - - Tag Size Description - ----- ---- ----------- - 0x0001 2 bytes Tag for attribute #1 - Size1 2 bytes Size of attribute #1, in bytes - Mtime 8 bytes File last modification time - Atime 8 bytes File last access time - Ctime 8 bytes File creation time - - ### 4.5.6 - -OpenVMS Extra Field (0x000c): - - The following is the layout of the OpenVMS attributes - "extra" block. - - Note: all fields stored in Intel low-byte/high-byte order. - - Value Size Description - ----- ---- ----------- - (VMS) 0x000c 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of the total "extra" block - CRC 4 bytes 32-bit CRC for remainder of the block - Tag1 2 bytes OpenVMS attribute tag value #1 - Size1 2 bytes Size of attribute #1, in bytes - (var) Size1 Attribute #1 data - . - . - . - TagN 2 bytes OpenVMS attribute tag value #N - SizeN 2 bytes Size of attribute #N, in bytes - (var) SizeN Attribute #N data - - OpenVMS Extra Field Rules: - -### 4.5.6.1. -There will be one or more attributes present, which - will each be preceded by the above TagX & SizeX values. - These values are identical to the ATR$C_XXXX and ATR$S_XXXX - constants which are defined in ATR.H under OpenVMS C. Neither - of these values will ever be zero. - -### 4.5.6.2. -No word alignment or padding is performed. - -### 4.5.6.3. -A well-behaved PKZIP/OpenVMS program SHOULD NOT produce - more than one sub-block with the same TagX value. Also, there MUST - NOT be more than one "extra" block of type 0x000c in a particular - directory record. - - ### 4.5.7 - -UNIX Extra Field (0x000d): - - The following is the layout of the UNIX "extra" block. - Note: all fields are stored in Intel low-byte/high-byte - order. - - Value Size Description - ----- ---- ----------- -(UNIX) 0x000d 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - Atime 4 bytes File last access time - Mtime 4 bytes File last modification time - Uid 2 bytes File user ID - Gid 2 bytes File group ID - (var) variable Variable length data field - - The variable length data field will contain file type - specific data. Currently the only values allowed are - the original "linked to" file names for hard or symbolic - links, and the major and minor device node numbers for - character and block device nodes. Since device nodes - cannot be either symbolic or hard links, only one set of - variable length data is stored. Link files will have the - name of the original file stored. This name is NOT NULL - terminated. Its size can be determined by checking TSize - - 12. Device entries will have eight bytes stored as two 4 - byte entries (in little endian format). The first entry - will be the major device number, and the second the minor - device number. - - ### 4.5.8 - -PATCH Descriptor Extra Field (0x000f): - - ### 4.5.8.1 -The following is the layout of the Patch Descriptor - "extra" block. - - Note: all fields stored in Intel low-byte/high-byte order. 
- - Value Size Description - ----- ---- ----------- -(Patch) 0x000f 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of the total "extra" block - Version 2 bytes Version of the descriptor - Flags 4 bytes Actions and reactions (see below) - OldSize 4 bytes Size of the file about to be patched - OldCRC 4 bytes 32-bit CRC of the file to be patched - NewSize 4 bytes Size of the resulting file - NewCRC 4 bytes 32-bit CRC of the resulting file - - ### 4.5.8.2 -Actions and reactions - - Bits Description - ---- ---------------- - 0 Use for auto detection - 1 Treat as a self-patch - 2-3 RESERVED - 4-5 Action (see below) - 6-7 RESERVED - 8-9 Reaction (see below) to absent file - 10-11 Reaction (see below) to newer file - 12-13 Reaction (see below) to unknown file - 14-15 RESERVED - 16-31 RESERVED - - ### 4.5.8.2.1 - Actions - - Action Value - ------ ----- - none 0 - add 1 - delete 2 - patch 3 - - ### 4.5.8.2.2 - Reactions - - Reaction Value - -------- ----- - ask 0 - skip 1 - ignore 2 - fail 3 - - ### 4.5.8.3 -Patch support is provided by PKPatchMaker(tm) technology - and is covered under U.S. Patents and Patents Pending. The use or - implementation in a product of certain technological aspects set - forth in the current APPNOTE, including those with regard to - strong encryption or patching requires a license from PKWARE. - Refer to the section in this document entitled "Incorporating - PKWARE Proprietary Technology into Your Product" for more - information. - - ### 4.5.9 - -PKCS#7 Store for X.509 Certificates (0x0014): - - This field MUST contain information about each of the certificates - files MAY be signed with. When the Central Directory Encryption - feature is enabled for a ZIP file, this record will appear in - the Archive Extra Data Record, otherwise it will appear in the - first central directory record and will be ignored in any - other record. - - - Note: all fields stored in Intel low-byte/high-byte order. - - Value Size Description - ----- ---- ----------- -(Store) 0x0014 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of the store data - TData TSize Data about the store - - - ### 4.5.10 - -X.509 Certificate ID and Signature for individual file (0x0015): - - This field contains the information about which certificate in - the PKCS#7 store was used to sign a particular file. It also - contains the signature data. This field can appear multiple - times, but can only appear once per certificate. - - Note: all fields stored in Intel low-byte/high-byte order. - - Value Size Description - ----- ---- ----------- -(CID) 0x0015 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of data that follows - TData TSize Signature Data - - ### 4.5.11 - -X.509 Certificate ID and Signature for central directory (0x0016): - - This field contains the information about which certificate in - the PKCS#7 store was used to sign the central directory structure. - When the Central Directory Encryption feature is enabled for a - ZIP file, this record will appear in the Archive Extra Data Record, - otherwise it will appear in the first central directory record. - - Note: all fields stored in Intel low-byte/high-byte order. 
- - Value Size Description - ----- ---- ----------- -(CDID) 0x0016 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of data that follows - TData TSize Data - - ### 4.5.12 - -Strong Encryption Header (0x0017): - - Value Size Description - ----- ---- ----------- - 0x0017 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of data that follows - Format 2 bytes Format definition for this record - AlgID 2 bytes Encryption algorithm identifier - Bitlen 2 bytes Bit length of encryption key - Flags 2 bytes Processing flags - CertData TSize-8 Certificate decryption extra field data - (refer to the explanation for CertData - in the section describing the - Certificate Processing Method under - the Strong Encryption Specification) - - See the section describing the Strong Encryption Specification - for details. Refer to the section in this document entitled - "Incorporating PKWARE Proprietary Technology into Your Product" - for more information. - - ### 4.5.13 - -Record Management Controls (0x0018): - - Value Size Description - ----- ---- ----------- -(Rec-CTL) 0x0018 2 bytes Tag for this "extra" block type - CSize 2 bytes Size of total extra block data - Tag1 2 bytes Record control attribute 1 - Size1 2 bytes Size of attribute 1, in bytes - Data1 Size1 Attribute 1 data - . - . - . - TagN 2 bytes Record control attribute N - SizeN 2 bytes Size of attribute N, in bytes - DataN SizeN Attribute N data - - - ### 4.5.14 - -PKCS#7 Encryption Recipient Certificate List (0x0019): - - This field MAY contain information about each of the certificates - used in encryption processing and it can be used to identify who is - allowed to decrypt encrypted files. This field SHOULD only appear - in the archive extra data record. This field is not required and - serves only to aid archive modifications by preserving public - encryption key data. Individual security requirements may dictate - that this data be omitted to deter information exposure. - - Note: all fields stored in Intel low-byte/high-byte order. - - Value Size Description - ----- ---- ----------- -(CStore) 0x0019 2 bytes Tag for this "extra" block type - TSize 2 bytes Size of the store data - TData TSize Data about the store - - TData: - - Value Size Description - ----- ---- ----------- - Version 2 bytes Format version number - MUST be 0x0001 at this time - CStore (var) PKCS#7 data blob - - See the section describing the Strong Encryption Specification - for details. Refer to the section in this document entitled - "Incorporating PKWARE Proprietary Technology into Your Product" - for more information. - - ### 4.5.15 - -MVS Extra Field (0x0065): - - The following is the layout of the MVS "extra" block. - Note: Some fields are stored in Big Endian format. - All text is in EBCDIC format unless otherwise specified. -Value Size Description - ----- ---- ----------- -(MVS) 0x0065 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - ID 4 bytes EBCDIC "Z390" 0xE9F3F9F0 or - "T4MV" for TargetFour - (var) TSize-4 Attribute data (see APPENDIX B) - - - ### 4.5.16 - -OS/400 Extra Field (0x0065): - - The following is the layout of the OS/400 "extra" block. - Note: Some fields are stored in Big Endian format. - All text is in EBCDIC format unless otherwise specified. 
- - Value Size Description - ----- ---- ----------- -(OS400) 0x0065 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - ID 4 bytes EBCDIC "I400" 0xC9F4F0F0 or - "T4MV" for TargetFour - (var) TSize-4 Attribute data (see APPENDIX A) - - ### 4.5.17 - -Policy Decryption Key Record Extra Field (0x0021): - - The following is the layout of the Policy Decryption Key "extra" block. - TData is a variable length, variable content field. It holds - information about encryptions and/or encryption key sources. - Contact PKWARE for information on current TData structures. - Information in this "extra" block may alternatively be placed - within comment fields. Refer to the section in this document - entitled "Incorporating PKWARE Proprietary Technology into Your - Product" for more information. - - Value Size Description - ----- ---- ----------- - 0x0021 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - TData TSize Data about the key - - ### 4.5.18 - -Key Provider Record Extra Field (0x0022): - - The following is the layout of the Key Provider "extra" block. - TData is a variable length, variable content field. It holds - information about encryptions and/or encryption key sources. - Contact PKWARE for information on current TData structures. - Information in this "extra" block may alternatively be placed - within comment fields. Refer to the section in this document - entitled "Incorporating PKWARE Proprietary Technology into Your - Product" for more information. - - Value Size Description - ----- ---- ----------- - 0x0022 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - TData TSize Data about the key - - ### 4.5.19 - -Policy Key Data Record Extra Field (0x0023): - - The following is the layout of the Policy Key Data "extra" block. - TData is a variable length, variable content field. It holds - information about encryptions and/or encryption key sources. - Contact PKWARE for information on current TData structures. - Information in this "extra" block may alternatively be placed - within comment fields. Refer to the section in this document - entitled "Incorporating PKWARE Proprietary Technology into Your - Product" for more information.
- - Value Size Description - ----- ---- ----------- - 0x0023 2 bytes Tag for this "extra" block type - TSize 2 bytes Size for the following data block - TData TSize Data about the key - -## 4.6 Third Party Mappings - - ### 4.6.1 - Third party mappings commonly used are: - - 0x07c8 Macintosh - 0x2605 ZipIt Macintosh - 0x2705 ZipIt Macintosh 1.3.5+ - 0x2805 ZipIt Macintosh 1.3.5+ - 0x334d Info-ZIP Macintosh - 0x4341 Acorn/SparkFS - 0x4453 Windows NT security descriptor (binary ACL) - 0x4704 VM/CMS - 0x470f MVS - 0x4b46 FWKCS MD5 (see below) - 0x4c41 OS/2 access control list (text ACL) - 0x4d49 Info-ZIP OpenVMS - 0x4f4c Xceed original location extra field - 0x5356 AOS/VS (ACL) - 0x5455 extended timestamp - 0x554e Xceed unicode extra field - 0x5855 Info-ZIP UNIX (original, also OS/2, NT, etc) - 0x6375 Info-ZIP Unicode Comment Extra Field - 0x6542 BeOS/BeBox - 0x7075 Info-ZIP Unicode Path Extra Field - 0x756e ASi UNIX - 0x7855 Info-ZIP UNIX (new) - 0xa11e Data Stream Alignment (Apache Commons-Compress) - 0xa220 Microsoft Open Packaging Growth Hint - 0xfd4a SMS/QDOS - 0x9901 AE-x encryption structure (see APPENDIX E) - 0x9902 unknown - - - Detailed descriptions of Extra Fields defined by third - party mappings will be documented as information on - these data structures is made available to PKWARE. - PKWARE does not guarantee the accuracy of any published - third party data. - - ### 4.6.2 - Third-party Extra Fields MUST include a Header ID using - the format defined in the section of this document - titled Extensible Data Fields (section 4.5). - - The Data Size field indicates the size of the following - data block. Programs can use this value to skip to the - next header block, passing over any data blocks that are - not of interest. - - Note: As stated above, the size of the entire .ZIP file - header, including the file name, comment, and extra - field SHOULD NOT exceed 64K in size. - - ### 4.6.3 - In case two different programs appropriate the same - Header ID value, it is strongly recommended that each - program SHOULD place a unique signature of at least two bytes in - size (and preferably 4 bytes or bigger) at the start of - each data area. Every program SHOULD verify that its - unique signature is present, in addition to the Header ID - value being correct, before assuming that it is a block of - known type. - - Third-party Mappings: - - ### 4.6.4 - -ZipIt Macintosh Extra Field (long) (0x2605): - - The following is the layout of the ZipIt extra block - for Macintosh. The local-header and central-header versions - are identical. This block MUST be present if the file is - stored MacBinary-encoded and it SHOULD NOT be used if the file - is not stored MacBinary-encoded. - - Value Size Description - ----- ---- ----------- - (Mac2) 0x2605 Short tag for this extra block type - TSize Short total data size for this block - "ZPIT" beLong extra-field signature - FnLen Byte length of FileName - FileName variable full Macintosh filename - FileType Byte[4] four-byte Mac file type string - Creator Byte[4] four-byte Mac creator string - - - ### 4.6.5 - -ZipIt Macintosh Extra Field (short, for files) (0x2705): - - The following is the layout of a shortened variant of the - ZipIt extra block for Macintosh (without "full name" entry). - This variant is used by ZipIt 1.3.5 and newer for entries of - files (not directories) that do not have a MacBinary encoded - file. The local-header and central-header versions are identical. 
- - Value Size Description - ----- ---- ----------- - (Mac2b) 0x2705 Short tag for this extra block type - TSize Short total data size for this block (12) - "ZPIT" beLong extra-field signature - FileType Byte[4] four-byte Mac file type string - Creator Byte[4] four-byte Mac creator string - fdFlags beShort attributes from FInfo.frFlags, - MAY be omitted - 0x0000 beShort reserved, MAY be omitted - - - ### 4.6.6 - -ZipIt Macintosh Extra Field (short, for directories) (0x2805): - - The following is the layout of a shortened variant of the - ZipIt extra block for Macintosh used only for directory - entries. This variant is used by ZipIt 1.3.5 and newer to - save some optional Mac-specific information about directories. - The local-header and central-header versions are identical. - - Value Size Description - ----- ---- ----------- - (Mac2c) 0x2805 Short tag for this extra block type - TSize Short total data size for this block (12) - "ZPIT" beLong extra-field signature - frFlags beShort attributes from DInfo.frFlags, MAY - be omitted - View beShort ZipIt view flag, MAY be omitted - - - The View field specifies ZipIt-internal settings as follows: - - Bits of the Flags: - bit 0 if set, the folder is shown expanded (open) - when the archive contents are viewed in ZipIt. - bits 1-15 reserved, zero; - - - ### 4.6.7 - -FWKCS MD5 Extra Field (0x4b46): - - The FWKCS Contents_Signature System, used in - automatically identifying files independent of file name, - optionally adds and uses an extra field to support the - rapid creation of an enhanced contents_signature: - - Header ID = 0x4b46 - Data Size = 0x0013 - Preface = 'M','D','5' - followed by 16 bytes containing the uncompressed file's - 128_bit MD5 hash(1), low byte first. - - When FWKCS revises a .ZIP file central directory to add - this extra field for a file, it also replaces the - central directory entry for that file's uncompressed - file length with a measured value. - - FWKCS provides an option to strip this extra field, if - present, from a .ZIP file central directory. In adding - this extra field, FWKCS preserves .ZIP file Authenticity - Verification; if stripping this extra field, FWKCS - preserves all versions of AV through PKZIP version 2.04g. - - FWKCS, and FWKCS Contents_Signature System, are - trademarks of Frederick W. Kantor. - - (1) R. Rivest, RFC1321.TXT, MIT Laboratory for Computer - Science and RSA Data Security, Inc., April 1992. - ll.76-77: "The MD5 algorithm is being placed in the - public domain for review and possible adoption as a - standard." - - - ### 4.6.8 - -Info-ZIP Unicode Comment Extra Field (0x6375): - - Stores the UTF-8 version of the file comment as stored in the - central directory header. (Last Revision 20070912) - - Value Size Description - ----- ---- ----------- - (UCom) 0x6375 Short tag for this extra block type ("uc") - TSize Short total data size for this block - Version 1 byte version of this extra field, currently 1 - ComCRC32 4 bytes Comment Field CRC32 Checksum - UnicodeCom Variable UTF-8 version of the entry comment - - Currently Version is set to the number 1. If there is a need - to change this field, the version will be incremented. Changes - MAY NOT be backward compatible so this extra field SHOULD NOT be - used if the version is not recognized. - - The ComCRC32 is the standard zip CRC32 checksum of the File Comment - field in the central directory header. This is used to verify that - the comment field has not changed since the Unicode Comment extra field - was created. 
This can happen if a utility changes the File Comment - field but does not update the UTF-8 Comment extra field. If the CRC - check fails, this Unicode Comment extra field SHOULD be ignored and - the File Comment field in the header SHOULD be used instead. - - The UnicodeCom field is the UTF-8 version of the File Comment field - in the header. As UnicodeCom is defined to be UTF-8, no UTF-8 byte - order mark (BOM) is used. The length of this field is determined by - subtracting the size of the previous fields from TSize. If both the - File Name and Comment fields are UTF-8, the new General Purpose Bit - Flag, bit 11 (Language encoding flag (EFS)), can be used to indicate - both the header File Name and Comment fields are UTF-8 and, in this - case, the Unicode Path and Unicode Comment extra fields are not - needed and SHOULD NOT be created. Note that, for backward - compatibility, bit 11 SHOULD only be used if the native character set - of the paths and comments being zipped up are already in UTF-8. It is - expected that the same file comment storage method, either general - purpose bit 11 or extra fields, be used in both the Local and Central - Directory Header for a file. - - - ### 4.6.9 - -Info-ZIP Unicode Path Extra Field (0x7075): - - Stores the UTF-8 version of the file name field as stored in the - local header and central directory header. (Last Revision 20070912) - - Value Size Description - ----- ---- ----------- - (UPath) 0x7075 Short tag for this extra block type ("up") - TSize Short total data size for this block - Version 1 byte version of this extra field, currently 1 - NameCRC32 4 bytes File Name Field CRC32 Checksum - UnicodeName Variable UTF-8 version of the entry File Name - - Currently Version is set to the number 1. If there is a need - to change this field, the version will be incremented. Changes - MAY NOT be backward compatible so this extra field SHOULD NOT be - used if the version is not recognized. - - The NameCRC32 is the standard zip CRC32 checksum of the File Name - field in the header. This is used to verify that the header - File Name field has not changed since the Unicode Path extra field - was created. This can happen if a utility renames the File Name but - does not update the UTF-8 path extra field. If the CRC check fails, - this UTF-8 Path Extra Field SHOULD be ignored and the File Name field - in the header SHOULD be used instead. - - The UnicodeName is the UTF-8 version of the contents of the File Name - field in the header. As UnicodeName is defined to be UTF-8, no UTF-8 - byte order mark (BOM) is used. The length of this field is determined - by subtracting the size of the previous fields from TSize. If both - the File Name and Comment fields are UTF-8, the new General Purpose - Bit Flag, bit 11 (Language encoding flag (EFS)), can be used to - indicate that both the header File Name and Comment fields are UTF-8 - and, in this case, the Unicode Path and Unicode Comment extra fields - are not needed and SHOULD NOT be created. Note that, for backward - compatibility, bit 11 SHOULD only be used if the native character set - of the paths and comments being zipped up are already in UTF-8. It is - expected that the same file name storage method, either general - purpose bit 11 or extra fields, be used in both the Local and Central - Directory Header for a file. 
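The CRC-driven fallback in 4.6.8 and 4.6.9 is easy to get wrong in the "ignore" cases, so a minimal sketch may help. This is illustrative only, not part of the specification: the function name is hypothetical and the `crc32fast` crate is assumed for the checksum.

```rust
/// Hypothetical helper: returns the UTF-8 name carried by a 0x7075 extra
/// field body, or None when section 4.6.9 says the field must be ignored
/// (unknown version, CRC mismatch, or invalid UTF-8).
fn unicode_path<'a>(header_name: &[u8], field: &'a [u8]) -> Option<&'a str> {
    // Layout: Version (1 byte) | NameCRC32 (4 bytes, little-endian) | UnicodeName
    if field.len() < 5 || field[0] != 1 {
        return None; // unrecognized version: SHOULD NOT use this field
    }
    let stored_crc = u32::from_le_bytes(field[1..5].try_into().ok()?);
    if crc32fast::hash(header_name) != stored_crc {
        return None; // header File Name changed since the field was written
    }
    std::str::from_utf8(&field[5..]).ok() // caller falls back to the header name
}
```

The 0x6375 Unicode Comment field validates the same way, with the File Comment field in place of the File Name.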
- - - ### 4.6.10 - -Microsoft Open Packaging Growth Hint (0xa220): - - Value Size Description - ----- ---- ----------- - 0xa220 Short tag for this extra block type - TSize Short size of Sig + PadVal + Padding - Sig Short verification signature (A028) - PadVal Short Initial padding value - Padding variable filled with NULL characters - - ### 4.6.11 - -Data Stream Alignment (Apache Commons-Compress) (0xa11e): - - (per Zbynek Vyskovsky) Defines alignment of data stream of this - entry within the zip archive. Additionally, indicates whether the - compression method should be kept when re-compressing the zip file. - - The purpose of this extra field is to align specific resources to - word or page boundaries so they can be easily mapped into memory. - - Value Size Description - ----- ---- ----------- - 0xa11e Short tag for this extra block type - TSize Short total data size for this block (2+padding) - alignment Short required alignment and indicator - 0x00 Variable padding - - The alignment field (lower 15 bits) defines the minimal alignment - required by the data stream. Bit 15 of alignment field indicates - whether the compression method of this entry can be changed when - recompressing the zip file. The value 0 means the compression method - should not be changed. The value 1 indicates the compression method - may be changed. The padding field contains padding to ensure the correct - alignment. It can be changed at any time when the offset or required - alignment changes. (see https://issues.apache.org/jira/browse/COMPRESS-391) - - -## 4.7 Manifest Files - -### 4.7.1 - Applications using ZIP files MAY have a need for additional - information that MUST be included with the files placed into - a ZIP file. Application specific information that cannot be - stored using the defined ZIP storage records SHOULD be stored - using the extensible Extra Field convention defined in this - document. However, some applications MAY use a manifest - file as a means for storing additional information. One - example is the META-INF/MANIFEST.MF file used in ZIP formatted - files having the .JAR extension (JAR files). - -### 4.7.2 - A manifest file is a file created for the application process - that requires this information. A manifest file MAY be of any - file type required by the defining application process. It is - placed within the same ZIP file as files to which this information - applies. By convention, this file is typically the first file placed - into the ZIP file and it MAY include a defined directory path. - -### 4.7.3 - Manifest files MAY be compressed or encrypted as needed for - application processing of the files inside the ZIP files. - - Manifest files are outside of the scope of this specification. - - -## 5.0 Explanation of compression methods - - -## 5.1 UnShrinking - Method 1 - -### 5.1.1 - Shrinking is a Dynamic Ziv-Lempel-Welch compression algorithm - with partial clearing. The initial code size is 9 bits, and the - maximum code size is 13 bits. Shrinking differs from conventional - Dynamic Ziv-Lempel-Welch implementations in several respects: - -### 5.1.2 - The code size is controlled by the compressor, and is - not automatically increased when codes larger than the current - code size are created (but not necessarily used). When - the decompressor encounters the code sequence 256 - (decimal) followed by 1, it SHOULD increase the code size - read from the input stream to the next bit size. 
No - blocking of the codes is performed, so the next code at - the increased size SHOULD be read from the input stream - immediately after where the previous code at the smaller - bit size was read. Again, the decompressor SHOULD NOT - increase the code size used until the sequence 256,1 is - encountered. - -### 5.1.3 - When the table becomes full, total clearing is not - performed. Rather, when the compressor emits the code - sequence 256,2 (decimal), the decompressor SHOULD clear - all leaf nodes from the Ziv-Lempel tree, and continue to - use the current code size. The nodes that are cleared - from the Ziv-Lempel tree are then re-used, with the lowest - code value re-used first, and the highest code value - re-used last. The compressor can emit the sequence 256,2 - at any time. - -## 5.2 Expanding - Methods 2-5 - -### 5.2.1 - The Reducing algorithm is actually a combination of two - distinct algorithms. The first algorithm compresses repeated - byte sequences, and the second algorithm takes the compressed - stream from the first algorithm and applies a probabilistic - compression method. - -### 5.2.2 - The probabilistic compression stores an array of 'follower - sets' S(j), for j=0 to 255, corresponding to each possible - ASCII character. Each set contains between 0 and 32 - characters, to be denoted as S(j)[0],...,S(j)[m], where m<32. - The sets are stored at the beginning of the data area for a - Reduced file, in reverse order, with S(255) first, and S(0) - last. - -### 5.2.3 - The sets are encoded as { N(j), S(j)[0],...,S(j)[N(j)-1] }, - where N(j) is the size of set S(j). N(j) can be 0, in which - case the follower set for S(j) is empty. Each N(j) value is - encoded in 6 bits, followed by N(j) eight bit character values - corresponding to S(j)[0] to S(j)[N(j)-1] respectively. If - N(j) is 0, then no values for S(j) are stored, and the value - for N(j-1) immediately follows. - -### 5.2.4 - Immediately after the follower sets, is the compressed data - stream. The compressed data stream can be interpreted for the - probabilistic decompression as follows: - - let Last-Character <- 0. - loop until done - if the follower set S(Last-Character) is empty then - read 8 bits from the input stream, and copy this - value to the output stream. - otherwise if the follower set S(Last-Character) is non-empty then - read 1 bit from the input stream. - if this bit is not zero then - read 8 bits from the input stream, and copy this - value to the output stream. - otherwise if this bit is zero then - read B(N(Last-Character)) bits from the input - stream, and assign this value to I. - Copy the value of S(Last-Character)[I] to the - output stream. - - assign the last value placed on the output stream to - Last-Character. - end loop - - B(N(j)) is defined as the minimal number of bits required to - encode the value N(j)-1. - -### 5.2.5 - The decompressed stream from above can then be expanded to - re-create the original file as follows: - - let State <- 0. - - loop until done - read 8 bits from the input stream into C. - case State of - 0: if C is not equal to DLE (144 decimal) then - copy C to the output stream. - otherwise if C is equal to DLE then - let State <- 1. - - 1: if C is non-zero then - let V <- C. - let Len <- L(V) - let State <- F(Len). - otherwise if C is zero then - copy the value 144 (decimal) to the output stream. - let State <- 0 - - 2: let Len <- Len + C - let State <- 3. 
- - 3: move backwards D(V,C) bytes in the output stream - (if this position is before the start of the output - stream, then assume that all the data before the - start of the output stream is filled with zeros). - copy Len+3 bytes from this position to the output stream. - let State <- 0. - end case - end loop - - The functions F,L, and D are dependent on the 'compression - factor', 1 through 4, and are defined as follows: - - For compression factor 1: - L(X) equals the lower 7 bits of X. - F(X) equals 2 if X equals 127 otherwise F(X) equals 3. - D(X,Y) equals the (upper 1 bit of X) * 256 + Y + 1. - For compression factor 2: - L(X) equals the lower 6 bits of X. - F(X) equals 2 if X equals 63 otherwise F(X) equals 3. - D(X,Y) equals the (upper 2 bits of X) * 256 + Y + 1. - For compression factor 3: - L(X) equals the lower 5 bits of X. - F(X) equals 2 if X equals 31 otherwise F(X) equals 3. - D(X,Y) equals the (upper 3 bits of X) * 256 + Y + 1. - For compression factor 4: - L(X) equals the lower 4 bits of X. - F(X) equals 2 if X equals 15 otherwise F(X) equals 3. - D(X,Y) equals the (upper 4 bits of X) * 256 + Y + 1. - -## 5.3 Imploding - Method 6 - -### 5.3.1 - The Imploding algorithm is actually a combination of two - distinct algorithms. The first algorithm compresses repeated byte - sequences using a sliding dictionary. The second algorithm is - used to compress the encoding of the sliding dictionary output, - using multiple Shannon-Fano trees. - -### 5.3.2 - The Imploding algorithm can use a 4K or 8K sliding dictionary - size. The dictionary size used can be determined by bit 1 in the - general purpose flag word; a 0 bit indicates a 4K dictionary - while a 1 bit indicates an 8K dictionary. - -### 5.3.3 - The Shannon-Fano trees are stored at the start of the - compressed file. The number of trees stored is defined by bit 2 in - the general purpose flag word; a 0 bit indicates two trees stored, - a 1 bit indicates three trees are stored. If 3 trees are stored, - the first Shannon-Fano tree represents the encoding of the - Literal characters, the second tree represents the encoding of - the Length information, the third represents the encoding of the - Distance information. When 2 Shannon-Fano trees are stored, the - Length tree is stored first, followed by the Distance tree. - -### 5.3.4 - The Literal Shannon-Fano tree, if present is used to represent - the entire ASCII character set, and contains 256 values. This - tree is used to compress any data not compressed by the sliding - dictionary algorithm. When this tree is present, the Minimum - Match Length for the sliding dictionary is 3. If this tree is - not present, the Minimum Match Length is 2. - -### 5.3.5 - The Length Shannon-Fano tree is used to compress the Length - part of the (length,distance) pairs from the sliding dictionary - output. The Length tree contains 64 values, ranging from the - Minimum Match Length, to 63 plus the Minimum Match Length. - -### 5.3.6 - The Distance Shannon-Fano tree is used to compress the Distance - part of the (length,distance) pairs from the sliding dictionary - output. The Distance tree contains 64 values, ranging from 0 to - 63, representing the upper 6 bits of the distance value. The - distance values themselves will be between 0 and the sliding - dictionary size, either 4K or 8K. - -### 5.3.7 - The Shannon-Fano trees themselves are stored in a compressed - format. The first byte of the tree data represents the number of - bytes of data representing the (compressed) Shannon-Fano tree - minus 1. 
The remaining bytes represent the Shannon-Fano tree - data encoded as: - - High 4 bits: Number of values at this bit length + 1. (1 - 16) - Low 4 bits: Bit Length needed to represent value + 1. (1 - 16) - -### 5.3.8 - The Shannon-Fano codes can be constructed from the bit lengths - using the following algorithm: - - 1) Sort the Bit Lengths in ascending order, while retaining the - order of the original lengths stored in the file. - - 2) Generate the Shannon-Fano trees: - - Code <- 0 - CodeIncrement <- 0 - LastBitLength <- 0 - i <- number of Shannon-Fano codes - 1 (either 255 or 63) - - loop while i >= 0 - Code = Code + CodeIncrement - if BitLength(i) <> LastBitLength then - LastBitLength=BitLength(i) - CodeIncrement = 1 shifted left (16 - LastBitLength) - ShannonCode(i) = Code - i <- i - 1 - end loop - - 3) Reverse the order of all the bits in the above ShannonCode() - vector, so that the most significant bit becomes the least - significant bit. For example, the value 0x1234 (hex) would - become 0x2C48 (hex). - - 4) Restore the order of Shannon-Fano codes as originally stored - within the file. - - Example: - - This example will show the encoding of a Shannon-Fano tree - of size 8. Notice that the actual Shannon-Fano trees used - for Imploding are either 64 or 256 entries in size. - - Example: 0x02, 0x42, 0x01, 0x13 - - The first byte indicates 3 values in this table. Decoding the - bytes: - 0x42 = 5 codes of 3 bits long - 0x01 = 1 code of 2 bits long - 0x13 = 2 codes of 4 bits long - - This would generate the original bit length array of: - (3, 3, 3, 3, 3, 2, 4, 4) - - There are 8 codes in this table for the values 0 thru 7. Using - the algorithm to obtain the Shannon-Fano codes produces: - - Reversed Order Original - Val Sorted Constructed Code Value Restored Length - --- ------ ----------------- -------- -------- ------ - 0: 2 1100000000000000 11 101 3 - 1: 3 1010000000000000 101 001 3 - 2: 3 1000000000000000 001 110 3 - 3: 3 0110000000000000 110 010 3 - 4: 3 0100000000000000 010 100 3 - 5: 3 0010000000000000 100 11 2 - 6: 4 0001000000000000 1000 1000 4 - 7: 4 0000000000000000 0000 0000 4 - - The values in the Val, Order Restored and Original Length columns - now represent the Shannon-Fano encoding tree that can be used for - decoding the Shannon-Fano encoded data. How to parse the - variable length Shannon-Fano values from the data stream is beyond - the scope of this document. (See the references listed at the end of - this document for more information.) However, traditional decoding - schemes used for Huffman variable length decoding, such as the - Greenlaw algorithm, can be successfully applied. - -### 5.3.9 - The compressed data stream begins immediately after the - compressed Shannon-Fano data. The compressed data stream can be - interpreted as follows: - - loop until done - read 1 bit from input stream. - - if this bit is non-zero then (encoded data is literal data) - if Literal Shannon-Fano tree is present - read and decode character using Literal Shannon-Fano tree. - otherwise - read 8 bits from input stream. - copy character to the output stream. - otherwise (encoded data is sliding dictionary match) - if 8K dictionary size - read 7 bits for offset Distance (lower 7 bits of offset). - otherwise - read 6 bits for offset Distance (lower 6 bits of offset). - - using the Distance Shannon-Fano tree, read and decode the - upper 6 bits of the Distance value. - - using the Length Shannon-Fano tree, read and decode - the Length value. 
- - Length <- Length + Minimum Match Length - - if Length = 63 + Minimum Match Length - read 8 bits from the input stream, - add this value to Length. - - move backwards Distance+1 bytes in the output stream, and - copy Length characters from this position to the output - stream. (if this position is before the start of the output - stream, then assume that all the data before the start of - the output stream is filled with zeros). - end loop - -## 5.4 Tokenizing - Method 7 - -### 5.4.1 - This method is not used by PKZIP. - -## 5.5 Deflating - Method 8 - -### 5.5.1 - The Deflate algorithm is similar to the Implode algorithm using - a sliding dictionary of up to 32K with secondary compression - from Huffman/Shannon-Fano codes. - -### 5.5.2 - The compressed data is stored in blocks with a header describing - the block and the Huffman codes used in the data block. The header - format is as follows: - - Bit 0: Last Block bit This bit is set to 1 if this is the last - compressed block in the data. - Bits 1-2: Block type - 00 (0) - Block is stored - All stored data is byte aligned. - Skip bits until next byte, then next word = block - length, followed by the one's complement of the block - length word. Remaining data in block is the stored - data. - - 01 (1) - Use fixed Huffman codes for literal and distance codes. - Lit Code Bits Dist Code Bits - --------- ---- --------- ---- - 0 - 143 8 0 - 31 5 - 144 - 255 9 - 256 - 279 7 - 280 - 287 8 - - Literal codes 286-287 and distance codes 30-31 are - never used but participate in the Huffman construction. - - 10 (2) - Dynamic Huffman codes. (See expanding Huffman codes) - - 11 (3) - Reserved - Flag an "Error in compressed data" if seen. - -### 5.5.3 - Expanding Huffman Codes - - If the data block is stored with dynamic Huffman codes, the Huffman - codes are sent in the following compressed format: - - 5 Bits: # of Literal codes sent - 256 (256 - 286) - All other codes are never sent. - 5 Bits: # of Dist codes - 1 (1 - 32) - 4 Bits: # of Bit Length codes - 3 (3 - 19) - - The Huffman codes are sent as bit lengths and the codes are built as - described in the Implode algorithm. The bit lengths themselves are - compressed with Huffman codes. There are 19 bit length codes: - - 0 - 15: Represent bit lengths of 0 - 15 - 16: Copy the previous bit length 3 - 6 times. - The next 2 bits indicate repeat length (0 = 3, ... ,3 = 6) - Example: Codes 8, 16 (+2 bits 11), 16 (+2 bits 10) will - expand to 12 bit lengths of 8 (1 + 6 + 5) - 17: Repeat a bit length of 0 for 3 - 10 times. (3 bits of length) - 18: Repeat a bit length of 0 for 11 - 138 times (7 bits of length) - - The lengths of the bit length codes are sent packed 3 bits per value - (0 - 7) in the following order: - - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 - - The Huffman codes SHOULD be built as described in the Implode algorithm - except codes are assigned starting at the shortest bit length, i.e. the - shortest code SHOULD be all 0's rather than all 1's. Also, codes with - a bit length of zero do not participate in the tree construction. The - codes are then used to decode the bit lengths for the literal and - distance tables. - - The bit lengths for the literal tables are sent first with the number - of entries sent described by the 5 bits sent earlier. There are up - to 286 literal characters; the first 256 represent the respective 8 - bit character, code 256 represents the End-Of-Block code, the remaining - 29 codes represent copy lengths of 3 thru 258.
There are up to 30 - distance codes representing distances from 1 thru 32k as described - below. - - Length Codes - ------------ - Extra Extra Extra Extra - Code Bits Length Code Bits Lengths Code Bits Lengths Code Bits Length(s) - ---- ---- ------ ---- ---- ------- ---- ---- ------- ---- ---- --------- - 257 0 3 265 1 11,12 273 3 35-42 281 5 131-162 - 258 0 4 266 1 13,14 274 3 43-50 282 5 163-194 - 259 0 5 267 1 15,16 275 3 51-58 283 5 195-226 - 260 0 6 268 1 17,18 276 3 59-66 284 5 227-257 - 261 0 7 269 2 19-22 277 4 67-82 285 0 258 - 262 0 8 270 2 23-26 278 4 83-98 - 263 0 9 271 2 27-30 279 4 99-114 - 264 0 10 272 2 31-34 280 4 115-130 - - Distance Codes - -------------- - Extra Extra Extra Extra - Code Bits Dist Code Bits Dist Code Bits Distance Code Bits Distance - ---- ---- ---- ---- ---- ------ ---- ---- -------- ---- ---- -------- - 0 0 1 8 3 17-24 16 7 257-384 24 11 4097-6144 - 1 0 2 9 3 25-32 17 7 385-512 25 11 6145-8192 - 2 0 3 10 4 33-48 18 8 513-768 26 12 8193-12288 - 3 0 4 11 4 49-64 19 8 769-1024 27 12 12289-16384 - 4 1 5,6 12 5 65-96 20 9 1025-1536 28 13 16385-24576 - 5 1 7,8 13 5 97-128 21 9 1537-2048 29 13 24577-32768 - 6 2 9-12 14 6 129-192 22 10 2049-3072 - 7 2 13-16 15 6 193-256 23 10 3073-4096 - -### 5.5.4 - The compressed data stream begins immediately after the - compressed header data. The compressed data stream can be - interpreted as follows: - - do - read header from input stream. - - if stored block - skip bits until byte aligned - read count and one's complement of count - copy count bytes data block - otherwise - loop until end of block code sent - decode literal character from input stream - if literal < 256 - copy character to the output stream - otherwise - if literal = end of block - break from loop - otherwise - decode distance from input stream - - move backwards distance bytes in the output stream, and - copy length characters from this position to the output - stream. - end loop - while not last block - - if data descriptor exists - skip bits until byte aligned - read crc and sizes - endif - -## 5.6 Enhanced Deflating - Method 9 - -### 5.6.1 - The Enhanced Deflating algorithm is similar to Deflate but uses - a sliding dictionary of up to 64K. Deflate64(tm) is supported - by the Deflate extractor. - -## 5.7 BZIP2 - Method 12 - -### 5.7.1 - BZIP2 is an open-source data compression algorithm developed by - Julian Seward. Information and source code for this algorithm - can be found on the internet. - -## 5.8 LZMA - Method 14 - -### 5.8.1 - LZMA is a block-oriented, general purpose data compression - algorithm developed and maintained by Igor Pavlov. It is a derivative - of LZ77 that utilizes Markov chains and a range coder. Information and - source code for this algorithm can be found on the internet. Consult - with the author of this algorithm for information on terms or - restrictions on use. - - Support for LZMA within the ZIP format is defined as follows: - -### 5.8.2 - The Compression method field within the ZIP Local and Central - Header records will be set to the value 14 to indicate data was - compressed using LZMA. - -### 5.8.3 - The Version needed to extract field within the ZIP Local and - Central Header records will be set to 6.3 to indicate the minimum - ZIP format version supporting this feature. - -### 5.8.4 - File data compressed using the LZMA algorithm MUST be placed - immediately following the Local Header for the file.
If a standard - ZIP encryption header is required, it will follow the Local Header - and will precede the LZMA compressed file data segment. The location - of the LZMA compressed data segment within the ZIP format will be as shown: - - [local header file 1] - [encryption header file 1] - [LZMA compressed data segment for file 1] - [data descriptor 1] - [local header file 2] - -### 5.8.5 - The encryption header and data descriptor records MAY - be conditionally present. The LZMA Compressed Data Segment - will consist of an LZMA Properties Header followed by the - LZMA Compressed Data as shown: - - [LZMA properties header for file 1] - [LZMA compressed data for file 1] - -### 5.8.6 - The LZMA Compressed Data will be stored as provided by the - LZMA compression library. Compressed size, uncompressed size and - other file characteristics about the file being compressed MUST be - stored in standard ZIP storage format. - -### 5.8.7 - The LZMA Properties Header will store specific data required - to decompress the LZMA compressed Data. This data is set by the - LZMA compression engine using the function WriteCoderProperties() - as documented within the LZMA SDK. - -### 5.8.8 - Storage fields for the property information within the LZMA - Properties Header are as follows: - - LZMA Version Information 2 bytes - LZMA Properties Size 2 bytes - LZMA Properties Data variable, defined by "LZMA Properties Size" - - ### 5.8.8.1 -LZMA Version Information - this field identifies which version - of the LZMA SDK was used to compress a file. The first byte will - store the major version number of the LZMA SDK and the second - byte will store the minor number. - - ### 5.8.8.2 -LZMA Properties Size - this field defines the size of the - remaining property data. Typically this size SHOULD be determined by - the version of the SDK. This size field is included as a convenience - and to help avoid any ambiguity arising in the future due - to changes in this compression algorithm. - - ### 5.8.8.3 -LZMA Property Data - this variable sized field records the - required values for the decompressor as defined by the LZMA SDK. - The data stored in this field SHOULD be obtained using - WriteCoderProperties() in the version of the SDK defined by - the "LZMA Version Information" field. - - ### 5.8.8.4 -The layout of the "LZMA Properties Data" field is a function of - the LZMA compression algorithm. It is possible that this layout MAY be - changed by the author over time. The data layout in version 4.3 of the - LZMA SDK defines a 5 byte array that uses 4 bytes to store the dictionary - size in little-endian order. This is preceded by a single packed byte as - the first element of the array that contains the following fields: - - PosStateBits - LiteralPosStateBits - LiteralContextBits - - Refer to the LZMA documentation for a more detailed explanation of - these fields. - -### 5.8.9 - Data compressed with method 14, LZMA, MAY include an end-of-stream - (EOS) marker ending the compressed data stream. This marker is not - required, but its use is highly recommended to facilitate processing - and implementers SHOULD include the EOS marker whenever possible. - When the EOS marker is used, general purpose bit 1 MUST be set. If - general purpose bit 1 is not set, the EOS marker is not present. - -## 5.9 WavPack - Method 97 - -### 5.9.1 - Information describing the use of compression method 97 is - provided by WinZIP International, LLC.
This method relies on the - open source WavPack audio compression utility developed by David Bryant. - Information on WavPack is available at www.wavpack.com. Please consult - with the author of this algorithm for information on terms and - restrictions on use. - -### 5.9.2 - WavPack data for a file begins immediately after the end of the - local header data. This data is the output from WavPack compression - routines. Within the ZIP file, the use of WavPack compression is - indicated by setting the compression method field to a value of 97 - in both the local header and the central directory header. The Version - needed to extract and version made by fields use the same values as are - used for data compressed using the Deflate algorithm. - -### 5.9.3 - An implementation note for storing digital sample data when using - WavPack compression within ZIP files is that all of the bytes of - the sample data SHOULD be compressed. This includes any unused - bits up to the byte boundary. An example is a 2 byte sample that - uses only 12 bits for the sample data with 4 unused bits. If only - 12 bits are passed as the sample size to the WavPack routines, the 4 - unused bits will be set to 0 on extraction regardless of their original - state. To avoid this, the full 16 bits of the sample data size - SHOULD be provided. - -## 5.10 PPMd - Method 98 - -### 5.10.1 - PPMd is a data compression algorithm developed by Dmitry Shkarin - which includes a carryless rangecoder developed by Dmitry Subbotin. - This algorithm is based on predictive phrase matching on multiple - order contexts. Information and source code for this algorithm - can be found on the internet. Consult with the author of this - algorithm for information on terms or restrictions on use. - -### 5.10.2 - Support for PPMd within the ZIP format currently is provided only - for version I, revision 1 of the algorithm. Storage requirements - for using this algorithm are as follows: - -### 5.10.3 - Parameters needed to control the algorithm are stored in the two - bytes immediately preceding the compressed data. These bytes are - used to store the following fields: - - Model order - sets the maximum model order, default is 8, possible - values are from 2 to 16 inclusive - - Sub-allocator size - sets the size of sub-allocator in MB, default is 50, - possible values are from 1MB to 256MB inclusive - - Model restoration method - sets the method used to restart context - model at memory insufficiency, values are: - - 0 - restarts model from scratch - default - 1 - cut off model - decreases performance by as much as 2x - 2 - freeze context tree - not recommended - -### 5.10.4 - An example for packing these fields into the 2 byte storage field is - illustrated below. These values are stored in Intel low-byte/high-byte - order. - - wPPMd = (Model order - 1) + - ((Sub-allocator size - 1) << 4) + - (Model restoration method << 12) - - -## 5.11 AE-x Encryption marker - Method 99 - -## 5.12 JPEG variant - Method 96 - -## 5.13 PKWARE Data Compression Library Imploding - Method 10 - -## 5.14 Reserved - Method 11 - -## 5.15 Reserved - Method 13 - -## 5.16 Reserved - Method 15 - -## 5.17 IBM z/OS CMPSC Compression - Method 16 - -Method 16 utilizes the IBM hardware compression facility available -on most IBM mainframes. Hardware compression can significantly -increase the speed of data compression. This method uses a variant -of the LZ78 algorithm. CMPSC hardware compression is performed -using the COMPRESSION CALL instruction. 
- -ZIP archives can be created using this method only on mainframes -supporting the CMPSC instruction. Extraction MAY occur on any -platform supporting this compression algorithm. Use of this -algorithm requires creation of a compression dictionary and -an expansion dictionary. The expansion dictionary MUST be -placed into the ZIP archive for use on the system where -extraction will occur. - -Additional information on this compression algorithm and dictionaries -can be found in the IBM provided document titled IBM ESA/390 Data -Compression (SA22-7208-01). Storage requirements for using CMPSC -compression are as follows. - -The format for the compressed data stream placed into the ZIP -archive following the Local Header is: - - [dictionary header] - [expansion dictionary] - [CMPSC compressed data] - -If encryption is used to encrypt a file compressed with CMPSC, these -sections MUST be encrypted as a single entity. - -The format of the dictionary header is: - - Value Size Description - ----- ---- ----------- - Version 1 byte 1 - Flags/Symsize 1 byte Processing flags and - symbol size - DictionaryLen 4 bytes Length of the - expansion dictionary - -Explanation of processing flags and symbol size: - -The high 4 bits are used to store the processing flags. The low -4 bits represent the size of a symbol, in bits (values range -from 9-13). Flag values are defined below. - - 0x80 - expansion dictionary - 0x40 - expansion dictionary is compressed using Deflate - 0x20 - Reserved - 0x10 - Reserved - - -## 5.18 Reserved - Method 17 - -## 5.19 IBM TERSE - Method 18 - -## 5.20 IBM LZ77 z Architecture - Method 19 - -## 6.0 Traditional PKWARE Encryption - -### 6.0.1 - The following information discusses the decryption steps - required to support traditional PKWARE encryption. This - form of encryption is considered weak by today's standards - and its use is recommended only for situations with - low security needs or for compatibility with older .ZIP - applications. - -## 6.1 Traditional PKWARE Decryption - -### 6.1.1 - PKWARE is grateful to Mr. Roger Schlafly for his expert - contribution towards the development of PKWARE's traditional - encryption. - -### 6.1.2 - PKZIP encrypts the compressed data stream. Encrypted files - MUST be decrypted before they can be extracted to their original - form. - -### 6.1.3 - Each encrypted file has an extra 12 bytes stored at the start - of the data area defining the encryption header for that file. The - encryption header is originally set to random values, and then - itself encrypted, using three, 32-bit keys. The key values are - initialized using the supplied encryption password. After each byte - is encrypted, the keys are then updated using pseudo-random number - generation techniques in combination with the same CRC-32 algorithm - used in PKZIP and described elsewhere in this document. - -### 6.1.4 - The following are the basic steps required to decrypt a file: - - 1) Initialize the three 32-bit keys with the password. - 2) Read and decrypt the 12-byte encryption header, further - initializing the encryption keys. - 3) Read and decrypt the compressed data stream using the - encryption keys.
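The routines these steps rely on (update_keys, decrypt_byte) are defined in sections 6.1.5 through 6.1.7 below. As a compact illustration only (a reading of that pseudo-code, not PKWARE's implementation), the key schedule and per-byte decryption could be sketched in Rust as:

```rust
/// Sketch of the traditional PKWARE key state (sections 6.1.5-6.1.7).
struct Keys([u32; 3]);

impl Keys {
    /// Section 6.1.5: seed the three keys, then fold in the password.
    fn new(password: &[u8]) -> Self {
        let mut k = Keys([305419896, 591751049, 878082192]);
        password.iter().for_each(|&b| k.update(b));
        k
    }
    /// update_keys() from section 6.1.5.
    fn update(&mut self, byte: u8) {
        self.0[0] = crc32_byte(self.0[0], byte);
        self.0[1] = self.0[1]
            .wrapping_add(self.0[0] & 0xff)
            .wrapping_mul(134775813)
            .wrapping_add(1);
        self.0[2] = crc32_byte(self.0[2], (self.0[1] >> 24) as u8);
    }
    /// decrypt_byte() from section 6.1.6; temp is a 16-bit quantity.
    fn decrypt_byte(&self) -> u8 {
        let temp = (self.0[2] | 2) & 0xffff;
        ((temp * (temp ^ 1)) >> 8) as u8
    }
    /// Decrypt one ciphertext byte and advance the key state.
    fn decrypt(&mut self, c: u8) -> u8 {
        let p = c ^ self.decrypt_byte();
        self.update(p);
        p
    }
}

/// One byte of the standard CRC-32 (polynomial 0xEDB88320), bitwise for brevity.
fn crc32_byte(mut crc: u32, byte: u8) -> u32 {
    crc ^= byte as u32;
    for _ in 0..8 {
        crc = (crc >> 1) ^ (0xEDB88320 * (crc & 1));
    }
    crc
}
```

Decrypting a file is then exactly the three steps above: initialize with the password, call decrypt on each of the 12 header bytes, then on every byte of the compressed data stream.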
- -### 6.1.5 - Initializing the encryption keys - - Key(0) <- 305419896 - Key(1) <- 591751049 - Key(2) <- 878082192 - - loop for i <- 0 to length(password)-1 - update_keys(password(i)) - end loop - - Where update_keys() is defined as: - - update_keys(char): - Key(0) <- crc32(key(0),char) - Key(1) <- Key(1) + (Key(0) & 000000ffH) - Key(1) <- Key(1) * 134775813 + 1 - Key(2) <- crc32(key(2),key(1) >> 24) - end update_keys - - Where crc32(old_crc,char) is a routine that given a CRC value and a - character, returns an updated CRC value after applying the CRC-32 - algorithm described elsewhere in this document. - - ### 6.1.6 - Decrypting the encryption header - - The purpose of this step is to further initialize the encryption - keys, based on random data, to render a plaintext attack on the - data ineffective. - - Read the 12-byte encryption header into Buffer, in locations - Buffer(0) thru Buffer(11). - - loop for i <- 0 to 11 - C <- buffer(i) ^ decrypt_byte() - update_keys(C) - buffer(i) <- C - end loop - - Where decrypt_byte() is defined as: - - unsigned char decrypt_byte() - local unsigned short temp - temp <- Key(2) | 2 - decrypt_byte <- (temp * (temp ^ 1)) >> 8 - end decrypt_byte - - After the header is decrypted, the last 1 or 2 bytes in Buffer - SHOULD be the high-order word/byte of the CRC for the file being - decrypted, stored in Intel low-byte/high-byte order. Versions of - PKZIP prior to 2.0 used a 2 byte CRC check; a 1 byte CRC check is - used on versions after 2.0. This can be used to test if the password - supplied is correct or not. - -### 6.1.7 - Decrypting the compressed data stream - - The compressed data stream can be decrypted as follows: - - loop until done - read a character into C - Temp <- C ^ decrypt_byte() - update_keys(temp) - output Temp - end loop - - -## 7.0 Strong Encryption Specification - -### 7.0.1 - Portions of the Strong Encryption technology defined in this - specification are covered under patents and pending patent applications. - Refer to the section in this document entitled "Incorporating - PKWARE Proprietary Technology into Your Product" for more information. - -## 7.1 Strong Encryption Overview - -### 7.1.1 - Version 5.x of this specification introduced support for strong - encryption algorithms. These algorithms can be used with either - a password or an X.509v3 digital certificate to encrypt each file. - This format specification supports either password or certificate - based encryption to meet the security needs of today, to enable - interoperability between users within both PKI and non-PKI - environments, and to ensure interoperability between different - computing platforms that are running a ZIP program. - -### 7.1.2 - Password based encryption is the most common form of encryption - people are familiar with. However, inherent weaknesses with - passwords (e.g. susceptibility to dictionary/brute force attack) - as well as password management and support issues make certificate - based encryption a more secure and scalable option. Industry - efforts and support are defining and moving towards more advanced - security solutions built around X.509v3 digital certificates and - Public Key Infrastructures(PKI) because of the greater scalability, - administrative options, and more robust security over traditional - password based encryption. - -### 7.1.3 - Most standard encryption algorithms are supported with this - specification. 
Reference implementations for many of these - algorithms are available from either commercial or open source - distributors. Readily available cryptographic toolkits make - implementation of the encryption features straightforward. - This document is not intended to provide a treatise on data - encryption principles or theory. Its purpose is to document the - data structures required for implementing interoperable data - encryption within the .ZIP format. It is strongly recommended that - you have a good understanding of data encryption before reading - further. - -### 7.1.4 - The algorithms introduced in Version 5.0 of this specification - include: - - RC2 40 bit, 64 bit, and 128 bit - RC4 40 bit, 64 bit, and 128 bit - DES - 3DES 112 bit and 168 bit - - Version 5.1 adds support for the following: - - AES 128 bit, 192 bit, and 256 bit - - -### 7.1.5 - Version 6.1 introduces encryption data changes to support - interoperability with Smartcard and USB Token certificate storage - methods which do not support the OAEP strengthening standard. - -### 7.1.6 - Version 6.2 introduces support for encrypting metadata by compressing - and encrypting the central directory data structure to reduce information - leakage. Information leakage can occur in legacy ZIP applications - through exposure of information about a file even though that file is - stored encrypted. The information exposed consists of file - characteristics stored within the records and fields defined by this - specification. This includes data such as a file's name, its original - size, timestamp and CRC32 value. - -### 7.1.7 - Version 6.3 introduces support for encrypting data using the Blowfish - and Twofish algorithms. These are symmetric block ciphers developed - by Bruce Schneier. Blowfish supports using a variable length key from - 32 to 448 bits. Block size is 64 bits. Implementations SHOULD use 16 - rounds and the only mode supported within ZIP files is CBC. Twofish - supports key sizes 128, 192 and 256 bits. Block size is 128 bits. - Implementations SHOULD use 16 rounds and the only mode supported within - ZIP files is CBC. Information and source code for both Blowfish and - Twofish algorithms can be found on the internet. Consult with the author - of these algorithms for information on terms or restrictions on use. - -### 7.1.8 - Central Directory Encryption provides greater protection against - information leakage by encrypting the Central Directory structure and - by masking key values that are replicated in the unencrypted Local - Header. ZIP compatible programs that cannot interpret an encrypted - Central Directory structure cannot rely on the data in the corresponding - Local Header for decompression information. - -### 7.1.9 - Extra Field records that MAY contain information about a file that SHOULD - NOT be exposed SHOULD NOT be stored in the Local Header and SHOULD only - be written to the Central Directory where they can be encrypted. This - design currently does not support streaming. Information in the End of - Central Directory record, the Zip64 End of Central Directory Locator, - and the Zip64 End of Central Directory records is not encrypted. Access - to view data on files within a ZIP file with an encrypted Central Directory - requires the appropriate password or private key for decryption prior to - viewing any files, or any information about the files, in the archive.
- -### 7.1.10 - Older ZIP compatible programs not familiar with the Central Directory - Encryption feature will no longer be able to recognize the Central - Directory and MAY assume the ZIP file is corrupt. Programs that - attempt streaming access using Local Headers will see invalid - information for each file. Central Directory Encryption need not be - used for every ZIP file. Its use is recommended for greater security. - ZIP files not using Central Directory Encryption SHOULD operate as - in the past. - -### 7.1.11 - This strong encryption feature specification is intended to provide for - scalable, cross-platform encryption needs ranging from simple password - encryption to authenticated public/private key encryption. - -### 7.1.12 - Encryption provides data confidentiality and privacy. It is - recommended that you combine X.509 digital signing with encryption - to add authentication and non-repudiation. - - -## 7.2 Single Password Symmetric Encryption Method - -### 7.2.1 - The Single Password Symmetric Encryption Method using strong - encryption algorithms operates similarly to the traditional - PKWARE encryption defined in this format. Additional data - structures are added to support the processing needs of the - strong algorithms. - - The Strong Encryption data structures are: - -### 7.2.2 - General Purpose Bits - Bits 0 and 6 of the General Purpose bit - flag in both local and central header records. Both bits set - indicates strong encryption. Bit 13, when set indicates the Central - Directory is encrypted and that selected fields in the Local Header - are masked to hide their actual value. - - -### 7.2.3 - Extra Field 0x0017 in central header only. - - Fields to consider in this record are: - - ### 7.2.3.1 -Format - the data format identifier for this record. The only - value allowed at this time is the integer value 2. - - ### 7.2.3.2 -AlgId - integer identifier of the encryption algorithm from the - following range - - 0x6601 - DES - 0x6602 - RC2 (version needed to extract < 5.2) - 0x6603 - 3DES 168 - 0x6609 - 3DES 112 - 0x660E - AES 128 - 0x660F - AES 192 - 0x6610 - AES 256 - 0x6702 - RC2 (version needed to extract >= 5.2) - 0x6720 - Blowfish - 0x6721 - Twofish - 0x6801 - RC4 - 0xFFFF - Unknown algorithm - - ### 7.2.3.3 -Bitlen - Explicit bit length of key - - 32 - 448 bits - - ### 7.2.3.4 -Flags - Processing flags needed for decryption - - 0x0001 - Password is required to decrypt - 0x0002 - Certificates only - 0x0003 - Password or certificate required to decrypt - - Values > 0x0003 reserved for certificate processing - - - ### 7.2.4 - Decryption header record preceding compressed file data. - - -Decryption Header: - - Value Size Description - ----- ---- ----------- - IVSize 2 bytes Size of initialization vector (IV) - IVData IVSize Initialization vector for this file - Size 4 bytes Size of remaining decryption header data - Format 2 bytes Format definition for this record - AlgID 2 bytes Encryption algorithm identifier - Bitlen 2 bytes Bit length of encryption key - Flags 2 bytes Processing flags - ErdSize 2 bytes Size of Encrypted Random Data - ErdData ErdSize Encrypted Random Data - Reserved1 4 bytes Reserved certificate processing data - Reserved2 (var) Reserved for certificate processing data - VSize 2 bytes Size of password validation data - VData VSize-4 Password validation data - VCRC32 4 bytes Standard ZIP CRC32 of password validation data - - ### 7.2.4.1 -IVData - The size of the IV SHOULD match the algorithm block size. - The IVData can be completely random data. 
If the size of - the randomly generated data does not match the block size - it SHOULD be complemented with zeros or truncated as - necessary. If IVSize is 0, then IV = CRC32 + Uncompressed - File Size (as a 64 bit little-endian, unsigned integer value). - - ### 7.2.4.2 -Format - the data format identifier for this record. The only - value allowed at this time is the integer value 3. - - ### 7.2.4.3 -AlgId - integer identifier of the encryption algorithm from the - following range - - 0x6601 - DES - 0x6602 - RC2 (version needed to extract < 5.2) - 0x6603 - 3DES 168 - 0x6609 - 3DES 112 - 0x660E - AES 128 - 0x660F - AES 192 - 0x6610 - AES 256 - 0x6702 - RC2 (version needed to extract >= 5.2) - 0x6720 - Blowfish - 0x6721 - Twofish - 0x6801 - RC4 - 0xFFFF - Unknown algorithm - - ### 7.2.4.4 -Bitlen - Explicit bit length of key - - 32 - 448 bits - - ### 7.2.4.5 -Flags - Processing flags needed for decryption - - 0x0001 - Password is required to decrypt - 0x0002 - Certificates only - 0x0003 - Password or certificate required to decrypt - - Values > 0x0003 reserved for certificate processing - - ### 7.2.4.6 -ErdData - Encrypted random data is used to store random data that - is used to generate a file session key for encrypting - each file. SHA1 is used to calculate hash data used to - derive keys. File session keys are derived from a master - session key generated from the user-supplied password. - If the Flags field in the decryption header contains - the value 0x4000, then the ErdData field MUST be - decrypted using 3DES. If the value 0x4000 is not set, - then the ErdData field MUST be decrypted using AlgId. - - - ### 7.2.4.7 -Reserved1 - Reserved for certificate processing, if value is - zero, then Reserved2 data is absent. See the explanation - under the Certificate Processing Method for details on - this data structure. - - ### 7.2.4.8 -Reserved2 - If present, the size of the Reserved2 data structure - is located by skipping the first 4 bytes of this field - and using the next 2 bytes as the remaining size. See - the explanation under the Certificate Processing Method - for details on this data structure. - - ### 7.2.4.9 -VSize - This size value will always include the 4 bytes of the - VCRC32 data and will be greater than 4 bytes. - - ### 7.2.4.10 -VData - Random data for password validation. This data is VSize - in length and VSize MUST be a multiple of the encryption - block size. VCRC32 is a checksum value of VData. - VData and VCRC32 are stored encrypted and start the - stream of encrypted data for a file. - - -### 7.2.5 - Useful Tips - - ### 7.2.5.1 -Strong Encryption is always applied to a file after compression. The - block oriented algorithms all operate in Cipher Block Chaining (CBC) - mode. The block size used for AES encryption is 16. All other block - algorithms use a block size of 8. Two IDs are defined for RC2 to - account for a discrepancy found in the implementation of the RC2 - algorithm in the cryptographic library on Windows XP SP1 and all - earlier versions of Windows. It is recommended that zero length files - not be encrypted, however programs SHOULD be prepared to extract them - if they are found within a ZIP file.
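A small lookup keeping the AlgId-to-block-size mapping from the tip above in one place may be useful. This sketch follows 7.2.5.1 (AES uses 16, all other block algorithms use 8); note that 7.1.7 gives Twofish a 128-bit block, so implementations SHOULD confirm which convention their toolkit applies. The stream-cipher value for RC4 is an assumption for uniform handling, not a value from this specification.

```rust
/// Cipher block size in bytes for an AlgId from the 0x0017 / Decryption
/// Header tables, per the tip in section 7.2.5.1.
fn block_size(alg_id: u16) -> Option<usize> {
    match alg_id {
        // AES 128/192/256
        0x660E | 0x660F | 0x6610 => Some(16),
        // DES, RC2 (both IDs), 3DES 168/112, Blowfish, Twofish
        // (7.1.7 describes a 128-bit Twofish block; 7.2.5.1 says 8)
        0x6601 | 0x6602 | 0x6702 | 0x6603 | 0x6609 | 0x6720 | 0x6721 => Some(8),
        // RC4 is a stream cipher; 1 is an assumed convenience value
        0x6801 => Some(1),
        // 0xFFFF or anything unrecognized
        _ => None,
    }
}
```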
-
- ### 7.2.5.2
-A pseudo-code representation of the encryption process is as follows:
-
- Password = GetUserPassword()
- MasterSessionKey = DeriveKey(SHA1(Password))
- RD = CryptographicStrengthRandomData()
- For Each File
- IV = CryptographicStrengthRandomData()
- VData = CryptographicStrengthRandomData()
- VCRC32 = CRC32(VData)
- FileSessionKey = DeriveKey(SHA1(IV + RD))
- ErdData = Encrypt(RD, MasterSessionKey, IV)
- Encrypt(VData + VCRC32 + FileData, FileSessionKey, IV)
- Done
-
- ### 7.2.5.3
-The function names and parameter requirements will depend on
- the choice of the cryptographic toolkit selected. Almost any
- toolkit supporting the reference implementations for each
- algorithm can be used. The RSA BSAFE(r), OpenSSL, and Microsoft
- CryptoAPI libraries are all known to work well.
-
-
-## 7.3 Single Password - Central Directory Encryption
-
-### 7.3.1
- Central Directory Encryption is achieved within the .ZIP format by
- encrypting the Central Directory structure. This encapsulates the metadata
- most often used for processing .ZIP files. Additional metadata is stored for
- redundancy in the Local Header for each file. The process of concealing
- metadata by encrypting the Central Directory does not protect the data within
- the Local Header. To avoid information leakage from the exposed metadata
- in the Local Header, the fields containing information about a file are masked.
-
-### 7.3.2
- Local Header
-
- Masking replaces the true content of the fields for a file in the Local
- Header with false information. When masked, the Local Header is not
- suitable for streaming access and the options for data recovery of damaged
- archives are reduced. Extra Data fields that MAY contain confidential
- data SHOULD NOT be stored within the Local Header. The value set into
- the Version needed to extract field SHOULD be the correct value needed to
- extract the file without regard to Central Directory Encryption. The fields
- within the Local Header targeted for masking when the Central Directory is
- encrypted are:
-
- Field Name Mask Value
- ------------------ ---------------------------
- compression method 0
- last mod file time 0
- last mod file date 0
- crc-32 0
- compressed size 0
- uncompressed size 0
- file name (variable size) Base 16 value from the
- range 1 - 0xFFFFFFFFFFFFFFFF
- represented as a string whose
- size will be set into the
- file name length field
-
- The Base 16 value assigned as a masked file name is simply a sequentially
- incremented value for each file starting with 1 for the first file.
- Modifications to a ZIP file MAY cause different values to be stored for
- each file. For compatibility, the file name field in the Local Header
- SHOULD NOT be left blank. As of Version 6.2 of this specification,
- the Compression Method and Compressed Size fields are not yet masked.
- Fields having a value of 0xFFFF or 0xFFFFFFFF for the ZIP64 format
- SHOULD NOT be masked.
-
-### 7.3.3
- Encrypting the Central Directory
-
- Encryption of the Central Directory does not include encryption of the
- Central Directory Signature data, the Zip64 End of Central Directory
- record, the Zip64 End of Central Directory Locator, or the End
- of Central Directory record. The ZIP file comment data is never
- encrypted.
-
- Before encrypting the Central Directory, it MAY optionally be compressed.
- Compression is not required, but for storage efficiency it is assumed
- this structure will be compressed before encrypting. Similarly, this
- specification supports compressing the Central Directory without
- requiring that it also be encrypted. Early implementations of this
- feature will assume the encryption method applied to files matches the
- encryption applied to the Central Directory.
-
- Encryption of the Central Directory is done in a manner similar to
- that of file encryption. The encrypted data is preceded by a
- decryption header. The decryption header is known as the Archive
- Decryption Header. The fields of this record are identical to
- the decryption header preceding each encrypted file. The location
- of the Archive Decryption Header is determined by the value in the
- Start of the Central Directory field in the Zip64 End of Central
- Directory record. When the Central Directory is encrypted, the
- Zip64 End of Central Directory record will always be present.
-
- The layout of the Zip64 End of Central Directory record for all
- versions starting with 6.2 of this specification will follow the
- Version 2 format. The Version 2 format is as follows:
-
- The leading fixed size fields within the Version 1 format for this
- record remain unchanged. The record signature for both Version 1
- and Version 2 will be 0x06064b50. Immediately following the last
- byte of the field known as the Offset of Start of Central
- Directory With Respect to the Starting Disk Number will begin the
- new fields defining Version 2 of this record.
-
-### 7.3.4
- New fields for Version 2
-
- Note: all fields stored in Intel low-byte/high-byte (little-endian) order.
-
- Value Size Description
- ----- ---- -----------
- Compression Method 2 bytes Method used to compress the
- Central Directory
- Compressed Size 8 bytes Size of the compressed data
- Original Size 8 bytes Original uncompressed size
- AlgId 2 bytes Encryption algorithm ID
- BitLen 2 bytes Encryption key length
- Flags 2 bytes Encryption flags
- HashID 2 bytes Hash algorithm identifier
- Hash Length 2 bytes Length of hash data
- Hash Data (variable) Hash data
-
- The Compression Method accepts the same range of values as the
- corresponding field in the Central Header.
-
- The Compressed Size and Original Size values will not include the
- Central Directory Signature data, which is neither compressed nor
- encrypted.
-
- The AlgId, BitLen, and Flags fields accept the same range of values
- as the corresponding fields within the 0x0017 record.
-
- HashID identifies the algorithm used to hash the Central Directory
- data. This data does not have to be hashed, in which case the
- values for both the HashID and Hash Length will be 0. Possible
- values for HashID are:
-
- Value Algorithm
- ------ ---------
- 0x0000 none
- 0x0001 CRC32
- 0x8003 MD5
- 0x8004 SHA1
- 0x8007 RIPEMD160
- 0x800C SHA256
- 0x800D SHA384
- 0x800E SHA512
-
-### 7.3.5
- When the Central Directory data is signed, the same hash algorithm
- used to hash the Central Directory for signing SHOULD be used.
- This is recommended for processing efficiency; however, it is
- permissible for any of the above algorithms to be used independent
- of the signing process.
-
- The Hash Data will contain the hash data for the Central Directory.
- The length of this data will vary depending on the algorithm used.
-
- The Version Needed to Extract SHOULD be set to 62.
-
- The value for the Total Number of Entries on the Current Disk will
- be 0. These records will no longer support random access when
- encrypting the Central Directory.
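-
- As a non-normative aid, the Version 2 fields listed in 7.3.4 above can
- be pictured as the following Rust layout sketch. This is editorial: the
- struct name is hypothetical, and all integers are stored little-endian
- on disk:
-
-     /// Hypothetical mirror of the Version 2 fields appended to the
-     /// Zip64 End of Central Directory record (section 7.3.4).
-     struct Zip64EocdrV2Fields {
-         compression_method: u16, // method used to compress the Central Directory
-         compressed_size: u64,    // size of the compressed data
-         original_size: u64,      // original uncompressed size
-         alg_id: u16,             // encryption algorithm ID (same range as 0x0017)
-         bit_len: u16,            // encryption key length
-         flags: u16,              // encryption flags
-         hash_id: u16,            // 0x0000 (none) through 0x800E (SHA512)
-         hash_data: Vec<u8>,      // `Hash Length` bytes of hash data
-     }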
-
-### 7.3.6
- When the Central Directory is compressed and/or encrypted, the
- End of Central Directory record will store the value 0xFFFFFFFF
- as the value for the Total Number of Entries in the Central
- Directory. The value stored in the Total Number of Entries in
- the Central Directory on this Disk field will be 0. The actual
- values will be stored in the equivalent fields of the Zip64
- End of Central Directory record.
-
-### 7.3.7
- Decrypting and decompressing the Central Directory is accomplished
- in the same manner as decrypting and decompressing a file.
-
-## 7.4 Certificate Processing Method
-
- The Certificate Processing Method for ZIP file encryption
- defines the following additional data fields:
-
-### 7.4.1
- Certificate Flag Values
-
- Additional processing flags that can be present in the Flags field of both
- the 0x0017 field of the central directory Extra Field and the Decryption
- header record preceding compressed file data are:
-
- 0x0007 - reserved for future use
- 0x000F - reserved for future use
- 0x0100 - Indicates non-OAEP key wrapping was used. If this
- field is set, the version needed to extract MUST
- be at least 61. This means OAEP key wrapping is not
- used when generating a Master Session Key using
- ErdData.
- 0x4000 - ErdData MUST be decrypted using 3DES-168, otherwise use the
- same algorithm used for encrypting the file contents.
- 0x8000 - reserved for future use
-
-### 7.4.2
- CertData - Extra Field 0x0017 record certificate data structure
-
- The data structure used to store certificate data within the section
- of the Extra Field defined by the CertData field of the 0x0017
- record is as shown:
-
- Value Size Description
- ----- ---- -----------
- RCount 4 bytes Number of recipients.
- HashAlg 2 bytes Hash algorithm identifier
- HSize 2 bytes Hash size
- SRList (var) Simple list of recipients' hashed public keys
-
- RCount This defines the number of intended recipients whose
- public keys were used for encryption. This identifies
- the number of elements in the SRList.
-
- HashAlg This defines the hash algorithm used to calculate
- the public key hash of each public key used
- for encryption. This field currently supports
- only the following value for SHA-1
-
- 0x8004 - SHA1
-
- HSize This defines the size of a hashed public key.
-
- SRList This is a variable length list of the hashed
- public keys for each intended recipient. Each
- element in this list is HSize in length. The total size of
- SRList is determined using RCount * HSize.
-
-### 7.4.3
- Reserved1 - Certificate Decryption Header Reserved1 Data
-
- Value Size Description
- ----- ---- -----------
- RCount 4 bytes Number of recipients.
-
- RCount This defines the number of intended recipients whose
- public keys were used for encryption. This defines
- the number of elements in the REList field defined below.
-
-### 7.4.4
- Reserved2 - Certificate Decryption Header Reserved2 Data Structures
-
- Value Size Description
- ----- ---- -----------
- HashAlg 2 bytes Hash algorithm identifier
- HSize 2 bytes Hash size
- REList (var) List of recipient data elements
-
- HashAlg This defines the hash algorithm used to calculate
- the public key hash of each public key used
- for encryption. This field currently supports
- only the following value for SHA-1
-
- 0x8004 - SHA1
-
- HSize This defines the size of a hashed public key
- defined in REHData.
-
- REList This is a variable-length list of recipient data.
- Each element in this list consists of a Recipient
- Element data structure as follows:
-
- Recipient Element (REList) Data Structure:
-
- Value Size Description
- ----- ---- -----------
- RESize 2 bytes Size of REHData + REKData
- REHData HSize Hash of recipient's public key
- REKData (var) Simple key blob
-
- RESize This defines the size of an individual REList
- element. This value is the combined size of the
- REHData field + REKData field. REHData is defined by
- HSize. REKData is variable and can be calculated
- for each REList element using RESize and HSize.
-
- REHData Hashed public key for this recipient.
-
- REKData Simple Key Blob. The format of this data structure
- is identical to that defined in the Microsoft
- CryptoAPI and generated using the CryptExportKey()
- function. The version of the Simple Key Blob
- supported at this time is 0x02 as defined by
- Microsoft.
-
-## 7.5 Certificate Processing - Central Directory Encryption
-
-### 7.5.1
- Central Directory Encryption using Digital Certificates will
- operate in a manner similar to that of Single Password Central
- Directory Encryption. This record will only be present when there
- is data to place into it. Currently, data is placed into this
- record when digital certificates are used for either encrypting
- or signing the files within a ZIP file. When only password
- encryption is used with no certificate encryption or digital
- signing, this record is not currently needed. When present, this
- record will appear before the start of the actual Central Directory
- data structure and will be located immediately after the Archive
- Decryption Header if the Central Directory is encrypted.
-
-### 7.5.2
- The Archive Extra Data record will be used to store the following
- information. Additional data MAY be added in future versions.
-
- Extra Data Fields:
-
- 0x0014 - PKCS#7 Store for X.509 Certificates
- 0x0016 - X.509 Certificate ID and Signature for central directory
- 0x0019 - PKCS#7 Encryption Recipient Certificate List
-
- The 0x0014 and 0x0016 Extra Data records are those that would
- otherwise be located in the first record of the Central Directory
- for digital certificate processing. When encrypting or compressing
- the Central Directory, the 0x0014 and 0x0016 records MUST be
- located in the Archive Extra Data record and they SHOULD NOT
- remain in the first Central Directory record. The Archive Extra
- Data record will also be used to store the 0x0019 data.
-
-### 7.5.3
- When present, the size of the Archive Extra Data record will be
- included in the size of the Central Directory. The data of the
- Archive Extra Data record will also be compressed and encrypted
- along with the Central Directory data structure.
-
-## 7.6 Certificate Processing Differences
-
-### 7.6.1
- The Certificate Processing Method of encryption differs from the
- Single Password Symmetric Encryption Method as follows. Instead
- of using a user-defined password to generate a master session key,
- cryptographically random data is used. The key material is then
- wrapped using standard key-wrapping techniques. This key material
- is wrapped using the public key of each recipient that will need
- to decrypt the file using their corresponding private key.
-
-### 7.6.2
- This specification currently assumes digital certificates will follow
- the X.509 V3 format for RSA digital certificates with key sizes of
- 1024 bits and higher. Implementation of this Certificate Processing
- Method requires supporting logic for key access and management. This logic
- is outside the scope of this specification.
-
-## 7.7 OAEP Processing with Certificate-based Encryption
-
-### 7.7.1
- OAEP stands for Optimal Asymmetric Encryption Padding. It is a
- strengthening technique used for small encoded items such as decryption
- keys. This is commonly applied in cryptographic key-wrapping techniques
- and is supported by PKCS #1. Versions 5.0 and 6.0 of this specification
- were designed to support OAEP key-wrapping for certificate-based
- decryption keys for additional security.
-
-### 7.7.2
- Support for private keys stored on Smartcards or Tokens introduced
- a conflict with this OAEP logic. Most card and token products do
- not support the additional strengthening applied to OAEP key-wrapped
- data. In order to resolve this conflict, versions 6.1 and above of this
- specification will no longer support OAEP when encrypting using
- digital certificates.
-
-### 7.7.3
- Versions of PKZIP available during initial development of the
- certificate processing method set a value of 61 into the
- version needed to extract field for a file. This indicates that
- non-OAEP key wrapping is used. This affects certificate encryption
- only, and password encryption functions SHOULD NOT be affected by
- this value. This means values of 61 MAY be found on files encrypted
- with certificates only, or on files encrypted with both password
- encryption and certificate encryption. Files encrypted with both
- methods can safely be decrypted using the password methods documented.
-
-## 7.8 Additional Encryption/Decryption Data Records
-
-### 7.8.1
- Additional information MAY be stored within a ZIP file in support
- of the strong password and certificate encryption methods defined above.
- These include, but are not limited to, the following record types:
-
- 0x0021 Policy Decryption Key Record
- 0x0022 Smartcrypt Key Provider Record
- 0x0023 Smartcrypt Policy Key Data Record
-
-## 8.0 Splitting and Spanning ZIP files
-
- 8.1 Spanned ZIP files
-
-### 8.1.1
- Spanning is the process of segmenting a ZIP file across
- multiple removable media. This support has typically only
- been provided for DOS-formatted floppy diskettes.
-
- 8.2 Split ZIP files
-
-### 8.2.1
- File splitting is a newer derivation of spanning.
- Splitting follows the same segmentation process as
- spanning; however, it does not require writing each
- segment to a unique removable medium and instead supports
- placing all pieces onto local or non-removable locations
- such as file systems, local drives, folders, etc.
-
- 8.3 File Naming Differences
-
-### 8.3.1
- A key difference between spanned and split ZIP files is
- that all pieces of a spanned ZIP file have the same name.
- Since each piece is written to a separate volume, no name
- collisions occur and each segment can reuse the original
- .ZIP file name given to the archive.
-
-### 8.3.2
- Sequence ordering for DOS spanned archives uses the DOS
- volume label to determine segment numbers. Volume labels
- for each segment are written using the form PKBACK#xxx,
- where xxx is the segment number written as a decimal
- value from 001 - nnn.
-
-### 8.3.3
- Split ZIP files are typically written to the same location
- and are subject to name collisions if the spanned name
- format is used, since each segment will reside on the same
- drive. To avoid name collisions, split archives are named
- as follows.
-
- Segment 1 = filename.z01
- Segment n-1 = filename.z(n-1)
- Segment n = filename.zip
-
-### 8.3.4
- The .ZIP extension is used on the last segment to support
- quickly reading the central directory. The segment number
- n SHOULD be a decimal value.
-
- 8.4 Spanned Self-extracting ZIP Files
-
-### 8.4.1
- Spanned ZIP files MAY be PKSFX Self-extracting ZIP files.
- PKSFX files MAY also be split; however, in this case
- the first segment MUST be named filename.exe. The first
- segment of a split PKSFX archive MUST be large enough to
- include the entire executable program.
-
- 8.5 Capacities and Markers
-
-### 8.5.1
- Capacities for split archives are as follows:
-
- Maximum number of segments = 4,294,967,295 - 1
- Maximum .ZIP segment size = 4,294,967,295 bytes
- Minimum segment size = 64K
- Maximum PKSFX segment size = 2,147,483,647 bytes
-
-### 8.5.2
- Segment sizes MAY differ; however, by convention, all
- segment sizes SHOULD be the same, with the exception of the
- last, which MAY be smaller. Local and central directory
- header records MUST NOT be split across a segment boundary.
- When writing a header record, if the number of bytes remaining
- within a segment is less than the size of the header record,
- end the current segment and write the header at the start
- of the next segment. The central directory MAY span segment
- boundaries, but no single record in the central directory
- SHOULD be split across segments.
-
-### 8.5.3
- Spanned/Split archives created using PKZIP for Windows
- (V2.50 or greater), PKZIP Command Line (V2.50 or greater),
- or PKZIP Explorer will include a special spanning
- signature as the first 4 bytes of the first segment of
- the archive. This signature (0x08074b50) will be
- followed immediately by the local header signature for
- the first file in the archive.
-
-### 8.5.4
- A special spanning marker MAY also appear in spanned/split
- archives if the spanning or splitting process starts but
- only requires one segment. In this case the 0x08074b50
- signature will be replaced with the temporary spanning
- marker signature of 0x30304b50. Split archives can
- only be uncompressed by other versions of PKZIP that
- know how to create a split archive.
-
-### 8.5.5
- The signature value 0x08074b50 is also used by some
- ZIP implementations as a marker for the Data Descriptor
- record. Conflict in this alternate assignment can be
- avoided by examining the position of the signature
- within the ZIP file to determine the use for which it
- is intended.
-
-## 9.0 Change Process
-
- 9.1 In order for the .ZIP file format to remain a viable technology, this
- specification SHOULD be considered as open for periodic review and
- revision. Although this format was originally designed with a
- certain level of extensibility, not all changes in technology
- (present or future) were or will be necessarily considered in its
- design.
-
- 9.2 If your application requires new definitions to the
- extensible sections in this format, or if you would like to
- submit new data structures or new capabilities, please forward
- your request to zipformat@pkware.com. All submissions will be
- reviewed by the ZIP File Specification Committee for possible
- inclusion into future versions of this specification.
-
- 9.3 Periodic revisions to this specification will be published as
- DRAFT or as FINAL status to ensure interoperability. We encourage
- comments and feedback that MAY help improve clarity or content.
-
-
-## 10.0 Incorporating PKWARE Proprietary Technology into Your Product
-
- 10.1 The use or implementation in a product of APPNOTE technological
- components pertaining to either strong encryption or patching requires
- a separate, executed license agreement from PKWARE. Please contact
- PKWARE at zipformat@pkware.com or +1-414-289-9788 with regard to
- acquiring such a license.
-
- 10.2 Additional information regarding PKWARE proprietary technology is
- available at http://www.pkware.com/appnote.
-
-## 11.0 Acknowledgements
-
- In addition to the above-mentioned contributors to PKZIP and PKUNZIP,
- PKWARE would like to extend special thanks to Robert Mahoney for
- suggesting the extension .ZIP for this software.
-
-## 12.0 References
-
- Fiala, Edward R., and Greene, Daniel H., "Data compression with
- finite windows", Communications of the ACM, Volume 32, Number 4,
- April 1989, pages 490-505.
-
- Held, Gilbert, "Data Compression, Techniques and Applications,
- Hardware and Software Considerations", John Wiley & Sons, 1987.
-
- Huffman, D.A., "A method for the construction of minimum-redundancy
- codes", Proceedings of the IRE, Volume 40, Number 9, September 1952,
- pages 1098-1101.
-
- Nelson, Mark, "LZW Data Compression", Dr. Dobb's Journal, Volume 14,
- Number 10, October 1989, pages 29-37.
-
- Nelson, Mark, "The Data Compression Book", M&T Books, 1991.
-
- Storer, James A., "Data Compression, Methods and Theory",
- Computer Science Press, 1988.
-
- Welch, Terry, "A Technique for High-Performance Data Compression",
- IEEE Computer, Volume 17, Number 6, June 1984, pages 8-19.
-
- Ziv, J. and Lempel, A., "A universal algorithm for sequential data
- compression", Communications of the ACM, Volume 30, Number 6,
- June 1987, pages 520-540.
-
- Ziv, J. and Lempel, A., "Compression of individual sequences via
- variable-rate coding", IEEE Transactions on Information Theory,
- Volume 24, Number 5, September 1978, pages 530-536.
-
-
-APPENDIX A - AS/400 Extra Field (0x0065) Attribute Definitions
---------------------------------------------------------------
-
-A.1 Field Definition Structure:
-
- a. field length including length 2 bytes Big Endian
- b. field code 2 bytes
- c. data x bytes
-
-A.2 Field Code Description
-
- 4001 Source type i.e. CLP etc
- 4002 The text description of the library
- 4003 The text description of the file
- 4004 The text description of the member
- 4005 x'F0' or 0 is PF-DTA, x'F1' or 1 is PF_SRC
- 4007 Database Type Code 1 byte
- 4008 Database file and fields definition
- 4009 GZIP file type 2 bytes
- 400B IFS code page 2 bytes
- 400C IFS Time of last file status change 4 bytes
- 400D IFS Access Time 4 bytes
- 400E IFS Modification time 4 bytes
- 005C Length of the records in the file 2 bytes
- 0068 GZIP two words 8 bytes
-
-APPENDIX B - z/OS Extra Field (0x0065) Attribute Definitions
-------------------------------------------------------------
-
-B.1 Field Definition Structure:
-
- a. field length including length 2 bytes Big Endian
- b. field code 2 bytes
- c. data x bytes
-
-B.2 Field Code Description
-
- 0001 File Type 2 bytes
- 0002 NonVSAM Record Format 1 byte
- 0003 Reserved
- 0004 NonVSAM Block Size 2 bytes Big Endian
- 0005 Primary Space Allocation 3 bytes Big Endian
- 0006 Secondary Space Allocation 3 bytes Big Endian
- 0007 Space Allocation Type 1 byte flag
- 0008 Modification Date Retired with PKZIP 5.0 +
- 0009 Expiration Date Retired with PKZIP 5.0 +
- 000A PDS Directory Block Allocation 3 bytes Big Endian binary value
- 000B NonVSAM Volume List variable
- 000C UNIT Reference Retired with PKZIP 5.0 +
- 000D DF/SMS Management Class 8 bytes EBCDIC Text Value
- 000E DF/SMS Storage Class 8 bytes EBCDIC Text Value
- 000F DF/SMS Data Class 8 bytes EBCDIC Text Value
- 0010 PDS/PDSE Member Info. 30 bytes
- 0011 VSAM sub-filetype 2 bytes
- 0012 VSAM LRECL 13 bytes EBCDIC "(num_avg num_max)"
- 0013 VSAM Cluster Name Retired with PKZIP 5.0 +
- 0014 VSAM KSDS Key Information 13 bytes EBCDIC "(num_length num_position)"
- 0015 VSAM Average LRECL 5 bytes EBCDIC num_value padded with blanks
- 0016 VSAM Maximum LRECL 5 bytes EBCDIC num_value padded with blanks
- 0017 VSAM KSDS Key Length 5 bytes EBCDIC num_value padded with blanks
- 0018 VSAM KSDS Key Position 5 bytes EBCDIC num_value padded with blanks
- 0019 VSAM Data Name 1-44 bytes EBCDIC text string
- 001A VSAM KSDS Index Name 1-44 bytes EBCDIC text string
- 001B VSAM Catalog Name 1-44 bytes EBCDIC text string
- 001C VSAM Data Space Type 9 bytes EBCDIC text string
- 001D VSAM Data Space Primary 9 bytes EBCDIC num_value left-justified
- 001E VSAM Data Space Secondary 9 bytes EBCDIC num_value left-justified
- 001F VSAM Data Volume List variable EBCDIC text list of 6-character Volume IDs
- 0020 VSAM Data Buffer Space 8 bytes EBCDIC num_value left-justified
- 0021 VSAM Data CISIZE 5 bytes EBCDIC num_value left-justified
- 0022 VSAM Erase Flag 1 byte flag
- 0023 VSAM Free CI % 3 bytes EBCDIC num_value left-justified
- 0024 VSAM Free CA % 3 bytes EBCDIC num_value left-justified
- 0025 VSAM Index Volume List variable EBCDIC text list of 6-character Volume IDs
- 0026 VSAM Ordered Flag 1 byte flag
- 0027 VSAM REUSE Flag 1 byte flag
- 0028 VSAM SPANNED Flag 1 byte flag
- 0029 VSAM Recovery Flag 1 byte flag
- 002A VSAM WRITECHK Flag 1 byte flag
- 002B VSAM Cluster/Data SHROPTS 3 bytes EBCDIC "n,y"
- 002C VSAM Index SHROPTS 3 bytes EBCDIC "n,y"
- 002D VSAM Index Space Type 9 bytes EBCDIC text string
- 002E VSAM Index Space Primary 9 bytes EBCDIC num_value left-justified
- 002F VSAM Index Space Secondary 9 bytes EBCDIC num_value left-justified
- 0030 VSAM Index CISIZE 5 bytes EBCDIC num_value left-justified
- 0031 VSAM Index IMBED 1 byte flag
- 0032 VSAM Index Ordered Flag 1 byte flag
- 0033 VSAM REPLICATE Flag 1 byte flag
- 0034 VSAM Index REUSE Flag 1 byte flag
- 0035 VSAM Index WRITECHK Flag 1 byte flag Retired with PKZIP 5.0 +
- 0036 VSAM Owner 8 bytes EBCDIC text string
- 0037 VSAM Index Owner 8 bytes EBCDIC text string
- 0038 Reserved
- 0039 Reserved
- 003A Reserved
- 003B Reserved
- 003C Reserved
- 003D Reserved
- 003E Reserved
- 003F Reserved
- 0040 Reserved
- 0041 Reserved
- 0042 Reserved
- 0043 Reserved
- 0044 Reserved
- 0045 Reserved
- 0046 Reserved
- 0047 Reserved
- 0048 Reserved
- 0049 Reserved
- 004A Reserved
- 004B Reserved
- 004C Reserved
- 004D Reserved
- 004E Reserved
- 004F Reserved
- 0050 Reserved
- 0051 Reserved
- 0052 Reserved
- 0053 Reserved
- 0054 Reserved
- 0055 Reserved
- 0056 Reserved
- 0057 Reserved
- 0058 PDS/PDSE Member TTR Info. 6 bytes Big Endian
- 0059 PDS 1st LMOD Text TTR 3 bytes Big Endian
- 005A PDS LMOD EP Rec # 4 bytes Big Endian
- 005B Reserved
- 005C Max Length of records 2 bytes Big Endian
- 005D PDSE Flag 1 byte flag
- 005E Reserved
- 005F Reserved
- 0060 Reserved
- 0061 Reserved
- 0062 Reserved
- 0063 Reserved
- 0064 Reserved
- 0065 Last Date Referenced 4 bytes Packed Hex "yyyymmdd"
- 0066 Date Created 4 bytes Packed Hex "yyyymmdd"
- 0068 GZIP two words 8 bytes
- 0071 Extended NOTE Location 12 bytes Big Endian
- 0072 Archive device UNIT 6 bytes EBCDIC
- 0073 Archive 1st Volume 6 bytes EBCDIC
- 0074 Archive 1st VOL File Seq# 2 bytes Binary
- 0075 Native I/O Flags 2 bytes
- 0081 Unix File Type 1 byte enumerated
- 0082 Unix File Format 1 byte enumerated
- 0083 Unix File Character Set Tag Info 4 bytes
- 0090 ZIP Environmental Processing Info 4 bytes
- 0091 EAV EATTR Flags 1 byte
- 0092 DSNTYPE Flags 1 byte
- 0093 Total Space Allocation (Cyls) 4 bytes Big Endian
- 009D NONVSAM DSORG 2 bytes
- 009E Program Virtual Object Info 3 bytes
- 009F Encapsulated file Info 9 bytes
- 400C Unix File Creation Time 4 bytes
- 400D Unix File Access Time 4 bytes
- 400E Unix File Modification time 4 bytes
- 4101 IBMCMPSC Compression Info variable
- 4102 IBMCMPSC Compression Size 8 bytes Big Endian
-
-APPENDIX C - Zip64 Extensible Data Sector Mappings
---------------------------------------------------
-
-Z390 Extra Field:
-
- The following is the general layout of the attributes for the
- ZIP 64 "extra" block for extended tape operations.
-
- Note: some fields stored in Big Endian format. All text is
- in EBCDIC format unless otherwise specified.
-
- Value Size Description
- ----- ---- -----------
- (Z390) 0x0065 2 bytes Tag for this "extra" block type
- Size 4 bytes Size for the following data block
- Tag 4 bytes EBCDIC "Z390"
- Length71 2 bytes Big Endian
- Subcode71 2 bytes Enote type code
- FMEPos 1 byte
- Length72 2 bytes Big Endian
- Subcode72 2 bytes Unit type code
- Unit 1 byte Unit
- Length73 2 bytes Big Endian
- Subcode73 2 bytes Volume1 type code
- FirstVol 1 byte Volume
- Length74 2 bytes Big Endian
- Subcode74 2 bytes FirstVol file sequence
- FileSeq 2 bytes Sequence
-
-APPENDIX D - Language Encoding (EFS)
-------------------------------------
-
-D.1 The ZIP format has historically supported only the original IBM PC character
-encoding set, commonly referred to as IBM Code Page 437. This limits storing
-file name characters to only those within the original MS-DOS range of values
-and does not properly support file names in other character encodings, or
-languages. To address this limitation, this specification will support the
-following change.
-
-D.2 If general purpose bit 11 is unset, the file name and comment SHOULD conform
-to the original ZIP character encoding. If general purpose bit 11 is set, the
-filename and comment MUST support The Unicode Standard, Version 4.1.0 or
-greater using the character encoding form defined by the UTF-8 storage
-specification. The Unicode Standard is published by The Unicode
-Consortium (www.unicode.org). UTF-8 encoded data stored within ZIP files
-is expected not to include a byte order mark (BOM).
-
-D.3 Applications MAY choose to supplement this file name storage through the use
-of the 0x0008 Extra Field. Storage for this optional field is currently
-undefined; however, it will be used to allow storing extended information
-on source or target encoding that MAY further assist applications with file
-name, or file content encoding tasks. Please contact PKWARE with any
-requirements on how this field SHOULD be used.
-
-D.4 The 0x0008 Extra Field storage MAY be used with either setting for general
-purpose bit 11. Examples of the intended usage for this field are to store
-whether "modified-UTF-8" (Java) is used, or UTF-8-MAC. Similarly, other
-commonly used character encoding (code page) designations can be indicated
-through this field. Formalized values for use of the 0x0008 record remain
-undefined at this time. The definition for the layout of the 0x0008 field
-will be published when available. Use of the 0x0008 Extra Field provides
-for storing data within a ZIP file in an encoding other than IBM Code
-Page 437 or UTF-8.
-
-D.5 General purpose bit 11 will not imply any encoding of file content or
-password. Values defining character encoding for file content or
-password MUST be stored within the 0x0008 Extended Language Encoding
-Extra Field.
-
-D.6 Ed Gordon of the Info-ZIP group has defined a pair of "extra field" records
-that can be used to store UTF-8 file name and file comment fields. These
-records can be used for cases when the general purpose bit 11 method
-for storing UTF-8 data in the standard file name and comment fields is
-not desirable. A common case for this alternate method is if backward
-compatibility with older programs is required.
-
-D.7 Definitions for the record structure of these fields are included above
-in the section on 3rd party mappings for "extra field" records. These
-records are identified by Header IDs 0x6375 (Info-ZIP Unicode Comment
-Extra Field) and 0x7075 (Info-ZIP Unicode Path Extra Field).
-
-D.8 The choice of which storage method to use when writing a ZIP file is left
-to the implementation. Developers SHOULD expect that a ZIP file MAY
-contain either method and SHOULD provide support for reading data in
-either format. Use of general purpose bit 11 reduces storage requirements
-for file name data by not requiring additional "extra field" data for
-each file, but can result in older ZIP programs not being able to extract
-files. Use of the 0x6375 and 0x7075 records will result in a ZIP file
-that SHOULD always be readable by older ZIP programs, but requires more
-storage per file to write file name and/or file comment fields.
-
-APPENDIX E - AE-x encryption marker
------------------------------------
-
-E.1 AE-x defines an alternate password-based encryption method used
-in ZIP files that is based on a file encryption utility developed by
-Dr. Brian Gladman. Information on Dr. Gladman's method is available at
-
- http://www.gladman.me.uk/cryptography_technology/fileencrypt/
-
-E.2 AE-x uses AES with CTR (counter mode) and HMAC-SHA1. It defines
-encryption using key sizes of 128 bits or 256 bits. It does not
-restrict support for decrypting with 192-bit keys.
-
-E.3 This method uses the standard ZIP encryption bit (bit 0)
-of the general purpose bit flag (section 4.4.4) to indicate a
-file is encrypted.
-
-E.4 The compression method field (section 4.4.5) is set to 99
-to indicate a file has been encrypted using this method.
-
-E.5 The actual compression method is stored in an extra field
-structure identified by a Header ID of 0x9901. Information on this
-record structure can be found at http://www.winzip.com/aes_info.htm.
-
-E.6 Two versions are defined for the 0x9901 structure.
-
- E.6.1 Version 1 stores the file CRC value in the CRC-32 field
- (section 4.4.7).
-
- E.6.2 Version 2 stores a value of 0 in the CRC-32 field.
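-
- As a non-normative illustration of E.3 through E.5, a reader can treat
- an entry as AE-x encrypted when the general purpose encryption bit is
- set and the compression method field holds 99, then recover the real
- method from the 0x9901 extra field. The Rust sketch below is editorial
- (the function name is hypothetical):
-
-     /// Hypothetical check for an AE-x entry: encryption bit 0 set and
-     /// compression method 99; the actual method and key size live in
-     /// the 0x9901 extra-field record.
-     fn is_aex_entry(general_purpose_flags: u16, compression_method: u16) -> bool {
-         (general_purpose_flags & 0x0001) != 0 && compression_method == 99
-     }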
diff --git a/crates/async_zip/rustfmt.toml b/crates/async_zip/rustfmt.toml
deleted file mode 100644
index c775577..0000000
--- a/crates/async_zip/rustfmt.toml
+++ /dev/null
@@ -1,2 +0,0 @@
-max_width = 120
-use_small_heuristics = "Max"
diff --git a/crates/async_zip/src/base/mod.rs b/crates/async_zip/src/base/mod.rs
deleted file mode 100644
index 67b5b60..0000000
--- a/crates/async_zip/src/base/mod.rs
+++ /dev/null
@@ -1,7 +0,0 @@
-// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-//! A base runtime-agnostic implementation using `futures`'s IO types.
-
-pub mod read;
-pub mod write;
diff --git a/crates/async_zip/src/base/read/io/combined_record.rs b/crates/async_zip/src/base/read/io/combined_record.rs
deleted file mode 100644
index d3d41d9..0000000
--- a/crates/async_zip/src/base/read/io/combined_record.rs
+++ /dev/null
@@ -1,68 +0,0 @@
-// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
-// Copyright (c) 2023 Cognite AS
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::spec::header::{EndOfCentralDirectoryHeader, Zip64EndOfCentralDirectoryRecord};
-
-/// Combines all the fields in EOCDR and Zip64EOCDR into one struct.
-#[derive(Debug)]
-pub struct CombinedCentralDirectoryRecord {
-    pub version_made_by: Option<u16>,
-    pub version_needed_to_extract: Option<u16>,
-    pub disk_number: u32,
-    pub disk_number_start_of_cd: u32,
-    pub num_entries_in_directory_on_disk: u64,
-    pub num_entries_in_directory: u64,
-    pub directory_size: u64,
-    pub offset_of_start_of_directory: u64,
-    pub file_comment_length: u16,
-}
-
-impl CombinedCentralDirectoryRecord {
-    /// Combine an EOCDR with an optional Zip64EOCDR.
-    ///
-    /// Fields that are set to their max value in the EOCDR will be overwritten by the contents of
-    /// the corresponding Zip64EOCDR field.
-    pub fn combine(eocdr: EndOfCentralDirectoryHeader, zip64eocdr: Zip64EndOfCentralDirectoryRecord) -> Self {
-        let mut combined = Self::from(&eocdr);
-        if eocdr.disk_num == u16::MAX {
-            combined.disk_number = zip64eocdr.disk_number;
-        }
-        if eocdr.start_cent_dir_disk == u16::MAX {
-            combined.disk_number_start_of_cd = zip64eocdr.disk_number_start_of_cd;
-        }
-        if eocdr.num_of_entries_disk == u16::MAX {
-            combined.num_entries_in_directory_on_disk = zip64eocdr.num_entries_in_directory_on_disk;
-        }
-        if eocdr.num_of_entries == u16::MAX {
-            combined.num_entries_in_directory = zip64eocdr.num_entries_in_directory;
-        }
-        if eocdr.size_cent_dir == u32::MAX {
-            combined.directory_size = zip64eocdr.directory_size;
-        }
-        if eocdr.cent_dir_offset == u32::MAX {
-            combined.offset_of_start_of_directory = zip64eocdr.offset_of_start_of_directory;
-        }
-        combined.version_made_by = Some(zip64eocdr.version_made_by);
-        combined.version_needed_to_extract = Some(zip64eocdr.version_needed_to_extract);
-
-        combined
-    }
-}
-
-// An implementation for the case of no zip64EOCDR.
-impl From<&EndOfCentralDirectoryHeader> for CombinedCentralDirectoryRecord {
-    fn from(header: &EndOfCentralDirectoryHeader) -> Self {
-        Self {
-            version_made_by: None,
-            version_needed_to_extract: None,
-            disk_number: header.disk_num as u32,
-            disk_number_start_of_cd: header.start_cent_dir_disk as u32,
-            num_entries_in_directory_on_disk: header.num_of_entries_disk as u64,
-            num_entries_in_directory: header.num_of_entries as u64,
-            directory_size: header.size_cent_dir as u64,
-            offset_of_start_of_directory: header.cent_dir_offset as u64,
-            file_comment_length: header.file_comm_length,
-        }
-    }
-}
diff --git a/crates/async_zip/src/base/read/io/compressed.rs b/crates/async_zip/src/base/read/io/compressed.rs
deleted file mode 100644
index 8fc6b87..0000000
--- a/crates/async_zip/src/base/read/io/compressed.rs
+++ /dev/null
@@ -1,103 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::spec::Compression;
-
-use std::pin::Pin;
-use std::task::{Context, Poll};
-
-#[cfg(any(
-    feature = "deflate",
-    feature = "bzip2",
-    feature = "zstd",
-    feature = "lzma",
-    feature = "xz",
-    feature = "deflate64"
-))]
-use async_compression::futures::bufread;
-use futures_lite::io::{AsyncBufRead, AsyncRead};
-use pin_project::pin_project;
-
-/// A wrapping reader which holds concrete types for all respective compression method readers.
-#[pin_project(project = CompressedReaderProj)]
-pub(crate) enum CompressedReader<R> {
-    Stored(#[pin] R),
-    #[cfg(feature = "deflate")]
-    Deflate(#[pin] bufread::DeflateDecoder<R>),
-    #[cfg(feature = "deflate64")]
-    Deflate64(#[pin] bufread::Deflate64Decoder<R>),
-    #[cfg(feature = "bzip2")]
-    Bz(#[pin] bufread::BzDecoder<R>),
-    #[cfg(feature = "lzma")]
-    Lzma(#[pin] bufread::LzmaDecoder<R>),
-    #[cfg(feature = "zstd")]
-    Zstd(#[pin] bufread::ZstdDecoder<R>),
-    #[cfg(feature = "xz")]
-    Xz(#[pin] bufread::XzDecoder<R>),
-}
-
-impl<R> CompressedReader<R>
-where
-    R: AsyncBufRead + Unpin,
-{
-    /// Constructs a new wrapping reader from a generic [`AsyncBufRead`] implementer.
-    pub(crate) fn new(reader: R, compression: Compression) -> Self {
-        match compression {
-            Compression::Stored => CompressedReader::Stored(reader),
-            #[cfg(feature = "deflate")]
-            Compression::Deflate => CompressedReader::Deflate(bufread::DeflateDecoder::new(reader)),
-            #[cfg(feature = "deflate64")]
-            Compression::Deflate64 => CompressedReader::Deflate64(bufread::Deflate64Decoder::new(reader)),
-            #[cfg(feature = "bzip2")]
-            Compression::Bz => CompressedReader::Bz(bufread::BzDecoder::new(reader)),
-            #[cfg(feature = "lzma")]
-            Compression::Lzma => CompressedReader::Lzma(bufread::LzmaDecoder::new(reader)),
-            #[cfg(feature = "zstd")]
-            Compression::Zstd => CompressedReader::Zstd(bufread::ZstdDecoder::new(reader)),
-            #[cfg(feature = "xz")]
-            Compression::Xz => CompressedReader::Xz(bufread::XzDecoder::new(reader)),
-        }
-    }
-
-    /// Consumes this reader and returns the inner value.
-    pub(crate) fn into_inner(self) -> R {
-        match self {
-            CompressedReader::Stored(inner) => inner,
-            #[cfg(feature = "deflate")]
-            CompressedReader::Deflate(inner) => inner.into_inner(),
-            #[cfg(feature = "deflate64")]
-            CompressedReader::Deflate64(inner) => inner.into_inner(),
-            #[cfg(feature = "bzip2")]
-            CompressedReader::Bz(inner) => inner.into_inner(),
-            #[cfg(feature = "lzma")]
-            CompressedReader::Lzma(inner) => inner.into_inner(),
-            #[cfg(feature = "zstd")]
-            CompressedReader::Zstd(inner) => inner.into_inner(),
-            #[cfg(feature = "xz")]
-            CompressedReader::Xz(inner) => inner.into_inner(),
-        }
-    }
-}
-
-impl<R> AsyncRead for CompressedReader<R>
-where
-    R: AsyncBufRead + Unpin,
-{
-    fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
-        match self.project() {
-            CompressedReaderProj::Stored(inner) => inner.poll_read(c, b),
-            #[cfg(feature = "deflate")]
-            CompressedReaderProj::Deflate(inner) => inner.poll_read(c, b),
-            #[cfg(feature = "deflate64")]
-            CompressedReaderProj::Deflate64(inner) => inner.poll_read(c, b),
-            #[cfg(feature = "bzip2")]
-            CompressedReaderProj::Bz(inner) => inner.poll_read(c, b),
-            #[cfg(feature = "lzma")]
-            CompressedReaderProj::Lzma(inner) => inner.poll_read(c, b),
-            #[cfg(feature = "zstd")]
-            CompressedReaderProj::Zstd(inner) => inner.poll_read(c, b),
-            #[cfg(feature = "xz")]
-            CompressedReaderProj::Xz(inner) => inner.poll_read(c, b),
-        }
-    }
-}
diff --git a/crates/async_zip/src/base/read/io/entry.rs b/crates/async_zip/src/base/read/io/entry.rs
deleted file mode 100644
index 64e81c6..0000000
--- a/crates/async_zip/src/base/read/io/entry.rs
+++ /dev/null
@@ -1,128 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::base::read::io::{compressed::CompressedReader, hashed::HashedReader, owned::OwnedReader};
-use crate::entry::ZipEntry;
-use crate::error::{Result, ZipError};
-use crate::spec::Compression;
-
-use std::pin::Pin;
-use std::task::{Context, Poll};
-
-use futures_lite::io::{AsyncBufRead, AsyncRead, AsyncReadExt, Take};
-use pin_project::pin_project;
-
-/// A type which encodes that [`ZipEntryReader`] has associated entry data.
-pub struct WithEntry<'a>(OwnedEntry<'a>);
-
-/// A type which encodes that [`ZipEntryReader`] has no associated entry data.
-pub struct WithoutEntry;
-
-/// A ZIP entry reader which may implement decompression.
-#[pin_project]
-pub struct ZipEntryReader<'a, R, E> {
-    #[pin]
-    reader: HashedReader<CompressedReader<Take<OwnedReader<'a, R>>>>,
-    entry: E,
-}
-
-impl<'a, R> ZipEntryReader<'a, R, WithoutEntry>
-where
-    R: AsyncBufRead + Unpin,
-{
-    /// Constructs a new entry reader from its required parameters (incl. an owned R).
-    pub fn new_with_owned(reader: R, compression: Compression, size: u64) -> Self {
-        let reader = HashedReader::new(CompressedReader::new(OwnedReader::Owned(reader).take(size), compression));
-        Self { reader, entry: WithoutEntry }
-    }
-
-    /// Constructs a new entry reader from its required parameters (incl. a mutable borrow of an R).
-    pub(crate) fn new_with_borrow(reader: &'a mut R, compression: Compression, size: u64) -> Self {
-        let reader = HashedReader::new(CompressedReader::new(OwnedReader::Borrow(reader).take(size), compression));
-        Self { reader, entry: WithoutEntry }
-    }
-
-    pub(crate) fn into_with_entry(self, entry: &'a ZipEntry) -> ZipEntryReader<'a, R, WithEntry<'a>> {
-        ZipEntryReader { reader: self.reader, entry: WithEntry(OwnedEntry::Borrow(entry)) }
-    }
-
-    pub(crate) fn into_with_entry_owned(self, entry: ZipEntry) -> ZipEntryReader<'a, R, WithEntry<'a>> {
-        ZipEntryReader { reader: self.reader, entry: WithEntry(OwnedEntry::Owned(entry)) }
-    }
-}
-
-impl<'a, R, E> AsyncRead for ZipEntryReader<'a, R, E>
-where
-    R: AsyncBufRead + Unpin,
-{
-    fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
-        self.project().reader.poll_read(c, b)
-    }
-}
-
-impl<'a, R, E> ZipEntryReader<'a, R, E>
-where
-    R: AsyncBufRead + Unpin,
-{
-    /// Computes and returns the CRC32 hash of bytes read by this reader so far.
-    ///
-    /// This hash should only be computed once EOF has been reached.
-    pub fn compute_hash(&mut self) -> u32 {
-        self.reader.swap_and_compute_hash()
-    }
-
-    /// Consumes this reader and returns the inner value.
-    pub(crate) fn into_inner(self) -> R {
-        self.reader.into_inner().into_inner().into_inner().owned_into_inner()
-    }
-}
-
-impl<R> ZipEntryReader<'_, R, WithEntry<'_>>
-where
-    R: AsyncBufRead + Unpin,
-{
-    /// Returns an immutable reference to the associated entry data.
-    pub fn entry(&self) -> &'_ ZipEntry {
-        self.entry.0.entry()
-    }
-
-    /// Reads all bytes until EOF has been reached, appending them to buf, and verifies the CRC32 values.
-    ///
-    /// This is a helper function synonymous with [`AsyncReadExt::read_to_end()`].
-    pub async fn read_to_end_checked(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
-        let read = self.read_to_end(buf).await?;
-
-        if self.compute_hash() == self.entry.0.entry().crc32() {
-            Ok(read)
-        } else {
-            Err(ZipError::CRC32CheckError)
-        }
-    }
-
-    /// Reads all bytes until EOF has been reached, placing them into buf, and verifies the CRC32 values.
-    ///
-    /// This is a helper function synonymous with [`AsyncReadExt::read_to_string()`].
-    pub async fn read_to_string_checked(&mut self, buf: &mut String) -> Result<usize> {
-        let read = self.read_to_string(buf).await?;
-
-        if self.compute_hash() == self.entry.0.entry().crc32() {
-            Ok(read)
-        } else {
-            Err(ZipError::CRC32CheckError)
-        }
-    }
-}
-
-enum OwnedEntry<'a> {
-    Owned(ZipEntry),
-    Borrow(&'a ZipEntry),
-}
-
-impl<'a> OwnedEntry<'a> {
-    pub fn entry(&self) -> &'_ ZipEntry {
-        match self {
-            OwnedEntry::Owned(entry) => entry,
-            OwnedEntry::Borrow(entry) => entry,
-        }
-    }
-}
diff --git a/crates/async_zip/src/base/read/io/hashed.rs b/crates/async_zip/src/base/read/io/hashed.rs
deleted file mode 100644
index 1190f0d..0000000
--- a/crates/async_zip/src/base/read/io/hashed.rs
+++ /dev/null
@@ -1,56 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::base::read::io::poll_result_ok;
-
-use std::pin::Pin;
-use std::task::{ready, Context, Poll};
-
-use crc32fast::Hasher;
-use futures_lite::io::AsyncRead;
-use pin_project::pin_project;
-
-/// A wrapping reader which computes the CRC32 hash of data read via [`AsyncRead`].
-#[pin_project]
-pub(crate) struct HashedReader<R> {
-    #[pin]
-    pub(crate) reader: R,
-    pub(crate) hasher: Hasher,
-}
-
-impl<R> HashedReader<R>
-where
-    R: AsyncRead + Unpin,
-{
-    /// Constructs a new wrapping reader from a generic [`AsyncRead`] implementer.
-    pub(crate) fn new(reader: R) -> Self {
-        Self { reader, hasher: Hasher::default() }
-    }
-
-    /// Swaps the internal hasher and returns the computed CRC32 hash.
-    ///
-    /// The internal hasher is taken and replaced with a newly-constructed one. As a result, this method should only be
-    /// called once EOF has been reached and it's known that no more data will be read, else the computed hash(s) won't
-    /// accurately represent the data read in.
-    pub(crate) fn swap_and_compute_hash(&mut self) -> u32 {
-        std::mem::take(&mut self.hasher).finalize()
-    }
-
-    /// Consumes this reader and returns the inner value.
-    pub(crate) fn into_inner(self) -> R {
-        self.reader
-    }
-}
-
-impl<R> AsyncRead for HashedReader<R>
-where
-    R: AsyncRead + Unpin,
-{
-    fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
-        let project = self.project();
-        let written = poll_result_ok!(ready!(project.reader.poll_read(c, b)));
-        project.hasher.update(&b[..written]);
-
-        Poll::Ready(Ok(written))
-    }
-}
diff --git a/crates/async_zip/src/base/read/io/locator.rs b/crates/async_zip/src/base/read/io/locator.rs
deleted file mode 100644
index a2e9c5f..0000000
--- a/crates/async_zip/src/base/read/io/locator.rs
+++ /dev/null
@@ -1,96 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-//!
-//!
-//! As with other ZIP libraries, we face the predicament that the end of central directory record may contain a
-//! variable-length file comment. As a result, we cannot just make the assumption that the start of this record is
-//! 18 bytes (the length of the EOCDR) offset from the end of the data - we must locate it ourselves.
-//!
-//! The `zip-rs` crate handles this by reading in reverse from the end of the data. This involves seeking backwards
-//! by a single byte each iteration and reading 4 bytes into a u32. Whether this is performant/acceptable within
-//! a non-async context, I'm unsure, but it isn't desirable within an async context. Especially since we cannot just
-//! place a [`BufReader`] in front of the upstream reader (as its internal buffer is invalidated on each seek).
-//!
-//! Reading in reverse is still desirable as the use of file comments is limited and they're unlikely to be large.
-//!
-//! The below method is one that compromises on these two contention points. Please submit an issue or PR if you know
-//! of a better algorithm for this (and have tested/verified its performance).
-
-#[cfg(doc)]
-use futures_lite::io::BufReader;
-
-use crate::error::{Result as ZipResult, ZipError};
-use crate::spec::consts::{EOCDR_LENGTH, EOCDR_SIGNATURE, SIGNATURE_LENGTH};
-
-use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom};
-
-/// The buffer size used when locating the EOCDR, equal to 2KiB.
-const BUFFER_SIZE: usize = 2048;
-
-/// The upper bound of where the EOCDR signature cannot be located.
-const EOCDR_UPPER_BOUND: u64 = EOCDR_LENGTH as u64;
-
-/// The lower bound of where the EOCDR signature cannot be located.
-const EOCDR_LOWER_BOUND: u64 = EOCDR_UPPER_BOUND + SIGNATURE_LENGTH as u64 + u16::MAX as u64;
-
-/// Locate the `end of central directory record` offset, if one exists.
-/// The returned offset excludes the signature (4 bytes).
-///
-/// This method involves buffered reading in reverse and reverse linear searching along those buffers for the EOCDR
-/// signature. As a result of this buffered approach, we reduce seeks when compared to `zip-rs`'s method by a factor
-/// of the buffer size. We also then don't have to do individual u32 reads against the upstream reader.
-///
-/// Whilst I haven't done any in-depth benchmarks, when reading a ZIP file with the maximum length comment, this method
-/// saw a reduction in location time by a factor of 500 when compared with the `zip-rs` method.
-pub async fn eocdr<R>(mut reader: R) -> ZipResult<u64>
-where
-    R: AsyncRead + AsyncSeek + Unpin,
-{
-    let length = reader.seek(SeekFrom::End(0)).await?;
-    let signature = &EOCDR_SIGNATURE.to_le_bytes();
-    let mut buffer: [u8; BUFFER_SIZE] = [0; BUFFER_SIZE];
-
-    let mut position = length.saturating_sub((EOCDR_LENGTH + BUFFER_SIZE) as u64);
-    reader.seek(SeekFrom::Start(position)).await?;
-
-    loop {
-        let read = reader.read(&mut buffer).await?;
-
-        if let Some(match_index) = reverse_search_buffer(&buffer[..read], signature) {
-            return Ok(position + (match_index + 1) as u64);
-        }
-
-        // If we hit the start of the data or the lower bound, we're unable to locate the EOCDR.
-        if position == 0 || position <= length.saturating_sub(EOCDR_LOWER_BOUND) {
-            return Err(ZipError::UnableToLocateEOCDR);
-        }
-
-        // To handle the case where the EOCDR signature crosses buffer boundaries, we simply overlap reads by the
-        // signature length. This significantly reduces the complexity of handling partial matches with very little
-        // overhead.
-        position = position.saturating_sub((BUFFER_SIZE - SIGNATURE_LENGTH) as u64);
-        reader.seek(SeekFrom::Start(position)).await?;
-    }
-}
-
-/// A naive reverse linear search along the buffer for the specified signature bytes.
-///
-/// This is already surprisingly performant. For instance, using memchr::memchr() to match for the first byte of the
-/// signature, and then manual byte comparisons for the remaining signature bytes was actually slower by a factor of
-/// 2.25. This method was explored as tokio's `read_until()` implementation uses memchr::memchr().
-pub(crate) fn reverse_search_buffer(buffer: &[u8], signature: &[u8]) -> Option<usize> {
-    'outer: for index in (0..buffer.len()).rev() {
-        for (signature_index, signature_byte) in signature.iter().rev().enumerate() {
-            if let Some(next_index) = index.checked_sub(signature_index) {
-                if buffer[next_index] != *signature_byte {
-                    continue 'outer;
-                }
-            } else {
-                break 'outer;
-            }
-        }
-        return Some(index);
-    }
-    None
-}
diff --git a/crates/async_zip/src/base/read/io/mod.rs b/crates/async_zip/src/base/read/io/mod.rs
deleted file mode 100644
index 86af934..0000000
--- a/crates/async_zip/src/base/read/io/mod.rs
+++ /dev/null
@@ -1,88 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-pub(crate) mod combined_record;
-pub(crate) mod compressed;
-pub(crate) mod entry;
-pub(crate) mod hashed;
-pub(crate) mod locator;
-pub(crate) mod owned;
-
-use std::{
-    future::Future,
-    io::ErrorKind,
-    pin::Pin,
-    task::{ready, Context, Poll},
-};
-
-pub use combined_record::CombinedCentralDirectoryRecord;
-use futures_lite::io::AsyncBufRead;
-use pin_project::pin_project;
-
-use crate::{
-    spec::consts::{DATA_DESCRIPTOR_LENGTH, DATA_DESCRIPTOR_SIGNATURE, SIGNATURE_LENGTH},
-    string::{StringEncoding, ZipString},
-};
-use futures_lite::io::{AsyncRead, AsyncReadExt};
-
-/// Read and return a dynamic length string from a reader which impls AsyncRead.
-pub(crate) async fn read_string<R>(reader: R, length: usize, encoding: StringEncoding) -> std::io::Result<ZipString>
-where
-    R: AsyncRead + Unpin,
-{
-    Ok(ZipString::new(read_bytes(reader, length).await?, encoding))
-}
-
-/// Read and return a dynamic length vector of bytes from a reader which impls AsyncRead.
-pub(crate) async fn read_bytes<R>(reader: R, length: usize) -> std::io::Result<Vec<u8>>
-where
-    R: AsyncRead + Unpin,
-{
-    let mut buffer = Vec::with_capacity(length);
-    reader.take(length as u64).read_to_end(&mut buffer).await?;
-
-    Ok(buffer)
-}
-
-#[pin_project]
-pub(crate) struct ConsumeDataDescriptor<'a, R>(#[pin] pub(crate) &'a mut R);
-
-impl<R> Future for ConsumeDataDescriptor<'_, R>
-where
-    R: AsyncBufRead + Unpin,
-{
-    type Output = std::io::Result<()>;
-
-    fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
-        let mut project = self.project();
-
-        let data = poll_result_ok!(ready!(project.0.as_mut().poll_fill_buf(cx)));
-        let signature = data.get(0..4).ok_or(ErrorKind::UnexpectedEof)?;
-        let mut consumed = DATA_DESCRIPTOR_LENGTH;
-
-        if signature == DATA_DESCRIPTOR_SIGNATURE.to_le_bytes() {
-            consumed += SIGNATURE_LENGTH;
-        }
-        if consumed > data.len() {
-            return Poll::Ready(Err(ErrorKind::UnexpectedEof.into()));
-        }
-
-        project.0.as_mut().consume(consumed);
-        Poll::Ready(Ok(()))
-    }
-}
-
-/// A macro that returns the inner value of an Ok or early-returns in the case of an Err.
-///
-/// This is almost identical to the ? operator but handles the situation when a Result is used in combination with
-/// Poll (e.g. tokio's IO traits such as AsyncRead).
-macro_rules! poll_result_ok {
-    ($poll:expr) => {
-        match $poll {
-            Ok(inner) => inner,
-            Err(err) => return Poll::Ready(Err(err)),
-        }
-    };
-}
-
-use poll_result_ok;
diff --git a/crates/async_zip/src/base/read/io/owned.rs b/crates/async_zip/src/base/read/io/owned.rs
deleted file mode 100644
index 371ffab..0000000
--- a/crates/async_zip/src/base/read/io/owned.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use std::pin::Pin;
-use std::task::{Context, Poll};
-
-use futures_lite::io::{AsyncBufRead, AsyncRead};
-use pin_project::pin_project;
-
-/// A wrapping reader which holds an owned R or a mutable borrow to R.
-///
-/// This is used to represent whether the supplied reader can be acted on concurrently or not (with an owned value
-/// suggesting that R implements some method of synchronisation & cloning).
-#[pin_project(project = OwnedReaderProj)]
-pub(crate) enum OwnedReader<'a, R> {
-    Owned(#[pin] R),
-    Borrow(#[pin] &'a mut R),
-}
-
-impl<'a, R> OwnedReader<'a, R>
-where
-    R: AsyncBufRead + Unpin,
-{
-    /// Consumes an owned reader and returns the inner value.
-    pub(crate) fn owned_into_inner(self) -> R {
-        match self {
-            OwnedReader::Owned(inner) => inner,
-            OwnedReader::Borrow(_) => panic!("not OwnedReader::Owned value"),
-        }
-    }
-}
-
-impl<'a, R> AsyncBufRead for OwnedReader<'a, R>
-where
-    R: AsyncBufRead + Unpin,
-{
-    fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<&[u8]>> {
-        match self.project() {
-            OwnedReaderProj::Owned(inner) => inner.poll_fill_buf(cx),
-            OwnedReaderProj::Borrow(inner) => inner.poll_fill_buf(cx),
-        }
-    }
-
-    fn consume(self: Pin<&mut Self>, amt: usize) {
-        match self.project() {
-            OwnedReaderProj::Owned(inner) => inner.consume(amt),
-            OwnedReaderProj::Borrow(inner) => inner.consume(amt),
-        }
-    }
-}
-
-impl<'a, R> AsyncRead for OwnedReader<'a, R>
-where
-    R: AsyncBufRead + Unpin,
-{
-    fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
-        match self.project() {
-            OwnedReaderProj::Owned(inner) => inner.poll_read(c, b),
-            OwnedReaderProj::Borrow(inner) => inner.poll_read(c, b),
-        }
-    }
-}
diff --git a/crates/async_zip/src/base/read/mem.rs b/crates/async_zip/src/base/read/mem.rs
deleted file mode 100644
index c8fa9f1..0000000
--- a/crates/async_zip/src/base/read/mem.rs
+++ /dev/null
@@ -1,147 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-//! A concurrent ZIP reader which acts over an owned vector of bytes.
-//!
-//! Concurrency is achieved as a result of:
-//! - Wrapping the provided vector of bytes within an [`Arc`] to allow shared ownership.
-//! - Wrapping this [`Arc`] around a [`Cursor`] when reading (as the [`Arc`] can deref and coerce into a `&[u8]`).
-//!
-//! ### Usage
-//! Unlike the [`seek`] module, we no longer hold a mutable reference to any inner reader which, in turn, allows the
-//! construction of concurrent [`ZipEntryReader`]s. Though, note that each individual [`ZipEntryReader`] cannot be sent
-//! between thread boundaries due to the masked lifetime requirement. Therefore, the overarching [`ZipFileReader`]
-//! should be cloned and moved into those contexts when needed.
-//!
-//! ### Concurrent Example
-//! ```no_run
-//! # use async_zip::base::read::mem::ZipFileReader;
-//! # use async_zip::error::Result;
-//! # use futures_lite::io::AsyncReadExt;
-//! #
-//! async fn run() -> Result<()> {
-//!     let reader = ZipFileReader::new(Vec::new()).await?;
let reader = ZipFileReader::new(Vec::new()).await?; -//! let result = tokio::join!(read(&reader, 0), read(&reader, 1)); -//! -//! let data_0 = result.0?; -//! let data_1 = result.1?; -//! -//! // Use data within current scope. -//! -//! Ok(()) -//! } -//! -//! async fn read(reader: &ZipFileReader, index: usize) -> Result> { -//! let mut entry = reader.reader_without_entry(index).await?; -//! let mut data = Vec::new(); -//! entry.read_to_end(&mut data).await?; -//! Ok(data) -//! } -//! ``` -//! -//! ### Parallel Example -//! ```no_run -//! # use async_zip::base::read::mem::ZipFileReader; -//! # use async_zip::error::Result; -//! # use futures_lite::io::AsyncReadExt; -//! # -//! async fn run() -> Result<()> { -//! let reader = ZipFileReader::new(Vec::new()).await?; -//! -//! let handle_0 = tokio::spawn(read(reader.clone(), 0)); -//! let handle_1 = tokio::spawn(read(reader.clone(), 1)); -//! -//! let data_0 = handle_0.await.expect("thread panicked")?; -//! let data_1 = handle_1.await.expect("thread panicked")?; -//! -//! // Use data within current scope. -//! -//! Ok(()) -//! } -//! -//! async fn read(reader: ZipFileReader, index: usize) -> Result> { -//! let mut entry = reader.reader_without_entry(index).await?; -//! let mut data = Vec::new(); -//! entry.read_to_end(&mut data).await?; -//! Ok(data) -//! } -//! ``` - -#[cfg(doc)] -use crate::base::read::seek; - -use crate::base::read::io::entry::ZipEntryReader; -use crate::error::{Result, ZipError}; -use crate::file::ZipFile; - -use std::sync::Arc; - -use futures_lite::io::Cursor; - -use super::io::entry::{WithEntry, WithoutEntry}; - -struct Inner { - data: Vec, - file: ZipFile, -} - -// A concurrent ZIP reader which acts over an owned vector of bytes. -#[derive(Clone)] -pub struct ZipFileReader { - inner: Arc, -} - -impl ZipFileReader { - /// Constructs a new ZIP reader from an owned vector of bytes. - pub async fn new(data: Vec) -> Result { - let file = crate::base::read::file(Cursor::new(&data)).await?; - Ok(ZipFileReader::from_raw_parts(data, file)) - } - - /// Constructs a ZIP reader from an owned vector of bytes and ZIP file information derived from those bytes. - /// - /// Providing a [`ZipFile`] that wasn't derived from those bytes may lead to inaccurate parsing. - pub fn from_raw_parts(data: Vec, file: ZipFile) -> ZipFileReader { - ZipFileReader { inner: Arc::new(Inner { data, file }) } - } - - /// Returns this ZIP file's information. - pub fn file(&self) -> &ZipFile { - &self.inner.file - } - - /// Returns the raw bytes provided to the reader during construction. - pub fn data(&self) -> &[u8] { - &self.inner.data - } - - /// Returns a new entry reader if the provided index is valid. - pub async fn reader_without_entry(&self, index: usize) -> Result, WithoutEntry>> { - let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - let mut cursor = Cursor::new(&self.inner.data[..]); - - stored_entry.seek_to_data_offset(&mut cursor).await?; - - Ok(ZipEntryReader::new_with_owned( - cursor, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - )) - } - - /// Returns a new entry reader if the provided index is valid. 
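Because the `mem` reader above is an `Arc` around the raw bytes and the parsed `ZipFile`, walking every entry needs only `&self`. A sequential sketch, assuming the public `ZipFile::entries()` accessor and the `futures_lite` read extension used throughout this diff (`read_all` is a hypothetical helper name):

```rust
use async_zip::base::read::mem::ZipFileReader;
use async_zip::error::Result;
use futures_lite::io::AsyncReadExt;

// Sketch: decompress every entry of an in-memory archive in order.
async fn read_all(reader: &ZipFileReader) -> Result<Vec<Vec<u8>>> {
    let mut out = Vec::new();
    for index in 0..reader.file().entries().len() {
        let mut entry = reader.reader_without_entry(index).await?;
        let mut data = Vec::new();
        entry.read_to_end(&mut data).await?;
        out.push(data);
    }
    Ok(out)
}
```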
- pub async fn reader_with_entry(&self, index: usize) -> Result, WithEntry<'_>>> { - let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - let mut cursor = Cursor::new(&self.inner.data[..]); - - stored_entry.seek_to_data_offset(&mut cursor).await?; - - let reader = ZipEntryReader::new_with_owned( - cursor, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - ); - - Ok(reader.into_with_entry(stored_entry)) - } -} diff --git a/crates/async_zip/src/base/read/mod.rs b/crates/async_zip/src/base/read/mod.rs deleted file mode 100644 index e07cd16..0000000 --- a/crates/async_zip/src/base/read/mod.rs +++ /dev/null @@ -1,320 +0,0 @@ -// Copyright (c) 2022-2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A module which supports reading ZIP files. - -pub mod mem; -pub mod seek; -pub mod stream; - -pub(crate) mod io; - -use crate::ZipString; -// Re-exported as part of the public API. -pub use crate::base::read::io::entry::WithEntry; -pub use crate::base::read::io::entry::WithoutEntry; -pub use crate::base::read::io::entry::ZipEntryReader; - -use crate::date::ZipDateTime; -use crate::entry::{StoredZipEntry, ZipEntry}; -use crate::error::{Result, ZipError}; -use crate::file::ZipFile; -use crate::spec::attribute::AttributeCompatibility; -use crate::spec::consts::LFH_LENGTH; -use crate::spec::consts::{CDH_SIGNATURE, LFH_SIGNATURE, NON_ZIP64_MAX_SIZE, SIGNATURE_LENGTH, ZIP64_EOCDL_LENGTH}; -use crate::spec::header::InfoZipUnicodeCommentExtraField; -use crate::spec::header::InfoZipUnicodePathExtraField; -use crate::spec::header::{ - CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, LocalFileHeader, - Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord, Zip64ExtendedInformationExtraField, -}; -use crate::spec::Compression; -use crate::string::StringEncoding; - -use crate::base::read::io::CombinedCentralDirectoryRecord; -use crate::spec::parse::parse_extra_fields; - -use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom}; - -pub(crate) async fn file(mut reader: R) -> Result -where - R: AsyncRead + AsyncSeek + Unpin, -{ - // First find and parse the EOCDR. - let eocdr_offset = crate::base::read::io::locator::eocdr(&mut reader).await?; - - reader.seek(SeekFrom::Start(eocdr_offset)).await?; - let eocdr = EndOfCentralDirectoryHeader::from_reader(&mut reader).await?; - - let comment = io::read_string(&mut reader, eocdr.file_comm_length.into(), crate::StringEncoding::Utf8).await?; - - // Check the 20 bytes before the EOCDR for the Zip64 EOCDL, plus an extra 4 bytes because the offset - // does not include the signature. If the ECODL exists we are dealing with a Zip64 file. 
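The two accessors above differ only in the marker type: `reader_without_entry` yields a bare byte stream, while `reader_with_entry` pairs the stream with the parsed `ZipEntry` metadata. A small sketch of the difference, using only methods shown in this diff (`show_both` is a hypothetical helper name):

```rust
use async_zip::base::read::mem::ZipFileReader;
use async_zip::error::Result;

// Sketch: the marker types only change what metadata travels with the stream.
async fn show_both(reader: &ZipFileReader) -> Result<()> {
    // WithoutEntry: just the decompressed byte stream.
    let _plain = reader.reader_without_entry(0).await?;

    // WithEntry: the same stream plus access to the entry's metadata.
    let tagged = reader.reader_with_entry(0).await?;
    println!("first entry: {}", tagged.entry().filename().as_str()?);
    Ok(())
}
```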
- let (eocdr, zip64) = match eocdr_offset.checked_sub(ZIP64_EOCDL_LENGTH + SIGNATURE_LENGTH as u64) { - None => (CombinedCentralDirectoryRecord::from(&eocdr), false), - Some(offset) => { - reader.seek(SeekFrom::Start(offset)).await?; - let zip64_locator = Zip64EndOfCentralDirectoryLocator::try_from_reader(&mut reader).await?; - - match zip64_locator { - Some(locator) => { - reader.seek(SeekFrom::Start(locator.relative_offset + SIGNATURE_LENGTH as u64)).await?; - let zip64_eocdr = Zip64EndOfCentralDirectoryRecord::from_reader(&mut reader).await?; - (CombinedCentralDirectoryRecord::combine(eocdr, zip64_eocdr), true) - } - None => (CombinedCentralDirectoryRecord::from(&eocdr), false), - } - } - }; - - // Outdated feature so unlikely to ever make it into this crate. - if eocdr.disk_number != eocdr.disk_number_start_of_cd - || eocdr.num_entries_in_directory != eocdr.num_entries_in_directory_on_disk - { - return Err(ZipError::FeatureNotSupported("Spanned/split files")); - } - - // Find and parse the central directory. - reader.seek(SeekFrom::Start(eocdr.offset_of_start_of_directory)).await?; - let entries = crate::base::read::cd(reader, eocdr.num_entries_in_directory, zip64).await?; - - Ok(ZipFile { entries, comment, zip64 }) -} - -pub(crate) async fn cd(mut reader: R, num_of_entries: u64, zip64: bool) -> Result> -where - R: AsyncRead + Unpin, -{ - let num_of_entries = num_of_entries.try_into().map_err(|_| ZipError::TargetZip64NotSupported)?; - let mut entries = Vec::with_capacity(num_of_entries); - - for _ in 0..num_of_entries { - let entry = cd_record(&mut reader, zip64).await?; - entries.push(entry); - } - - Ok(entries) -} - -pub(crate) fn get_zip64_extra_field(extra_fields: &[ExtraField]) -> Option<&Zip64ExtendedInformationExtraField> { - for field in extra_fields { - if let ExtraField::Zip64ExtendedInformation(zip64field) = field { - return Some(zip64field); - } - } - None -} - -pub(crate) fn get_zip64_extra_field_mut( - extra_fields: &mut [ExtraField], -) -> Option<&mut Zip64ExtendedInformationExtraField> { - for field in extra_fields { - if let ExtraField::Zip64ExtendedInformation(zip64field) = field { - return Some(zip64field); - } - } - None -} - -fn get_combined_sizes( - uncompressed_size: u32, - compressed_size: u32, - extra_field: &Option<&Zip64ExtendedInformationExtraField>, -) -> Result<(u64, u64)> { - let mut uncompressed_size = uncompressed_size as u64; - let mut compressed_size = compressed_size as u64; - - if let Some(extra_field) = extra_field { - if let Some(s) = extra_field.uncompressed_size { - uncompressed_size = s; - } - if let Some(s) = extra_field.compressed_size { - compressed_size = s; - } - } - - Ok((uncompressed_size, compressed_size)) -} - -pub(crate) async fn cd_record(mut reader: R, _zip64: bool) -> Result -where - R: AsyncRead + Unpin, -{ - crate::utils::assert_signature(&mut reader, CDH_SIGNATURE).await?; - - let header = CentralDirectoryRecord::from_reader(&mut reader).await?; - let header_size = (SIGNATURE_LENGTH + LFH_LENGTH) as u64; - let trailing_size = header.file_name_length as u64 + header.extra_field_length as u64; - let filename_basic = io::read_bytes(&mut reader, header.file_name_length.into()).await?; - let compression = Compression::try_from(header.compression)?; - let extra_field = io::read_bytes(&mut reader, header.extra_field_length.into()).await?; - let extra_fields = parse_extra_fields(extra_field, header.uncompressed_size, header.compressed_size)?; - let comment_basic = io::read_bytes(reader, header.file_comment_length.into()).await?; - - let 
zip64_extra_field = get_zip64_extra_field(&extra_fields); - let (uncompressed_size, compressed_size) = - get_combined_sizes(header.uncompressed_size, header.compressed_size, &zip64_extra_field)?; - - let mut file_offset = header.lh_offset as u64; - if let Some(zip64_extra_field) = zip64_extra_field { - if file_offset == NON_ZIP64_MAX_SIZE as u64 { - if let Some(offset) = zip64_extra_field.relative_header_offset { - file_offset = offset; - } - } - } - - let filename = detect_filename(filename_basic, header.flags.filename_unicode, extra_fields.as_ref()); - let comment = detect_comment(comment_basic, header.flags.filename_unicode, extra_fields.as_ref()); - - let entry = ZipEntry { - filename, - compression, - #[cfg(any( - feature = "deflate", - feature = "bzip2", - feature = "zstd", - feature = "lzma", - feature = "xz", - feature = "deflate64" - ))] - compression_level: async_compression::Level::Default, - attribute_compatibility: AttributeCompatibility::Unix, - // FIXME: Default to Unix for the moment - crc32: header.crc, - uncompressed_size, - compressed_size, - last_modification_date: ZipDateTime { date: header.mod_date, time: header.mod_time }, - internal_file_attribute: header.inter_attr, - external_file_attribute: header.exter_attr, - extra_fields, - comment, - data_descriptor: header.flags.data_descriptor, - }; - - Ok(StoredZipEntry { entry, file_offset, header_size: header_size + trailing_size }) -} - -pub(crate) async fn lfh(mut reader: R) -> Result> -where - R: AsyncRead + Unpin, -{ - let signature = { - let mut buffer = [0; 4]; - reader.read_exact(&mut buffer).await?; - u32::from_le_bytes(buffer) - }; - match signature { - actual if actual == LFH_SIGNATURE => (), - actual if actual == CDH_SIGNATURE => return Ok(None), - actual => return Err(ZipError::UnexpectedHeaderError(actual, LFH_SIGNATURE)), - }; - - let header = LocalFileHeader::from_reader(&mut reader).await?; - let filename_basic = io::read_bytes(&mut reader, header.file_name_length.into()).await?; - let compression = Compression::try_from(header.compression)?; - let extra_field = io::read_bytes(&mut reader, header.extra_field_length.into()).await?; - let extra_fields = parse_extra_fields(extra_field, header.uncompressed_size, header.compressed_size)?; - - let zip64_extra_field = get_zip64_extra_field(&extra_fields); - let (uncompressed_size, compressed_size) = - get_combined_sizes(header.uncompressed_size, header.compressed_size, &zip64_extra_field)?; - - if header.flags.data_descriptor && compression == Compression::Stored { - return Err(ZipError::FeatureNotSupported( - "stream reading entries with data descriptors & Stored compression mode", - )); - } - if header.flags.encrypted { - return Err(ZipError::FeatureNotSupported("encryption")); - } - - let filename = detect_filename(filename_basic, header.flags.filename_unicode, extra_fields.as_ref()); - - let entry = ZipEntry { - filename, - compression, - #[cfg(any( - feature = "deflate", - feature = "bzip2", - feature = "zstd", - feature = "lzma", - feature = "xz", - feature = "deflate64" - ))] - compression_level: async_compression::Level::Default, - attribute_compatibility: AttributeCompatibility::Unix, - // FIXME: Default to Unix for the moment - crc32: header.crc, - uncompressed_size, - compressed_size, - last_modification_date: ZipDateTime { date: header.mod_date, time: header.mod_time }, - internal_file_attribute: 0, - external_file_attribute: 0, - extra_fields, - comment: String::new().into(), - data_descriptor: header.flags.data_descriptor, - }; - - Ok(Some(entry)) 
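A worked miniature of the Zip64 override rule that `get_combined_sizes` above implements: the 32-bit header fields stand unless the extra field supplies replacements. Writers conventionally set the 32-bit fields to `0xFFFF_FFFF` when deferring to Zip64, but the parser simply prefers the extra field whenever it is present. This sketch assumes nothing beyond the standard library:

```rust
// Miniature of the override rule in get_combined_sizes.
fn combine(header_field: u32, zip64_override: Option<u64>) -> u64 {
    zip64_override.unwrap_or(header_field as u64)
}

#[test]
fn zip64_values_take_precedence() {
    assert_eq!(combine(1_234, None), 1_234);
    // A size above 4 GiB can only be expressed through the Zip64 field.
    assert_eq!(combine(u32::MAX, Some(5_000_000_000)), 5_000_000_000);
}
```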
-} - -fn detect_comment(basic: Vec, basic_is_utf8: bool, extra_fields: &[ExtraField]) -> ZipString { - if basic_is_utf8 { - ZipString::new(basic, StringEncoding::Utf8) - } else { - let unicode_extra = extra_fields.iter().find_map(|field| match field { - ExtraField::InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField::V1 { crc32, unicode }) => { - if *crc32 == crc32fast::hash(&basic) { - Some(std::string::String::from_utf8(unicode.clone())) - } else { - None - } - } - _ => None, - }); - if let Some(Ok(s)) = unicode_extra { - ZipString::new_with_alternative(s, basic) - } else { - // Do not treat as UTF-8 if UTF-8 flags are not set, - // some string in MBCS may be valid UTF-8 in form, but they are not in truth. - if basic.is_ascii() { - // SAFETY: - // a valid ASCII string is always a valid UTF-8 string - unsafe { std::string::String::from_utf8_unchecked(basic).into() } - } else { - ZipString::new(basic, StringEncoding::Raw) - } - } - } -} - -fn detect_filename(basic: Vec, basic_is_utf8: bool, extra_fields: &[ExtraField]) -> ZipString { - if basic_is_utf8 { - ZipString::new(basic, StringEncoding::Utf8) - } else { - let unicode_extra = extra_fields.iter().find_map(|field| match field { - ExtraField::InfoZipUnicodePath(InfoZipUnicodePathExtraField::V1 { crc32, unicode }) => { - if *crc32 == crc32fast::hash(&basic) { - Some(std::string::String::from_utf8(unicode.clone())) - } else { - None - } - } - _ => None, - }); - if let Some(Ok(s)) = unicode_extra { - ZipString::new_with_alternative(s, basic) - } else { - // Do not treat as UTF-8 if UTF-8 flags are not set, - // some string in MBCS may be valid UTF-8 in form, but they are not in truth. - if basic.is_ascii() { - // SAFETY: - // a valid ASCII string is always a valid UTF-8 string - unsafe { std::string::String::from_utf8_unchecked(basic).into() } - } else { - ZipString::new(basic, StringEncoding::Raw) - } - } - } -} diff --git a/crates/async_zip/src/base/read/seek.rs b/crates/async_zip/src/base/read/seek.rs deleted file mode 100644 index bd1f1ab..0000000 --- a/crates/async_zip/src/base/read/seek.rs +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A ZIP reader which acts over a seekable source. -//! -//! ### Example -//! ```no_run -//! # use async_zip::base::read::seek::ZipFileReader; -//! # use async_zip::error::Result; -//! # use futures_lite::io::AsyncReadExt; -//! # use tokio::fs::File; -//! # use tokio_util::compat::TokioAsyncReadCompatExt; -//! # use tokio::io::BufReader; -//! # -//! async fn run() -> Result<()> { -//! let mut data = BufReader::new(File::open("./foo.zip").await?); -//! let mut reader = ZipFileReader::new(data.compat()).await?; -//! -//! let mut data = Vec::new(); -//! let mut entry = reader.reader_without_entry(0).await?; -//! entry.read_to_end(&mut data).await?; -//! -//! // Use data within current scope. -//! -//! Ok(()) -//! } -//! ``` - -use crate::base::read::io::entry::ZipEntryReader; -use crate::error::{Result, ZipError}; -use crate::file::ZipFile; - -#[cfg(feature = "tokio")] -use crate::tokio::read::seek::ZipFileReader as TokioZipFileReader; - -use futures_lite::io::{AsyncBufRead, AsyncSeek}; - -#[cfg(feature = "tokio")] -use tokio_util::compat::{Compat, TokioAsyncReadCompatExt}; - -use super::io::entry::{WithEntry, WithoutEntry}; - -/// A ZIP reader which acts over a seekable source. 
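`detect_filename` and `detect_comment` above share one fallback ladder: trust the UTF-8 flag, else a CRC-verified Info-ZIP unicode extra field, else the safe ASCII fast path, else raw bytes. A test-style sketch of the three simple branches, assuming it sits beside these private functions (the extra-field branch needs a crafted field and is omitted; the test module name is ours):

```rust
#[cfg(test)]
mod filename_detection_tests {
    use super::detect_filename;
    use crate::string::StringEncoding;

    #[test]
    fn fallback_ladder() {
        // UTF-8 flag set: taken at face value.
        assert!(matches!(detect_filename(b"hello.txt".to_vec(), true, &[]).encoding(), StringEncoding::Utf8));
        // No flag, pure ASCII: safely promoted to UTF-8.
        assert!(matches!(detect_filename(b"plain.txt".to_vec(), false, &[]).encoding(), StringEncoding::Utf8));
        // No flag, non-ASCII bytes: kept raw rather than guessed.
        assert!(matches!(detect_filename(vec![0x86, 0xC6], false, &[]).encoding(), StringEncoding::Raw));
    }
}
```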
-#[derive(Clone)] -pub struct ZipFileReader { - reader: R, - file: ZipFile, -} - -impl ZipFileReader -where - R: AsyncBufRead + AsyncSeek + Unpin, -{ - /// Constructs a new ZIP reader from a seekable source. - pub async fn new(mut reader: R) -> Result> { - let file = crate::base::read::file(&mut reader).await?; - Ok(ZipFileReader::from_raw_parts(reader, file)) - } - - /// Constructs a ZIP reader from a seekable source and ZIP file information derived from that source. - /// - /// Providing a [`ZipFile`] that wasn't derived from that source may lead to inaccurate parsing. - pub fn from_raw_parts(reader: R, file: ZipFile) -> ZipFileReader { - ZipFileReader { reader, file } - } - - /// Returns this ZIP file's information. - pub fn file(&self) -> &ZipFile { - &self.file - } - - /// Returns a mutable reference to the inner seekable source. - /// - /// Swapping the source (eg. via std::mem operations) may lead to inaccurate parsing. - pub fn inner_mut(&mut self) -> &mut R { - &mut self.reader - } - - /// Returns the inner seekable source by consuming self. - pub fn into_inner(self) -> R { - self.reader - } - - /// Returns a new entry reader if the provided index is valid. - pub async fn reader_without_entry(&mut self, index: usize) -> Result> { - let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - stored_entry.seek_to_data_offset(&mut self.reader).await?; - - Ok(ZipEntryReader::new_with_borrow( - &mut self.reader, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - )) - } - - /// Returns a new entry reader if the provided index is valid. - pub async fn reader_with_entry(&mut self, index: usize) -> Result>> { - let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - - stored_entry.seek_to_data_offset(&mut self.reader).await?; - - let reader = ZipEntryReader::new_with_borrow( - &mut self.reader, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - ); - - Ok(reader.into_with_entry(stored_entry)) - } - - /// Returns a new entry reader if the provided index is valid. - /// Consumes self - pub async fn into_entry<'a>(mut self, index: usize) -> Result> - where - R: 'a, - { - let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - - stored_entry.seek_to_data_offset(&mut self.reader).await?; - - Ok(ZipEntryReader::new_with_owned( - self.reader, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - )) - } -} - -#[cfg(feature = "tokio")] -impl ZipFileReader> -where - R: tokio::io::AsyncBufRead + tokio::io::AsyncSeek + Unpin, -{ - /// Constructs a new tokio-specific ZIP reader from a seekable source. - pub async fn with_tokio(reader: R) -> Result> { - let mut reader = reader.compat(); - let file = crate::base::read::file(&mut reader).await?; - Ok(ZipFileReader::from_raw_parts(reader, file)) - } -} diff --git a/crates/async_zip/src/base/read/stream.rs b/crates/async_zip/src/base/read/stream.rs deleted file mode 100644 index d276941..0000000 --- a/crates/async_zip/src/base/read/stream.rs +++ /dev/null @@ -1,174 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A ZIP reader which acts over a non-seekable source. -//! -//! # API Design -//! As opposed to other readers provided by this crate, it's important that the data of an entry is fully read before -//! the proceeding entry is read. 
This is as a result of not being able to seek forwards or backwards, so we must end -//! up at the start of the next entry. -//! -//! **We encode this invariant within Rust's type system so that it can be enforced at compile time.** -//! -//! This requires that any transition methods between these encoded types consume the reader and provide a new owned -//! reader back. This is certainly something to keep in mind when working with this reader, but idiomatic code can -//! still be produced nevertheless. -//! -//! # Considerations -//! As the central directory of a ZIP archive is stored at the end of it, a non-seekable reader doesn't have access -//! to it. We have to rely on information provided within the local file header which may not be accurate or complete. -//! This results in: -//! - The inability to read ZIP entries using the combination of a data descriptor and the Stored compression method. -//! - No file comment being available (defaults to an empty string). -//! - No internal or external file attributes being available (defaults to 0). -//! - The extra field data potentially being inconsistent with what's stored in the central directory. -//! - None of the following being available when the entry was written with a data descriptor (defaults to 0): -//! - CRC -//! - compressed size -//! - uncompressed size -//! -//! # Example -//! ```no_run -//! # use futures_lite::io::Cursor; -//! # use async_zip::error::Result; -//! # use async_zip::base::read::stream::ZipFileReader; -//! # -//! # async fn run() -> Result<()> { -//! let mut zip = ZipFileReader::new(Cursor::new([0; 0])); -//! -//! // Print the name of every file in a ZIP archive. -//! while let Some(entry) = zip.next_with_entry().await? { -//! println!("File: {}", entry.reader().entry().filename().as_str().unwrap()); -//! zip = entry.skip().await?; -//! } -//! # -//! # Ok(()) -//! # } -//! ``` - -use super::io::ConsumeDataDescriptor; - -use crate::base::read::io::entry::ZipEntryReader; -use crate::error::Result; -use crate::error::ZipError; - -#[cfg(feature = "tokio")] -use crate::tokio::read::stream::Ready as TokioReady; - -use futures_lite::io::AsyncBufRead; -use futures_lite::io::AsyncReadExt; - -#[cfg(feature = "tokio")] -use tokio_util::compat::TokioAsyncReadCompatExt; - -use super::io::entry::WithEntry; -use super::io::entry::WithoutEntry; - -/// A type which encodes that [`ZipFileReader`] is ready to open a new entry. -pub struct Ready(R); - -/// A type which encodes that [`ZipFileReader`] is currently reading an entry. -pub struct Reading<'a, R, E>(ZipEntryReader<'a, R, E>, bool); - -/// A ZIP reader which acts over a non-seekable source. -/// -/// See the [module-level docs](.) for more information. -#[derive(Clone)] -pub struct ZipFileReader(S); - -impl<'a, R> ZipFileReader> -where - R: AsyncBufRead + Unpin + 'a, -{ - /// Constructs a new ZIP reader from a non-seekable source. - pub fn new(reader: R) -> Self { - Self(Ready(reader)) - } - - /// Opens the next entry for reading if the central directory hasn’t yet been reached. - pub async fn next_without_entry(mut self) -> Result>>> { - let entry = match crate::base::read::lfh(&mut self.0 .0).await? 
{ - Some(entry) => entry, - None => return Ok(None), - }; - - let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size }; - let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length); - - Ok(Some(ZipFileReader(Reading(reader, entry.data_descriptor)))) - } - - /// Opens the next entry for reading if the central directory hasn’t yet been reached. - pub async fn next_with_entry(mut self) -> Result>>>> { - let entry = match crate::base::read::lfh(&mut self.0 .0).await? { - Some(entry) => entry, - None => return Ok(None), - }; - - let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size }; - let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length); - let data_descriptor = entry.data_descriptor; - - Ok(Some(ZipFileReader(Reading(reader.into_with_entry_owned(entry), data_descriptor)))) - } - - /// Consumes the `ZipFileReader` returning the original `reader` - pub async fn into_inner(self) -> R { - self.0 .0 - } -} - -#[cfg(feature = "tokio")] -impl ZipFileReader> -where - R: tokio::io::AsyncBufRead + Unpin, -{ - /// Constructs a new tokio-specific ZIP reader from a non-seekable source. - pub fn with_tokio(reader: R) -> ZipFileReader> { - Self(Ready(reader.compat())) - } -} - -impl<'a, R, E> ZipFileReader> -where - R: AsyncBufRead + Unpin, -{ - /// Returns an immutable reference to the inner entry reader. - pub fn reader(&self) -> &ZipEntryReader<'a, R, E> { - &self.0 .0 - } - - /// Returns a mutable reference to the inner entry reader. - pub fn reader_mut(&mut self) -> &mut ZipEntryReader<'a, R, E> { - &mut self.0 .0 - } - - /// Converts the reader back into the Ready state if EOF has been reached. - pub async fn done(mut self) -> Result>> { - if self.0 .0.read(&mut [0; 1]).await? != 0 { - return Err(ZipError::EOFNotReached); - } - - let mut inner = self.0 .0.into_inner(); - - // Has data descriptor. - if self.0 .1 { - ConsumeDataDescriptor(&mut inner).await?; - } - - Ok(ZipFileReader(Ready(inner))) - } - - /// Reads until EOF and converts the reader back into the Ready state. - pub async fn skip(mut self) -> Result>> { - while self.0 .0.read(&mut [0; 2048]).await? != 0 {} - let mut inner = self.0 .0.into_inner(); - - // Has data descriptor. 
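A compact sketch of the typestate flow above: `next_with_entry` moves `Ready` into `Reading`, and `done`/`skip` hand a `Ready` reader back. Reading to EOF and then calling `done` is the explicit form of what `skip` automates (`first_file_only` is a hypothetical helper name):

```rust
use async_zip::base::read::stream::ZipFileReader;
use async_zip::error::Result;
use futures_lite::io::{AsyncBufRead, AsyncReadExt};

// Sketch: extract only the first entry, then stop.
async fn first_file_only<R: AsyncBufRead + Unpin>(source: R) -> Result<Option<Vec<u8>>> {
    let zip = ZipFileReader::new(source);
    if let Some(mut reading) = zip.next_with_entry().await? {
        let mut data = Vec::new();
        reading.reader_mut().read_to_end(&mut data).await?;
        reading.done().await?; // EOF was reached, so this transition succeeds
        return Ok(Some(data));
    }
    Ok(None)
}
```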
- if self.0 .1 { - ConsumeDataDescriptor(&mut inner).await?; - } - - Ok(ZipFileReader(Ready(inner))) - } -} diff --git a/crates/async_zip/src/base/write/compressed_writer.rs b/crates/async_zip/src/base/write/compressed_writer.rs deleted file mode 100644 index 3b71421..0000000 --- a/crates/async_zip/src/base/write/compressed_writer.rs +++ /dev/null @@ -1,137 +0,0 @@ -// Copyright (c) 2021 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::base::write::io::offset::AsyncOffsetWriter; -use crate::spec::Compression; - -use std::io::Error; -use std::pin::Pin; -use std::task::{Context, Poll}; - -#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] -use async_compression::futures::write; -use futures_lite::io::AsyncWrite; - -pub enum CompressedAsyncWriter<'b, W: AsyncWrite + Unpin> { - Stored(ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter>), - #[cfg(feature = "deflate")] - Deflate(write::DeflateEncoder>>), - #[cfg(feature = "bzip2")] - Bz(write::BzEncoder>>), - #[cfg(feature = "lzma")] - Lzma(write::LzmaEncoder>>), - #[cfg(feature = "zstd")] - Zstd(write::ZstdEncoder>>), - #[cfg(feature = "xz")] - Xz(write::XzEncoder>>), -} - -impl<'b, W: AsyncWrite + Unpin> CompressedAsyncWriter<'b, W> { - pub fn from_raw(writer: &'b mut AsyncOffsetWriter, compression: Compression) -> Self { - match compression { - Compression::Stored => CompressedAsyncWriter::Stored(ShutdownIgnoredWriter(writer)), - #[cfg(feature = "deflate")] - Compression::Deflate => { - CompressedAsyncWriter::Deflate(write::DeflateEncoder::new(ShutdownIgnoredWriter(writer))) - } - #[cfg(feature = "deflate64")] - Compression::Deflate64 => panic!("writing deflate64 is not supported"), - #[cfg(feature = "bzip2")] - Compression::Bz => CompressedAsyncWriter::Bz(write::BzEncoder::new(ShutdownIgnoredWriter(writer))), - #[cfg(feature = "lzma")] - Compression::Lzma => CompressedAsyncWriter::Lzma(write::LzmaEncoder::new(ShutdownIgnoredWriter(writer))), - #[cfg(feature = "zstd")] - Compression::Zstd => CompressedAsyncWriter::Zstd(write::ZstdEncoder::new(ShutdownIgnoredWriter(writer))), - #[cfg(feature = "xz")] - Compression::Xz => CompressedAsyncWriter::Xz(write::XzEncoder::new(ShutdownIgnoredWriter(writer))), - } - } - - pub fn into_inner(self) -> &'b mut AsyncOffsetWriter { - match self { - CompressedAsyncWriter::Stored(inner) => inner.into_inner(), - #[cfg(feature = "deflate")] - CompressedAsyncWriter::Deflate(inner) => inner.into_inner().into_inner(), - #[cfg(feature = "bzip2")] - CompressedAsyncWriter::Bz(inner) => inner.into_inner().into_inner(), - #[cfg(feature = "lzma")] - CompressedAsyncWriter::Lzma(inner) => inner.into_inner().into_inner(), - #[cfg(feature = "zstd")] - CompressedAsyncWriter::Zstd(inner) => inner.into_inner().into_inner(), - #[cfg(feature = "xz")] - CompressedAsyncWriter::Xz(inner) => inner.into_inner().into_inner(), - } - } -} - -impl<'b, W: AsyncWrite + Unpin> AsyncWrite for CompressedAsyncWriter<'b, W> { - fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { - match *self { - CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_write(cx, buf), - #[cfg(feature = "deflate")] - CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_write(cx, buf), - #[cfg(feature = "bzip2")] - CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_write(cx, buf), - #[cfg(feature = "lzma")] - CompressedAsyncWriter::Lzma(ref mut inner) => 
Pin::new(inner).poll_write(cx, buf), - #[cfg(feature = "zstd")] - CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_write(cx, buf), - #[cfg(feature = "xz")] - CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_write(cx, buf), - } - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - match *self { - CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_flush(cx), - #[cfg(feature = "deflate")] - CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_flush(cx), - #[cfg(feature = "bzip2")] - CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_flush(cx), - #[cfg(feature = "lzma")] - CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_flush(cx), - #[cfg(feature = "zstd")] - CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_flush(cx), - #[cfg(feature = "xz")] - CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_flush(cx), - } - } - - fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - match *self { - CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_close(cx), - #[cfg(feature = "deflate")] - CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_close(cx), - #[cfg(feature = "bzip2")] - CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_close(cx), - #[cfg(feature = "lzma")] - CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_close(cx), - #[cfg(feature = "zstd")] - CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_close(cx), - #[cfg(feature = "xz")] - CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_close(cx), - } - } -} - -pub struct ShutdownIgnoredWriter(W); - -impl ShutdownIgnoredWriter { - pub fn into_inner(self) -> W { - self.0 - } -} - -impl AsyncWrite for ShutdownIgnoredWriter { - fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { - Pin::new(&mut self.0).poll_write(cx, buf) - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - Pin::new(&mut self.0).poll_flush(cx) - } - - fn poll_close(self: Pin<&mut Self>, _: &mut Context) -> Poll> { - Poll::Ready(Ok(())) - } -} diff --git a/crates/async_zip/src/base/write/entry_stream.rs b/crates/async_zip/src/base/write/entry_stream.rs deleted file mode 100644 index cc41f0e..0000000 --- a/crates/async_zip/src/base/write/entry_stream.rs +++ /dev/null @@ -1,272 +0,0 @@ -// Copyright (c) 2021 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::base::write::compressed_writer::CompressedAsyncWriter; -use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut; -use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut; -use crate::base::write::io::offset::AsyncOffsetWriter; -use crate::base::write::CentralDirectoryEntry; -use crate::base::write::ZipFileWriter; -use crate::entry::ZipEntry; -use crate::error::{Result, Zip64ErrorCase, ZipError}; -use crate::spec::extra_field::ExtraFieldAsBytes; -use crate::spec::header::InfoZipUnicodeCommentExtraField; -use crate::spec::header::InfoZipUnicodePathExtraField; -use crate::spec::header::{ - CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, HeaderId, LocalFileHeader, - Zip64ExtendedInformationExtraField, -}; -use crate::string::StringEncoding; - -use std::io::Error; -use std::pin::Pin; -use std::task::{Context, Poll}; - -use crate::base::read::get_zip64_extra_field_mut; -use 
crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE}; -use crc32fast::Hasher; -use futures_lite::io::{AsyncWrite, AsyncWriteExt}; - -/// An entry writer which supports the streaming of data (ie. the writing of unknown size or data at runtime). -/// -/// # Note -/// - This writer cannot be manually constructed; instead, use [`ZipFileWriter::write_entry_stream()`]. -/// - [`EntryStreamWriter::close()`] must be called before a stream writer goes out of scope. -/// - Utilities for working with [`AsyncWrite`] values are provided by [`AsyncWriteExt`]. -pub struct EntryStreamWriter<'b, W: AsyncWrite + Unpin> { - writer: AsyncOffsetWriter>, - cd_entries: &'b mut Vec, - entry: ZipEntry, - hasher: Hasher, - lfh: LocalFileHeader, - lfh_offset: u64, - data_offset: u64, - force_no_zip64: bool, - /// To write back to the original writer if zip64 is required. - is_zip64: &'b mut bool, -} - -impl<'b, W: AsyncWrite + Unpin> EntryStreamWriter<'b, W> { - pub(crate) async fn from_raw( - writer: &'b mut ZipFileWriter, - mut entry: ZipEntry, - ) -> Result> { - let lfh_offset = writer.writer.offset(); - let lfh = EntryStreamWriter::write_lfh(writer, &mut entry).await?; - let data_offset = writer.writer.offset(); - let force_no_zip64 = writer.force_no_zip64; - - let cd_entries = &mut writer.cd_entries; - let is_zip64 = &mut writer.is_zip64; - let writer = AsyncOffsetWriter::new(CompressedAsyncWriter::from_raw(&mut writer.writer, entry.compression())); - - Ok(EntryStreamWriter { - writer, - cd_entries, - entry, - lfh, - lfh_offset, - data_offset, - hasher: Hasher::new(), - force_no_zip64, - is_zip64, - }) - } - - async fn write_lfh(writer: &'b mut ZipFileWriter, entry: &mut ZipEntry) -> Result { - // Always emit a zip64 extended field, even if we don't need it, because we *might* need it. - // If we are forcing no zip, we will have to error later if the file is too large. 
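A design note on `ShutdownIgnoredWriter` from the compressed-writer module above: the `async-compression` encoders close their inner writer when the encoder itself is closed, so without the shim, finishing one entry would shut down the archive's whole output stream. An illustrative fragment wiring the crate-private types the same way `from_raw` above does (not standalone; it assumes an `offset_writer: &mut AsyncOffsetWriter<W>` in scope):

```rust
// Sketch: the encoder owns a ShutdownIgnoredWriter, so closing the encoder
// flushes its compressed tail but leaves the shared AsyncOffsetWriter open
// for the next entry's local file header.
let encoder = write::DeflateEncoder::new(ShutdownIgnoredWriter(offset_writer));
// ...write the entry's data, then close `encoder`...
// `offset_writer` remains usable afterwards.
```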
- let (lfh_compressed, lfh_uncompressed) = if !writer.force_no_zip64 { - if !writer.is_zip64 { - writer.is_zip64 = true; - } - entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(Zip64ExtendedInformationExtraField { - header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD, - uncompressed_size: Some(entry.uncompressed_size), - compressed_size: Some(entry.compressed_size), - relative_header_offset: None, - disk_start_number: None, - })); - - (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE) - } else { - if entry.compressed_size > NON_ZIP64_MAX_SIZE as u64 || entry.uncompressed_size > NON_ZIP64_MAX_SIZE as u64 - { - return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); - } - - (entry.compressed_size as u32, entry.uncompressed_size as u32) - }; - - let utf8_without_alternative = - entry.filename().is_utf8_without_alternative() && entry.comment().is_utf8_without_alternative(); - if !utf8_without_alternative { - if matches!(entry.filename().encoding(), StringEncoding::Utf8) { - let u_file_name = entry.filename().as_bytes().to_vec(); - if !u_file_name.is_empty() { - let basic_crc32 = - crc32fast::hash(entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes())); - let upath_field = get_or_put_info_zip_unicode_path_extra_field_mut(entry.extra_fields.as_mut()); - if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field { - *crc32 = basic_crc32; - *unicode = u_file_name; - } - } - } - if matches!(entry.comment().encoding(), StringEncoding::Utf8) { - let u_comment = entry.comment().as_bytes().to_vec(); - if !u_comment.is_empty() { - let basic_crc32 = - crc32fast::hash(entry.comment().alternative().unwrap_or_else(|| entry.comment().as_bytes())); - let ucom_field = get_or_put_info_zip_unicode_comment_extra_field_mut(entry.extra_fields.as_mut()); - if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field { - *crc32 = basic_crc32; - *unicode = u_comment; - } - } - } - } - - let filename_basic = entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes()); - - let lfh = LocalFileHeader { - compressed_size: lfh_compressed, - uncompressed_size: lfh_uncompressed, - compression: entry.compression().into(), - crc: entry.crc32, - extra_field_length: entry - .extra_fields() - .count_bytes() - .try_into() - .map_err(|_| ZipError::ExtraFieldTooLarge)?, - file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?, - mod_time: entry.last_modification_date().time, - mod_date: entry.last_modification_date().date, - version: crate::spec::version::as_needed_to_extract(entry), - flags: GeneralPurposeFlag { - data_descriptor: true, - encrypted: false, - filename_unicode: utf8_without_alternative, - }, - }; - - writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?; - writer.writer.write_all(&lfh.as_slice()).await?; - writer.writer.write_all(filename_basic).await?; - writer.writer.write_all(&entry.extra_fields().as_bytes()).await?; - - Ok(lfh) - } - - /// Consumes this entry writer and completes all closing tasks. - /// - /// This includes: - /// - Finalising the CRC32 hash value for the written data. - /// - Calculating the compressed and uncompressed byte sizes. - /// - Constructing a central directory header. - /// - Pushing that central directory header to the [`ZipFileWriter`]'s store. - /// - /// Failure to call this function before going out of scope would result in a corrupted ZIP file. 
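Because `write_lfh` above always attaches a Zip64 extended field when Zip64 is permitted, opting out via `force_no_zip64` turns oversized entries into hard errors instead. A usage sketch with the builder API shown elsewhere in this diff (`no_zip64_archive` and the entry name are hypothetical):

```rust
use async_zip::error::Result;
use async_zip::{base::write::ZipFileWriter, Compression, ZipEntryBuilder};

// Sketch: with force_no_zip64, an oversize stream must fail at close().
async fn no_zip64_archive() -> Result<Vec<u8>> {
    let mut writer = ZipFileWriter::new(Vec::<u8>::new()).force_no_zip64();
    let opts = ZipEntryBuilder::new(String::from("big.bin").into(), Compression::Stored);
    let entry_writer = writer.write_entry_stream(opts).await?;
    // Streaming more than u32::MAX bytes here would make `entry_writer.close()`
    // return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)).
    entry_writer.close().await?;
    writer.close().await
}
```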
- pub async fn close(mut self) -> Result<()> { - self.writer.close().await?; - - let crc = self.hasher.finalize(); - let uncompressed_size = self.writer.offset(); - let inner_writer = self.writer.into_inner().into_inner(); - let compressed_size = inner_writer.offset() - self.data_offset; - - let (cdr_compressed_size, cdr_uncompressed_size, lh_offset) = if self.force_no_zip64 { - if uncompressed_size > NON_ZIP64_MAX_SIZE as u64 - || compressed_size > NON_ZIP64_MAX_SIZE as u64 - || self.lfh_offset > NON_ZIP64_MAX_SIZE as u64 - { - return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); - } - (uncompressed_size as u32, compressed_size as u32, self.lfh_offset as u32) - } else { - // When streaming an entry, we are always using a zip64 field. - match get_zip64_extra_field_mut(&mut self.entry.extra_fields) { - // This case shouldn't be necessary but is included for completeness. - None => { - self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation( - Zip64ExtendedInformationExtraField { - header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD, - uncompressed_size: Some(uncompressed_size), - compressed_size: Some(compressed_size), - relative_header_offset: Some(self.lfh_offset), - disk_start_number: None, - }, - )); - } - Some(zip64) => { - zip64.uncompressed_size = Some(uncompressed_size); - zip64.compressed_size = Some(compressed_size); - zip64.relative_header_offset = Some(self.lfh_offset); - } - } - self.lfh.extra_field_length = - self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?; - - (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE) - }; - - inner_writer.write_all(&crate::spec::consts::DATA_DESCRIPTOR_SIGNATURE.to_le_bytes()).await?; - inner_writer.write_all(&crc.to_le_bytes()).await?; - inner_writer.write_all(&cdr_compressed_size.to_le_bytes()).await?; - inner_writer.write_all(&cdr_uncompressed_size.to_le_bytes()).await?; - - let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()); - - let cdh = CentralDirectoryRecord { - compressed_size: cdr_compressed_size, - uncompressed_size: cdr_uncompressed_size, - crc, - v_made_by: crate::spec::version::as_made_by(), - v_needed: self.lfh.version, - compression: self.lfh.compression, - extra_field_length: self.lfh.extra_field_length, - file_name_length: self.lfh.file_name_length, - file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?, - mod_time: self.lfh.mod_time, - mod_date: self.lfh.mod_date, - flags: self.lfh.flags, - disk_start: 0, - inter_attr: self.entry.internal_file_attribute(), - exter_attr: self.entry.external_file_attribute(), - lh_offset, - }; - - self.cd_entries.push(CentralDirectoryEntry { header: cdh, entry: self.entry }); - // Ensure that we can fit this many files in this archive if forcing no zip64 - if self.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize { - if self.force_no_zip64 { - return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)); - } - if !*self.is_zip64 { - *self.is_zip64 = true; - } - } - - Ok(()) - } -} - -impl<'a, W: AsyncWrite + Unpin> AsyncWrite for EntryStreamWriter<'a, W> { - fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { - let poll = Pin::new(&mut self.writer).poll_write(cx, buf); - - if let Poll::Ready(Ok(written)) = poll { - self.hasher.update(&buf[0..written]); - } - - poll - } - - fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - Pin::new(&mut self.writer).poll_flush(cx) - } - - 
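`close` above finishes a streamed entry by emitting a data descriptor behind the compressed bytes. Its fixed non-Zip64 layout as a freestanding sketch (the signature value is the standard PKZIP constant; the helper name is ours):

```rust
// Sketch: the 16-byte data descriptor written after streamed entry data.
fn data_descriptor(crc: u32, compressed: u32, uncompressed: u32) -> [u8; 16] {
    let mut out = [0u8; 16];
    out[0..4].copy_from_slice(&0x08074b50u32.to_le_bytes()); // optional signature
    out[4..8].copy_from_slice(&crc.to_le_bytes()); // CRC-32 of uncompressed data
    out[8..12].copy_from_slice(&compressed.to_le_bytes()); // 0xFFFFFFFF defers to Zip64
    out[12..16].copy_from_slice(&uncompressed.to_le_bytes()); // 0xFFFFFFFF defers to Zip64
    out
}
```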
fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - Pin::new(&mut self.writer).poll_close(cx) - } -} diff --git a/crates/async_zip/src/base/write/entry_whole.rs b/crates/async_zip/src/base/write/entry_whole.rs deleted file mode 100644 index 34594b6..0000000 --- a/crates/async_zip/src/base/write/entry_whole.rs +++ /dev/null @@ -1,259 +0,0 @@ -// Copyright (c) 2021 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut; -use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut; -use crate::base::write::{CentralDirectoryEntry, ZipFileWriter}; -use crate::entry::ZipEntry; -use crate::error::{Result, Zip64ErrorCase, ZipError}; -use crate::spec::extra_field::Zip64ExtendedInformationExtraFieldBuilder; -use crate::spec::header::{InfoZipUnicodeCommentExtraField, InfoZipUnicodePathExtraField}; -use crate::spec::{ - extra_field::ExtraFieldAsBytes, - header::{CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, LocalFileHeader}, - Compression, -}; -use crate::StringEncoding; -#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] -use futures_lite::io::Cursor; - -use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE}; -#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))] -use async_compression::futures::write; -use futures_lite::io::{AsyncWrite, AsyncWriteExt}; - -pub struct EntryWholeWriter<'b, 'c, W: AsyncWrite + Unpin> { - writer: &'b mut ZipFileWriter, - entry: ZipEntry, - data: &'c [u8], -} - -impl<'b, 'c, W: AsyncWrite + Unpin> EntryWholeWriter<'b, 'c, W> { - pub fn from_raw(writer: &'b mut ZipFileWriter, entry: ZipEntry, data: &'c [u8]) -> Self { - Self { writer, entry, data } - } - - pub async fn write(mut self) -> Result<()> { - let mut _compressed_data: Option> = None; - let compressed_data = match self.entry.compression() { - Compression::Stored => self.data, - #[cfg(any( - feature = "deflate", - feature = "bzip2", - feature = "zstd", - feature = "lzma", - feature = "xz", - feature = "deflate64" - ))] - _ => { - _compressed_data = - Some(compress(self.entry.compression(), self.data, self.entry.compression_level).await); - _compressed_data.as_ref().unwrap() - } - }; - - let mut zip64_extra_field_builder = None; - - let (lfh_uncompressed_size, lfh_compressed_size) = if self.data.len() as u64 > NON_ZIP64_MAX_SIZE as u64 - || compressed_data.len() as u64 > NON_ZIP64_MAX_SIZE as u64 - { - if self.writer.force_no_zip64 { - return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); - } - if !self.writer.is_zip64 { - self.writer.is_zip64 = true; - } - zip64_extra_field_builder = Some( - Zip64ExtendedInformationExtraFieldBuilder::new() - .sizes(compressed_data.len() as u64, self.data.len() as u64), - ); - (NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE) - } else { - (self.data.len() as u32, compressed_data.len() as u32) - }; - - let lh_offset = if self.writer.writer.offset() > NON_ZIP64_MAX_SIZE as u64 { - if self.writer.force_no_zip64 { - return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)); - } - if !self.writer.is_zip64 { - self.writer.is_zip64 = true; - } - - if let Some(zip64_extra_field) = zip64_extra_field_builder { - zip64_extra_field_builder = Some(zip64_extra_field.relative_header_offset(self.writer.writer.offset())); - } else { - zip64_extra_field_builder = Some( - 
Zip64ExtendedInformationExtraFieldBuilder::new() - .relative_header_offset(self.writer.writer.offset()), - ); - } - NON_ZIP64_MAX_SIZE - } else { - self.writer.writer.offset() as u32 - }; - - if let Some(builder) = zip64_extra_field_builder { - if !builder.eof_only() { - self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(builder.build()?)); - zip64_extra_field_builder = None; - } else { - zip64_extra_field_builder = Some(builder); - } - } - - let utf8_without_alternative = - self.entry.filename().is_utf8_without_alternative() && self.entry.comment().is_utf8_without_alternative(); - if !utf8_without_alternative { - if matches!(self.entry.filename().encoding(), StringEncoding::Utf8) { - let u_file_name = self.entry.filename().as_bytes().to_vec(); - if !u_file_name.is_empty() { - let basic_crc32 = crc32fast::hash( - self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes()), - ); - let upath_field = - get_or_put_info_zip_unicode_path_extra_field_mut(self.entry.extra_fields.as_mut()); - if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field { - *crc32 = basic_crc32; - *unicode = u_file_name; - } - } - } - if matches!(self.entry.comment().encoding(), StringEncoding::Utf8) { - let u_comment = self.entry.comment().as_bytes().to_vec(); - if !u_comment.is_empty() { - let basic_crc32 = crc32fast::hash( - self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()), - ); - let ucom_field = - get_or_put_info_zip_unicode_comment_extra_field_mut(self.entry.extra_fields.as_mut()); - if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field { - *crc32 = basic_crc32; - *unicode = u_comment; - } - } - } - } - - let filename_basic = self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes()); - let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()); - - let lf_header = LocalFileHeader { - compressed_size: lfh_compressed_size, - uncompressed_size: lfh_uncompressed_size, - compression: self.entry.compression().into(), - crc: crc32fast::hash(self.data), - extra_field_length: self - .entry - .extra_fields() - .count_bytes() - .try_into() - .map_err(|_| ZipError::ExtraFieldTooLarge)?, - file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?, - mod_time: self.entry.last_modification_date().time, - mod_date: self.entry.last_modification_date().date, - version: crate::spec::version::as_needed_to_extract(&self.entry), - flags: GeneralPurposeFlag { - data_descriptor: false, - encrypted: false, - filename_unicode: utf8_without_alternative, - }, - }; - - let mut header = CentralDirectoryRecord { - v_made_by: crate::spec::version::as_made_by(), - v_needed: lf_header.version, - compressed_size: lf_header.compressed_size, - uncompressed_size: lf_header.uncompressed_size, - compression: lf_header.compression, - crc: lf_header.crc, - extra_field_length: lf_header.extra_field_length, - file_name_length: lf_header.file_name_length, - file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?, - mod_time: lf_header.mod_time, - mod_date: lf_header.mod_date, - flags: lf_header.flags, - disk_start: 0, - inter_attr: self.entry.internal_file_attribute(), - exter_attr: self.entry.external_file_attribute(), - lh_offset, - }; - - self.writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?; - self.writer.writer.write_all(&lf_header.as_slice()).await?; - 
self.writer.writer.write_all(filename_basic).await?; - self.writer.writer.write_all(&self.entry.extra_fields().as_bytes()).await?; - self.writer.writer.write_all(compressed_data).await?; - - if let Some(builder) = zip64_extra_field_builder { - self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(builder.build()?)); - header.extra_field_length = - self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?; - } - - self.writer.cd_entries.push(CentralDirectoryEntry { header, entry: self.entry }); - // Ensure that we can fit this many files in this archive if forcing no zip64 - if self.writer.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize { - if self.writer.force_no_zip64 { - return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)); - } - if !self.writer.is_zip64 { - self.writer.is_zip64 = true; - } - } - Ok(()) - } -} - -#[cfg(any( - feature = "deflate", - feature = "bzip2", - feature = "zstd", - feature = "lzma", - feature = "xz", - feature = "deflate64" -))] -async fn compress(compression: Compression, data: &[u8], level: async_compression::Level) -> Vec { - // TODO: Reduce reallocations of Vec by making a lower-bound estimate of the length reduction and - // pre-initialising the Vec to that length. Then truncate() to the actual number of bytes written. - match compression { - #[cfg(feature = "deflate")] - Compression::Deflate => { - let mut writer = write::DeflateEncoder::with_quality(Cursor::new(Vec::new()), level); - writer.write_all(data).await.unwrap(); - writer.close().await.unwrap(); - writer.into_inner().into_inner() - } - #[cfg(feature = "deflate64")] - Compression::Deflate64 => panic!("compressing deflate64 is not supported"), - #[cfg(feature = "bzip2")] - Compression::Bz => { - let mut writer = write::BzEncoder::with_quality(Cursor::new(Vec::new()), level); - writer.write_all(data).await.unwrap(); - writer.close().await.unwrap(); - writer.into_inner().into_inner() - } - #[cfg(feature = "lzma")] - Compression::Lzma => { - let mut writer = write::LzmaEncoder::with_quality(Cursor::new(Vec::new()), level); - writer.write_all(data).await.unwrap(); - writer.close().await.unwrap(); - writer.into_inner().into_inner() - } - #[cfg(feature = "xz")] - Compression::Xz => { - let mut writer = write::XzEncoder::with_quality(Cursor::new(Vec::new()), level); - writer.write_all(data).await.unwrap(); - writer.close().await.unwrap(); - writer.into_inner().into_inner() - } - #[cfg(feature = "zstd")] - Compression::Zstd => { - let mut writer = write::ZstdEncoder::with_quality(Cursor::new(Vec::new()), level); - writer.write_all(data).await.unwrap(); - writer.close().await.unwrap(); - writer.into_inner().into_inner() - } - _ => unreachable!(), - } -} diff --git a/crates/async_zip/src/base/write/io/mod.rs b/crates/async_zip/src/base/write/io/mod.rs deleted file mode 100644 index 326d7d9..0000000 --- a/crates/async_zip/src/base/write/io/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -pub(crate) mod offset; diff --git a/crates/async_zip/src/base/write/io/offset.rs b/crates/async_zip/src/base/write/io/offset.rs deleted file mode 100644 index 98d3777..0000000 --- a/crates/async_zip/src/base/write/io/offset.rs +++ /dev/null @@ -1,73 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use std::io::{Error, IoSlice}; -use std::pin::Pin; 
-use std::task::{Context, Poll}; - -use futures_lite::io::AsyncWrite; -use pin_project::pin_project; - -/// A wrapper around an [`AsyncWrite`] implementation which tracks the current byte offset. -#[pin_project(project = OffsetWriterProj)] -pub struct AsyncOffsetWriter { - #[pin] - inner: W, - offset: u64, -} - -impl AsyncOffsetWriter -where - W: AsyncWrite + Unpin, -{ - /// Constructs a new wrapper from an inner [`AsyncWrite`] writer. - pub fn new(inner: W) -> Self { - Self { inner, offset: 0 } - } - - /// Returns the current byte offset. - pub fn offset(&self) -> u64 { - self.offset - } - - /// Consumes this wrapper and returns the inner [`AsyncWrite`] writer. - pub fn into_inner(self) -> W { - self.inner - } - - pub fn inner_mut(&mut self) -> &mut W { - &mut self.inner - } -} - -impl AsyncWrite for AsyncOffsetWriter -where - W: AsyncWrite + Unpin, -{ - fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll> { - let this = self.project(); - let poll = this.inner.poll_write(cx, buf); - - if let Poll::Ready(Ok(inner)) = &poll { - *this.offset += *inner as u64; - } - - poll - } - - fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - self.project().inner.poll_flush(cx) - } - - fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll> { - self.project().inner.poll_close(cx) - } - - fn poll_write_vectored( - self: Pin<&mut Self>, - cx: &mut Context<'_>, - bufs: &[IoSlice<'_>], - ) -> Poll> { - self.project().inner.poll_write_vectored(cx, bufs) - } -} diff --git a/crates/async_zip/src/base/write/mod.rs b/crates/async_zip/src/base/write/mod.rs deleted file mode 100644 index a571d61..0000000 --- a/crates/async_zip/src/base/write/mod.rs +++ /dev/null @@ -1,290 +0,0 @@ -// Copyright (c) 2021-2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A module which supports writing ZIP files. -//! -//! # Example -//! ### Whole data (u8 slice) -//! ```no_run -//! # #[cfg(feature = "deflate")] -//! # { -//! # use async_zip::{Compression, ZipEntryBuilder, base::write::ZipFileWriter}; -//! # use async_zip::error::ZipError; -//! # -//! # async fn run() -> Result<(), ZipError> { -//! let mut writer = ZipFileWriter::new(Vec::::new()); -//! -//! let data = b"This is an example file."; -//! let opts = ZipEntryBuilder::new(String::from("foo.txt").into(), Compression::Deflate); -//! -//! writer.write_entry_whole(opts, data).await?; -//! writer.close().await?; -//! # Ok(()) -//! # } -//! # } -//! ``` -//! ### Stream data (unknown size & data) -//! ```no_run -//! # #[cfg(feature = "deflate")] -//! # { -//! # use async_zip::{Compression, ZipEntryBuilder, base::write::ZipFileWriter}; -//! # use std::io::Cursor; -//! # use async_zip::error::ZipError; -//! # use futures_lite::io::AsyncWriteExt; -//! # use tokio_util::compat::TokioAsyncWriteCompatExt; -//! # -//! # async fn run() -> Result<(), ZipError> { -//! let mut writer = ZipFileWriter::new(Vec::::new()); -//! -//! let data = b"This is an example file."; -//! let opts = ZipEntryBuilder::new(String::from("bar.txt").into(), Compression::Deflate); -//! -//! let mut entry_writer = writer.write_entry_stream(opts).await?; -//! entry_writer.write_all(data).await.unwrap(); -//! -//! entry_writer.close().await?; -//! writer.close().await?; -//! # Ok(()) -//! # } -//! # } -//! 
``` - -pub(crate) mod compressed_writer; -pub(crate) mod entry_stream; -pub(crate) mod entry_whole; -pub(crate) mod io; - -pub use entry_stream::EntryStreamWriter; - -#[cfg(feature = "tokio")] -use tokio_util::compat::{Compat, TokioAsyncWriteCompatExt}; - -use crate::entry::ZipEntry; -use crate::error::Result; -use crate::spec::extra_field::ExtraFieldAsBytes; -use crate::spec::header::{ - CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, InfoZipUnicodeCommentExtraField, - InfoZipUnicodePathExtraField, Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord, -}; - -#[cfg(feature = "tokio")] -use crate::tokio::write::ZipFileWriter as TokioZipFileWriter; - -use entry_whole::EntryWholeWriter; -use io::offset::AsyncOffsetWriter; - -use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE}; -use futures_lite::io::{AsyncWrite, AsyncWriteExt}; - -pub(crate) struct CentralDirectoryEntry { - pub header: CentralDirectoryRecord, - pub entry: ZipEntry, -} - -/// A ZIP file writer which acts over AsyncWrite implementers. -/// -/// # Note -/// - [`ZipFileWriter::close()`] must be called before a stream writer goes out of scope. -pub struct ZipFileWriter { - pub(crate) writer: AsyncOffsetWriter, - pub(crate) cd_entries: Vec, - /// If true, will error if a Zip64 struct must be written. - force_no_zip64: bool, - /// Whether to write Zip64 end of directory structs. - pub(crate) is_zip64: bool, - comment_opt: Option, -} - -impl ZipFileWriter { - /// Construct a new ZIP file writer from a mutable reference to a writer. - pub fn new(writer: W) -> Self { - Self { - writer: AsyncOffsetWriter::new(writer), - cd_entries: Vec::new(), - comment_opt: None, - is_zip64: false, - force_no_zip64: false, - } - } - - /// Force the ZIP writer to operate in non-ZIP64 mode. - /// If any files would need ZIP64, an error will be raised. - pub fn force_no_zip64(mut self) -> Self { - self.force_no_zip64 = true; - self - } - - /// Force the ZIP writer to emit Zip64 structs at the end of the archive. - /// Zip64 extended fields will only be written if needed. - pub fn force_zip64(mut self) -> Self { - self.is_zip64 = true; - self - } - - /// Write a new ZIP entry of known size and data. - pub async fn write_entry_whole>(&mut self, entry: E, data: &[u8]) -> Result<()> { - EntryWholeWriter::from_raw(self, entry.into(), data).write().await - } - - /// Write an entry of unknown size and data via streaming (ie. using a data descriptor). - /// The generated Local File Header will be invalid, with no compressed size, uncompressed size, - /// and a null CRC. This might cause problems with the destination reader. - pub async fn write_entry_stream>(&mut self, entry: E) -> Result> { - EntryStreamWriter::from_raw(self, entry.into()).await - } - - /// Set the ZIP file comment. - pub fn comment(&mut self, comment: String) { - self.comment_opt = Some(comment); - } - - /// Returns a mutable reference to the inner writer. - /// - /// Care should be taken when using this inner writer as doing so may invalidate internal state of this writer. - pub fn inner_mut(&mut self) -> &mut W { - self.writer.inner_mut() - } - - /// Consumes this ZIP writer and completes all closing tasks. - /// - /// This includes: - /// - Writing all central directory headers. - /// - Writing the end of central directory header. - /// - Writing the file comment. - /// - /// Failure to call this function before going out of scope would result in a corrupted ZIP file. 
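A quick test-style check of the `AsyncOffsetWriter` contract from the `io::offset` module above: the offset advances by exactly the number of bytes the inner writer accepted. This assumes `futures_lite`'s `AsyncWrite` support for `Vec<u8>` (the same combination the examples in this diff write to) and runs inside the crate, since the wrapper lives in a `pub(crate)` module:

```rust
use futures_lite::io::AsyncWriteExt;

#[test]
fn offset_tracks_written_bytes() {
    futures_lite::future::block_on(async {
        let mut writer = AsyncOffsetWriter::new(Vec::<u8>::new());
        writer.write_all(b"hello").await.unwrap();
        writer.write_all(b" world").await.unwrap();
        assert_eq!(writer.offset(), 11);
        assert_eq!(writer.into_inner().len(), 11);
    });
}
```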
-    pub async fn close(mut self) -> Result<W> {
-        let cd_offset = self.writer.offset();
-
-        for entry in &self.cd_entries {
-            let filename_basic =
-                entry.entry.filename().alternative().unwrap_or_else(|| entry.entry.filename().as_bytes());
-            let comment_basic = entry.entry.comment().alternative().unwrap_or_else(|| entry.entry.comment().as_bytes());
-
-            self.writer.write_all(&crate::spec::consts::CDH_SIGNATURE.to_le_bytes()).await?;
-            self.writer.write_all(&entry.header.as_slice()).await?;
-            self.writer.write_all(filename_basic).await?;
-            self.writer.write_all(&entry.entry.extra_fields().as_bytes()).await?;
-            self.writer.write_all(comment_basic).await?;
-        }
-
-        let central_directory_size = self.writer.offset() - cd_offset;
-        let central_directory_size_u32 = if central_directory_size > NON_ZIP64_MAX_SIZE as u64 {
-            NON_ZIP64_MAX_SIZE
-        } else {
-            central_directory_size as u32
-        };
-        let num_entries_in_directory = self.cd_entries.len() as u64;
-        let num_entries_in_directory_u16 = if num_entries_in_directory > NON_ZIP64_MAX_NUM_FILES as u64 {
-            NON_ZIP64_MAX_NUM_FILES
-        } else {
-            num_entries_in_directory as u16
-        };
-        let cd_offset_u32 = if cd_offset > NON_ZIP64_MAX_SIZE as u64 {
-            if self.force_no_zip64 {
-                return Err(crate::error::ZipError::Zip64Needed(crate::error::Zip64ErrorCase::LargeFile));
-            } else {
-                self.is_zip64 = true;
-            }
-            NON_ZIP64_MAX_SIZE
-        } else {
-            cd_offset as u32
-        };
-
-        // Add the zip64 EOCDR and EOCDL if we are in zip64 mode.
-        if self.is_zip64 {
-            let eocdr_offset = self.writer.offset();
-
-            let eocdr = Zip64EndOfCentralDirectoryRecord {
-                size_of_zip64_end_of_cd_record: 44,
-                version_made_by: crate::spec::version::as_made_by(),
-                version_needed_to_extract: 46,
-                disk_number: 0,
-                disk_number_start_of_cd: 0,
-                num_entries_in_directory_on_disk: num_entries_in_directory,
-                num_entries_in_directory,
-                directory_size: central_directory_size,
-                offset_of_start_of_directory: cd_offset,
-            };
-            self.writer.write_all(&crate::spec::consts::ZIP64_EOCDR_SIGNATURE.to_le_bytes()).await?;
-            self.writer.write_all(&eocdr.as_bytes()).await?;
-
-            let eocdl = Zip64EndOfCentralDirectoryLocator {
-                number_of_disk_with_start_of_zip64_end_of_central_directory: 0,
-                relative_offset: eocdr_offset,
-                total_number_of_disks: 1,
-            };
-            self.writer.write_all(&crate::spec::consts::ZIP64_EOCDL_SIGNATURE.to_le_bytes()).await?;
-            self.writer.write_all(&eocdl.as_bytes()).await?;
-        }
-
-        let header = EndOfCentralDirectoryHeader {
-            disk_num: 0,
-            start_cent_dir_disk: 0,
-            num_of_entries_disk: num_entries_in_directory_u16,
-            num_of_entries: num_entries_in_directory_u16,
-            size_cent_dir: central_directory_size_u32,
-            cent_dir_offset: cd_offset_u32,
-            file_comm_length: self.comment_opt.as_ref().map(|v| v.len() as u16).unwrap_or_default(),
-        };
-
-        self.writer.write_all(&crate::spec::consts::EOCDR_SIGNATURE.to_le_bytes()).await?;
-        self.writer.write_all(&header.as_slice()).await?;
-        if let Some(comment) = self.comment_opt {
-            self.writer.write_all(comment.as_bytes()).await?;
-        }
-
-        Ok(self.writer.into_inner())
-    }
-}
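The arithmetic in `close()` hinges on the two `NON_ZIP64_MAX_*` sentinels. A condensed restatement of the decision (not the crate's exact control flow, which also has to honour `force_no_zip64`):

```rust
// A ZIP64 EOCD record pair becomes necessary once any end-of-central-
// directory field overflows its classic 16-bit or 32-bit slot.
fn needs_zip64(num_entries: u64, cd_size: u64, cd_offset: u64) -> bool {
    num_entries > 0xFFFF || cd_size > 0xFFFF_FFFF || cd_offset > 0xFFFF_FFFF
}
```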
-
-#[cfg(feature = "tokio")]
-impl<W> ZipFileWriter<Compat<W>>
-where
-    W: tokio::io::AsyncWrite + Unpin,
-{
-    /// Construct a new ZIP file writer from a mutable reference to a writer.
-    pub fn with_tokio(writer: W) -> TokioZipFileWriter<W> {
-        Self {
-            writer: AsyncOffsetWriter::new(writer.compat_write()),
-            cd_entries: Vec::new(),
-            comment_opt: None,
-            is_zip64: false,
-            force_no_zip64: false,
-        }
-    }
-}
-
-pub(crate) fn get_or_put_info_zip_unicode_path_extra_field_mut(
-    extra_fields: &mut Vec<ExtraField>,
-) -> &mut InfoZipUnicodePathExtraField {
-    if !extra_fields.iter().any(|field| matches!(field, ExtraField::InfoZipUnicodePath(_))) {
-        extra_fields
-            .push(ExtraField::InfoZipUnicodePath(InfoZipUnicodePathExtraField::V1 { crc32: 0, unicode: vec![] }));
-    }
-
-    for field in extra_fields.iter_mut() {
-        if let ExtraField::InfoZipUnicodePath(extra_field) = field {
-            return extra_field;
-        }
-    }
-
-    panic!("InfoZipUnicodePathExtraField not found after insertion")
-}
-
-pub(crate) fn get_or_put_info_zip_unicode_comment_extra_field_mut(
-    extra_fields: &mut Vec<ExtraField>,
-) -> &mut InfoZipUnicodeCommentExtraField {
-    if !extra_fields.iter().any(|field| matches!(field, ExtraField::InfoZipUnicodeComment(_))) {
-        extra_fields
-            .push(ExtraField::InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField::V1 { crc32: 0, unicode: vec![] }));
-    }
-
-    for field in extra_fields.iter_mut() {
-        if let ExtraField::InfoZipUnicodeComment(extra_field) = field {
-            return extra_field;
-        }
-    }
-
-    panic!("InfoZipUnicodeCommentExtraField not found after insertion")
-}
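`with_tokio` is a thin shim over `tokio-util`'s compat layer. A sketch of how a caller would reach it, assuming the vendored crate's `tokio` feature is enabled (the file name is hypothetical):

```rust
// Any tokio AsyncWrite can back the futures-io based ZipFileWriter;
// with_tokio() applies tokio-util's compat adapter internally.
async fn zip_to_file() -> std::io::Result<()> {
    let file = tokio::fs::File::create("archive.zip").await?;
    let _writer = ZipFileWriter::with_tokio(file);
    Ok(())
}
```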
diff --git a/crates/async_zip/src/date/builder.rs b/crates/async_zip/src/date/builder.rs
deleted file mode 100644
index ea660f9..0000000
--- a/crates/async_zip/src/date/builder.rs
+++ /dev/null
@@ -1,83 +0,0 @@
-// Copyright (c) 2024 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::ZipDateTime;
-
-/// A builder for [`ZipDateTime`].
-pub struct ZipDateTimeBuilder(pub(crate) ZipDateTime);
-
-impl From<ZipDateTime> for ZipDateTimeBuilder {
-    fn from(date: ZipDateTime) -> Self {
-        Self(date)
-    }
-}
-
-impl Default for ZipDateTimeBuilder {
-    fn default() -> Self {
-        Self::new()
-    }
-}
-
-impl ZipDateTimeBuilder {
-    /// Constructs a new builder which defines the raw underlying data of a ZIP entry.
-    pub fn new() -> Self {
-        Self(ZipDateTime { date: 0, time: 0 })
-    }
-
-    /// Sets the date and time's year.
-    pub fn year(mut self, year: i32) -> Self {
-        let year: u16 = (((year - 1980) << 9) & 0xFE00).try_into().unwrap();
-        self.0.date |= year;
-        self
-    }
-
-    /// Sets the date and time's month.
-    pub fn month(mut self, month: u32) -> Self {
-        let month: u16 = ((month << 5) & 0x1E0).try_into().unwrap();
-        self.0.date |= month;
-        self
-    }
-
-    /// Sets the date and time's day.
-    pub fn day(mut self, day: u32) -> Self {
-        let day: u16 = (day & 0x1F).try_into().unwrap();
-        self.0.date |= day;
-        self
-    }
-
-    /// Sets the date and time's hour.
-    pub fn hour(mut self, hour: u32) -> Self {
-        let hour: u16 = ((hour << 11) & 0xF800).try_into().unwrap();
-        self.0.time |= hour;
-        self
-    }
-
-    /// Sets the date and time's minute.
-    pub fn minute(mut self, minute: u32) -> Self {
-        let minute: u16 = ((minute << 5) & 0x7E0).try_into().unwrap();
-        self.0.time |= minute;
-        self
-    }
-
-    /// Sets the date and time's second.
-    ///
-    /// Note that MS-DOS has a maximum granularity of two seconds.
-    pub fn second(mut self, second: u32) -> Self {
-        let second: u16 = ((second >> 1) & 0x1F).try_into().unwrap();
-        self.0.time |= second;
-        self
-    }
-
-    /// Consumes this builder and returns a final [`ZipDateTime`].
-    ///
-    /// This is equivalent to:
-    /// ```
-    /// # use async_zip::{ZipDateTime, ZipDateTimeBuilder, Compression};
-    /// #
-    /// # let builder = ZipDateTimeBuilder::new().year(2024).month(3).day(2);
-    /// let date: ZipDateTime = builder.into();
-    /// ```
-    pub fn build(self) -> ZipDateTime {
-        self.into()
-    }
}
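The shifts and masks above implement the packed MS-DOS layout: day in bits 0..5, month in bits 5..9, and years-since-1980 in bits 9..16. A self-contained arithmetic check (hypothetical values, not crate code):

```rust
// Pack 2024-03-02 exactly as the builder does, then read it back.
fn main() {
    let (year, month, day) = (2024u16, 3u16, 2u16);
    let date = ((year - 1980) << 9) | (month << 5) | day;
    assert_eq!((date >> 9) + 1980, 2024);
    assert_eq!((date >> 5) & 0xF, 3);
    assert_eq!(date & 0x1F, 2);
}
```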
diff --git a/crates/async_zip/src/date/mod.rs b/crates/async_zip/src/date/mod.rs
deleted file mode 100644
index 3b4fd4a..0000000
--- a/crates/async_zip/src/date/mod.rs
+++ /dev/null
@@ -1,112 +0,0 @@
-// Copyright (c) 2021-2024 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-pub mod builder;
-
-#[cfg(feature = "chrono")]
-use chrono::{DateTime, Datelike, LocalResult, TimeZone, Timelike, Utc};
-
-use self::builder::ZipDateTimeBuilder;
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#446
-// https://learn.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime
-
-/// A date and time stored as per the MS-DOS representation used by ZIP files.
-#[derive(Debug, Default, PartialEq, Eq, Clone, Copy, Hash)]
-pub struct ZipDateTime {
-    pub(crate) date: u16,
-    pub(crate) time: u16,
-}
-
-impl ZipDateTime {
-    /// Returns the year of this date & time.
-    pub fn year(&self) -> i32 {
-        (((self.date & 0xFE00) >> 9) + 1980).into()
-    }
-
-    /// Returns the month of this date & time.
-    pub fn month(&self) -> u32 {
-        ((self.date & 0x1E0) >> 5).into()
-    }
-
-    /// Returns the day of this date & time.
-    pub fn day(&self) -> u32 {
-        (self.date & 0x1F).into()
-    }
-
-    /// Returns the hour of this date & time.
-    pub fn hour(&self) -> u32 {
-        ((self.time & 0xF800) >> 11).into()
-    }
-
-    /// Returns the minute of this date & time.
-    pub fn minute(&self) -> u32 {
-        ((self.time & 0x7E0) >> 5).into()
-    }
-
-    /// Returns the second of this date & time.
-    ///
-    /// Note that MS-DOS has a maximum granularity of two seconds.
-    pub fn second(&self) -> u32 {
-        ((self.time & 0x1F) << 1).into()
-    }
-
-    /// Constructs chrono's [`DateTime`] representation of this date & time.
-    ///
-    /// Note that this requires the `chrono` feature.
-    #[cfg(feature = "chrono")]
-    pub fn as_chrono(&self) -> LocalResult<DateTime<Utc>> {
-        self.into()
-    }
-
-    /// Constructs this date & time from chrono's [`DateTime`] representation.
-    ///
-    /// Note that this requires the `chrono` feature.
-    #[cfg(feature = "chrono")]
-    pub fn from_chrono(dt: &DateTime<Utc>) -> Self {
-        dt.into()
-    }
-}
-
-impl From<ZipDateTimeBuilder> for ZipDateTime {
-    fn from(builder: ZipDateTimeBuilder) -> Self {
-        builder.0
-    }
-}
-
-#[cfg(feature = "chrono")]
-impl From<&DateTime<Utc>> for ZipDateTime {
-    fn from(value: &DateTime<Utc>) -> Self {
-        let mut builder = ZipDateTimeBuilder::new();
-
-        builder = builder.year(value.date_naive().year());
-        builder = builder.month(value.date_naive().month());
-        builder = builder.day(value.date_naive().day());
-        builder = builder.hour(value.time().hour());
-        builder = builder.minute(value.time().minute());
-        builder = builder.second(value.time().second());
-
-        builder.build()
-    }
-}
-
-#[cfg(feature = "chrono")]
-impl From<&ZipDateTime> for LocalResult<DateTime<Utc>> {
-    fn from(value: &ZipDateTime) -> Self {
-        Utc.with_ymd_and_hms(value.year(), value.month(), value.day(), value.hour(), value.minute(), value.second())
-    }
-}
-
-#[cfg(feature = "chrono")]
-impl From<DateTime<Utc>> for ZipDateTime {
-    fn from(value: DateTime<Utc>) -> Self {
-        (&value).into()
-    }
-}
-
-#[cfg(feature = "chrono")]
-impl From<ZipDateTime> for LocalResult<DateTime<Utc>> {
-    fn from(value: ZipDateTime) -> Self {
-        (&value).into()
-    }
-}
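Given the `From` impls above, a chrono round trip is one `into()` each way; note the two-second granularity called out in `second()`. A sketch assuming the crate's `chrono` feature:

```rust
use chrono::{TimeZone, Utc};

fn main() {
    let dt = Utc.with_ymd_and_hms(2024, 3, 2, 12, 30, 31).unwrap();
    let zip: ZipDateTime = dt.into();
    // 31 s is stored as 15 two-second units and reads back as 30 s.
    assert_eq!(zip.second(), 30);
    assert_eq!(zip.year(), 2024);
}
```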
diff --git a/crates/async_zip/src/entry/builder.rs b/crates/async_zip/src/entry/builder.rs
deleted file mode 100644
index 34993c6..0000000
--- a/crates/async_zip/src/entry/builder.rs
+++ /dev/null
@@ -1,113 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::entry::ZipEntry;
-use crate::spec::{attribute::AttributeCompatibility, header::ExtraField, Compression};
-use crate::{date::ZipDateTime, string::ZipString};
-
-/// A builder for [`ZipEntry`].
-pub struct ZipEntryBuilder(pub(crate) ZipEntry);
-
-impl From<ZipEntry> for ZipEntryBuilder {
-    fn from(entry: ZipEntry) -> Self {
-        Self(entry)
-    }
-}
-
-impl ZipEntryBuilder {
-    /// Constructs a new builder which defines the raw underlying data of a ZIP entry.
-    ///
-    /// A filename and compression method are needed to construct the builder as minimal parameters.
-    pub fn new(filename: ZipString, compression: Compression) -> Self {
-        Self(ZipEntry::new(filename, compression))
-    }
-
-    /// Sets the entry's filename.
-    pub fn filename(mut self, filename: ZipString) -> Self {
-        self.0.filename = filename;
-        self
-    }
-
-    /// Sets the entry's compression method.
-    pub fn compression(mut self, compression: Compression) -> Self {
-        self.0.compression = compression;
-        self
-    }
-
-    /// Set a size hint for the file, to be written into the local file header.
-    /// Unlikely to be useful except for the case of streaming files to be Store'd.
-    /// This size hint does not affect the central directory, nor does it affect whole files.
-    pub fn size<N: Into<u64>, M: Into<u64>>(mut self, compressed_size: N, uncompressed_size: M) -> Self {
-        self.0.compressed_size = compressed_size.into();
-        self.0.uncompressed_size = uncompressed_size.into();
-        self
-    }
-
-    /// Set the deflate compression option.
-    ///
-    /// If the compression type isn't deflate, this option has no effect.
-    #[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
-    pub fn deflate_option(mut self, option: crate::DeflateOption) -> Self {
-        self.0.compression_level = option.into_level();
-        self
-    }
-
-    /// Sets the entry's attribute host compatibility.
-    pub fn attribute_compatibility(mut self, compatibility: AttributeCompatibility) -> Self {
-        self.0.attribute_compatibility = compatibility;
-        self
-    }
-
-    /// Sets the entry's last modification date.
-    pub fn last_modification_date(mut self, date: ZipDateTime) -> Self {
-        self.0.last_modification_date = date;
-        self
-    }
-
-    /// Sets the entry's internal file attribute.
-    pub fn internal_file_attribute(mut self, attribute: u16) -> Self {
-        self.0.internal_file_attribute = attribute;
-        self
-    }
-
-    /// Sets the entry's external file attribute.
-    pub fn external_file_attribute(mut self, attribute: u32) -> Self {
-        self.0.external_file_attribute = attribute;
-        self
-    }
-
-    /// Sets the entry's extra field data.
-    pub fn extra_fields(mut self, field: Vec<ExtraField>) -> Self {
-        self.0.extra_fields = field;
-        self
-    }
-
-    /// Sets the entry's file comment.
-    pub fn comment(mut self, comment: ZipString) -> Self {
-        self.0.comment = comment;
-        self
-    }
-
-    /// Sets the entry's Unix permissions mode.
-    ///
-    /// If the attribute host compatibility isn't set to Unix, this will have no effect.
-    pub fn unix_permissions(mut self, mode: u16) -> Self {
-        if matches!(self.0.attribute_compatibility, AttributeCompatibility::Unix) {
-            self.0.external_file_attribute = (self.0.external_file_attribute & 0xFFFF) | (mode as u32) << 16;
-        }
-        self
-    }
-
-    /// Consumes this builder and returns a final [`ZipEntry`].
-    ///
-    /// This is equivalent to:
-    /// ```
-    /// # use async_zip::{ZipEntry, ZipEntryBuilder, Compression};
-    /// #
-    /// # let builder = ZipEntryBuilder::new(String::from("foo.bar").into(), Compression::Stored);
-    /// let entry: ZipEntry = builder.into();
-    /// ```
-    pub fn build(self) -> ZipEntry {
-        self.into()
-    }
-}
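A typical use of this builder would be a Stored entry carrying Unix permissions; 0o644 only takes effect because the default attribute compatibility is Unix. Illustrative, with hypothetical names:

```rust
use async_zip::{Compression, ZipEntryBuilder};

fn main() {
    let entry = ZipEntryBuilder::new(String::from("docs/readme.txt").into(), Compression::Stored)
        .unix_permissions(0o644)
        .build();
    assert_eq!(entry.unix_permissions(), Some(0o644));
}
```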
diff --git a/crates/async_zip/src/entry/mod.rs b/crates/async_zip/src/entry/mod.rs
deleted file mode 100644
index a0bd841..0000000
--- a/crates/async_zip/src/entry/mod.rs
+++ /dev/null
@@ -1,219 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-pub mod builder;
-
-use std::ops::Deref;
-
-use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom};
-
-use crate::entry::builder::ZipEntryBuilder;
-use crate::error::{Result, ZipError};
-use crate::spec::{
-    attribute::AttributeCompatibility,
-    consts::LFH_SIGNATURE,
-    header::{ExtraField, LocalFileHeader},
-    Compression,
-};
-use crate::{string::ZipString, ZipDateTime};
-
-/// An immutable store of data about a ZIP entry.
-///
-/// This type cannot be directly constructed so instead, the [`ZipEntryBuilder`] must be used. Internally this builder
-/// stores a [`ZipEntry`] so conversions between these two types via the [`From`] implementations will be
-/// non-allocating.
-#[derive(Clone, Debug)]
-pub struct ZipEntry {
-    pub(crate) filename: ZipString,
-    pub(crate) compression: Compression,
-    #[cfg(any(
-        feature = "deflate",
-        feature = "bzip2",
-        feature = "zstd",
-        feature = "lzma",
-        feature = "xz",
-        feature = "deflate64"
-    ))]
-    pub(crate) compression_level: async_compression::Level,
-    pub(crate) crc32: u32,
-    pub(crate) uncompressed_size: u64,
-    pub(crate) compressed_size: u64,
-    pub(crate) attribute_compatibility: AttributeCompatibility,
-    pub(crate) last_modification_date: ZipDateTime,
-    pub(crate) internal_file_attribute: u16,
-    pub(crate) external_file_attribute: u32,
-    pub(crate) extra_fields: Vec<ExtraField>,
-    pub(crate) comment: ZipString,
-    pub(crate) data_descriptor: bool,
-}
-
-impl From<ZipEntryBuilder> for ZipEntry {
-    fn from(builder: ZipEntryBuilder) -> Self {
-        builder.0
-    }
-}
-
-impl ZipEntry {
-    pub(crate) fn new(filename: ZipString, compression: Compression) -> Self {
-        ZipEntry {
-            filename,
-            compression,
-            #[cfg(any(
-                feature = "deflate",
-                feature = "bzip2",
-                feature = "zstd",
-                feature = "lzma",
-                feature = "xz",
-                feature = "deflate64"
-            ))]
-            compression_level: async_compression::Level::Default,
-            crc32: 0,
-            uncompressed_size: 0,
-            compressed_size: 0,
-            attribute_compatibility: AttributeCompatibility::Unix,
-            last_modification_date: ZipDateTime::default(),
-            internal_file_attribute: 0,
-            external_file_attribute: 0,
-            extra_fields: Vec::new(),
-            comment: String::new().into(),
-            data_descriptor: false,
-        }
-    }
-
-    /// Returns the entry's filename.
-    ///
-    /// ## Note
-    /// This will return the raw filename stored during ZIP creation. If calling this method on entries retrieved from
-    /// untrusted ZIP files, the filename should be sanitised before being used as a path to prevent [directory
-    /// traversal attacks](https://en.wikipedia.org/wiki/Directory_traversal_attack).
-    pub fn filename(&self) -> &ZipString {
-        &self.filename
-    }
-
-    /// Returns the entry's compression method.
-    pub fn compression(&self) -> Compression {
-        self.compression
-    }
-
-    /// Returns the entry's CRC32 value.
-    pub fn crc32(&self) -> u32 {
-        self.crc32
-    }
-
-    /// Returns the entry's uncompressed size.
-    pub fn uncompressed_size(&self) -> u64 {
-        self.uncompressed_size
-    }
-
-    /// Returns the entry's compressed size.
-    pub fn compressed_size(&self) -> u64 {
-        self.compressed_size
-    }
-
-    /// Returns the entry's attribute's host compatibility.
-    pub fn attribute_compatibility(&self) -> AttributeCompatibility {
-        self.attribute_compatibility
-    }
-
-    /// Returns the entry's last modification time & date.
-    pub fn last_modification_date(&self) -> &ZipDateTime {
-        &self.last_modification_date
-    }
-
-    /// Returns the entry's internal file attribute.
-    pub fn internal_file_attribute(&self) -> u16 {
-        self.internal_file_attribute
-    }
-
-    /// Returns the entry's external file attribute
-    pub fn external_file_attribute(&self) -> u32 {
-        self.external_file_attribute
-    }
-
-    /// Returns the entry's extra field data.
-    pub fn extra_fields(&self) -> &[ExtraField] {
-        &self.extra_fields
-    }
-
-    /// Returns the entry's file comment.
-    pub fn comment(&self) -> &ZipString {
-        &self.comment
-    }
-
-    /// Returns the entry's integer-based UNIX permissions.
-    ///
-    /// # Note
-    /// This will return None if the attribute host compatibility is not listed as Unix.
-    pub fn unix_permissions(&self) -> Option<u16> {
-        if !matches!(self.attribute_compatibility, AttributeCompatibility::Unix) {
-            return None;
-        }
-
-        Some(((self.external_file_attribute) >> 16) as u16)
-    }
-
-    /// Returns whether or not the entry represents a directory.
-    pub fn dir(&self) -> Result<bool> {
-        Ok(self.filename.as_str()?.ends_with('/'))
-    }
-}
-
-/// An immutable store of data about how a ZIP entry is stored within a specific archive.
-///
-/// Besides storing archive independent information like the size and timestamp it can also be used to query
-/// information about how the entry is stored in an archive.
-#[derive(Clone)]
-pub struct StoredZipEntry {
-    pub(crate) entry: ZipEntry,
-    // pub(crate) general_purpose_flag: GeneralPurposeFlag,
-    pub(crate) file_offset: u64,
-    pub(crate) header_size: u64,
-}
-
-impl StoredZipEntry {
-    /// Returns the offset in bytes to where the header of the entry starts.
-    pub fn header_offset(&self) -> u64 {
-        self.file_offset
-    }
-
-    /// Returns the combined size in bytes of the header, the filename, and any extra fields.
-    ///
-    /// Note: This uses the extra field length stored in the central directory, which may differ from that stored in
-    /// the local file header. See specification:
-    pub fn header_size(&self) -> u64 {
-        self.header_size
-    }
-
-    /// Seek to the offset in bytes where the data of the entry starts.
-    pub(crate) async fn seek_to_data_offset<R: AsyncRead + AsyncSeek + Unpin>(&self, mut reader: &mut R) -> Result<()> {
-        // Seek to the header
-        reader.seek(SeekFrom::Start(self.file_offset)).await?;
-
-        // Check the signature
-        let signature = {
-            let mut buffer = [0; 4];
-            reader.read_exact(&mut buffer).await?;
-            u32::from_le_bytes(buffer)
-        };
-
-        match signature {
-            LFH_SIGNATURE => (),
-            actual => return Err(ZipError::UnexpectedHeaderError(actual, LFH_SIGNATURE)),
-        };
-
-        // Skip the local file header and trailing data
-        let header = LocalFileHeader::from_reader(&mut reader).await?;
-        let trailing_size = (header.file_name_length as i64) + (header.extra_field_length as i64);
-        reader.seek(SeekFrom::Current(trailing_size)).await?;
-
-        Ok(())
-    }
-}
-
-impl Deref for StoredZipEntry {
-    type Target = ZipEntry;
-
-    fn deref(&self) -> &Self::Target {
-        &self.entry
-    }
-}
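The directory-traversal warning on `filename()` is worth making concrete. A conservative, illustrative check (not part of the crate) before using an entry name as a filesystem path:

```rust
// Reject absolute paths and any ".." component outright.
fn is_safe_entry_name(name: &str) -> bool {
    !name.starts_with('/') && !name.split('/').any(|part| part == "..")
}

fn main() {
    assert!(is_safe_entry_name("docs/readme.txt"));
    assert!(!is_safe_entry_name("../../etc/passwd"));
}
```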
diff --git a/crates/async_zip/src/error.rs b/crates/async_zip/src/error.rs
deleted file mode 100644
index f383112..0000000
--- a/crates/async_zip/src/error.rs
+++ /dev/null
@@ -1,72 +0,0 @@
-// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-//! A module which holds relevant error reporting structures/types.
-
-use std::fmt::{Display, Formatter};
-use thiserror::Error;
-
-/// A Result type alias over ZipError to minimise repetition.
-pub type Result<V> = std::result::Result<V, ZipError>;
-
-#[derive(Debug, PartialEq, Eq)]
-pub enum Zip64ErrorCase {
-    TooManyFiles,
-    LargeFile,
-}
-
-impl Display for Zip64ErrorCase {
-    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
-        match self {
-            Self::TooManyFiles => write!(f, "More than 65536 files in archive"),
-            Self::LargeFile => write!(f, "File is larger than 4 GiB"),
-        }
-    }
-}
-
-/// An enum of possible errors and their descriptions.
-#[non_exhaustive]
-#[derive(Debug, Error)]
-pub enum ZipError {
-    #[error("feature not supported: '{0}'")]
-    FeatureNotSupported(&'static str),
-    #[error("compression not supported: {0}")]
-    CompressionNotSupported(u16),
-    #[error("host attribute compatibility not supported: {0}")]
-    AttributeCompatibilityNotSupported(u16),
-    #[error("attempted to read a ZIP64 file whilst on a 32-bit target")]
-    TargetZip64NotSupported,
-    #[error("attempted to write a ZIP file with force_no_zip64 when ZIP64 is needed: {0}")]
-    Zip64Needed(Zip64ErrorCase),
-    #[error("end of file has not been reached")]
-    EOFNotReached,
-    #[error("extra fields exceeded maximum size")]
-    ExtraFieldTooLarge,
-    #[error("comment exceeded maximum size")]
-    CommentTooLarge,
-    #[error("filename exceeded maximum size")]
-    FileNameTooLarge,
-    #[error("attempted to convert non-UTF8 bytes to a string/str")]
-    StringNotUtf8,
-
-    #[error("unable to locate the end of central directory record")]
-    UnableToLocateEOCDR,
-    #[error("extra field size was indicated to be {0} but only {1} bytes remain")]
-    InvalidExtraFieldHeader(u16, usize),
-    #[error("zip64 extended information field was incomplete")]
-    Zip64ExtendedFieldIncomplete,
-
-    #[error("an upstream reader returned an error: {0}")]
-    UpstreamReadError(#[from] std::io::Error),
-    #[error("a computed CRC32 value did not match the expected value")]
-    CRC32CheckError,
-    #[error("entry index was out of bounds")]
-    EntryIndexOutOfBounds,
-    #[error("Encountered an unexpected header (actual: {0:#x}, expected: {1:#x}).")]
-    UnexpectedHeaderError(u32, u32),
-
-    #[error("Info-ZIP Unicode Comment Extra Field was incomplete")]
-    InfoZipUnicodeCommentFieldIncomplete,
-    #[error("Info-ZIP Unicode Path Extra Field was incomplete")]
-    InfoZipUnicodePathFieldIncomplete,
-}
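Because the `Result` alias pins the error type to `ZipError`, callers can match on specific failure modes, and `#[non_exhaustive]` forces a catch-all arm. A sketch, assuming the error type in scope:

```rust
fn describe(err: &ZipError) -> String {
    match err {
        ZipError::CRC32CheckError => "corrupt entry: CRC32 mismatch".into(),
        ZipError::UpstreamReadError(io) => format!("upstream I/O error: {io}"),
        other => other.to_string(), // Display comes from thiserror's #[error(...)]
    }
}
```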
diff --git a/crates/async_zip/src/file/builder.rs b/crates/async_zip/src/file/builder.rs
deleted file mode 100644
index 209ad16..0000000
--- a/crates/async_zip/src/file/builder.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::{file::ZipFile, string::ZipString};
-
-/// A builder for [`ZipFile`].
-pub struct ZipFileBuilder(pub(crate) ZipFile);
-
-impl From<ZipFile> for ZipFileBuilder {
-    fn from(file: ZipFile) -> Self {
-        Self(file)
-    }
-}
-
-impl Default for ZipFileBuilder {
-    fn default() -> Self {
-        ZipFileBuilder(ZipFile { entries: Vec::new(), zip64: false, comment: String::new().into() })
-    }
-}
-
-impl ZipFileBuilder {
-    pub fn new() -> Self {
-        Self::default()
-    }
-
-    /// Sets the file's comment.
-    pub fn comment(mut self, comment: ZipString) -> Self {
-        self.0.comment = comment;
-        self
-    }
-
-    /// Consumes this builder and returns a final [`ZipFile`].
-    ///
-    /// This is equivalent to:
-    /// ```
-    /// # use async_zip::{ZipFile, ZipFileBuilder};
-    /// #
-    /// # let builder = ZipFileBuilder::new();
-    /// let file: ZipFile = builder.into();
-    /// ```
-    pub fn build(self) -> ZipFile {
-        self.into()
-    }
-}
diff --git a/crates/async_zip/src/file/mod.rs b/crates/async_zip/src/file/mod.rs
deleted file mode 100644
index f503a8d..0000000
--- a/crates/async_zip/src/file/mod.rs
+++ /dev/null
@@ -1,38 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-pub(crate) mod builder;
-
-use crate::{entry::StoredZipEntry, string::ZipString};
-use builder::ZipFileBuilder;
-
-/// An immutable store of data about a ZIP file.
-#[derive(Clone)]
-pub struct ZipFile {
-    pub(crate) entries: Vec<StoredZipEntry>,
-    pub(crate) zip64: bool,
-    pub(crate) comment: ZipString,
-}
-
-impl From<ZipFileBuilder> for ZipFile {
-    fn from(builder: ZipFileBuilder) -> Self {
-        builder.0
-    }
-}
-
-impl ZipFile {
-    /// Returns a list of this ZIP file's entries.
-    pub fn entries(&self) -> &[StoredZipEntry] {
-        &self.entries
-    }
-
-    /// Returns this ZIP file's trailing comment.
-    pub fn comment(&self) -> &ZipString {
-        &self.comment
-    }
-
-    /// Returns whether or not this ZIP file is zip64
-    pub fn zip64(&self) -> bool {
-        self.zip64
-    }
-}
diff --git a/crates/async_zip/src/lib.rs b/crates/async_zip/src/lib.rs
deleted file mode 100644
index 67e8fd5..0000000
--- a/crates/async_zip/src/lib.rs
+++ /dev/null
@@ -1,62 +0,0 @@
-// Copyright (c) 2021-2023 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-// Document all features on docs.rs
-#![cfg_attr(docsrs, feature(doc_cfg))]
-
-//! An asynchronous ZIP archive reading/writing crate.
-//!
-//! ## Features
-//! - A base implementation atop `futures`'s IO traits.
-//! - An extended implementation atop `tokio`'s IO traits.
-//! - Support for Stored, Deflate, bzip2, LZMA, zstd, and xz compression methods.
-//! - Various different reading approaches (seek, stream, filesystem, in-memory buffer).
-//! - Support for writing complete data (u8 slices) or stream writing using data descriptors.
-//! - Initial support for ZIP64 reading and writing.
-//! - Aims for reasonable [specification](https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md) compliance.
-//!
-//! ## Installation
-//!
-//! ```toml
-//! [dependencies]
-//! async_zip = { version = "0.0.17", features = ["full"] }
-//! ```
-//!
-//! ### Feature Flags
-//! - `full` - Enables all below features.
-//! - `full-wasm` - Enables all below features that are compatible with WASM.
-//! - `chrono` - Enables support for parsing dates via `chrono`.
-//! - `tokio` - Enables support for the `tokio` implementation module.
-//! - `tokio-fs` - Enables support for the `tokio::fs` reading module.
-//! - `deflate` - Enables support for the Deflate compression method.
-//! - `bzip2` - Enables support for the bzip2 compression method.
-//! - `lzma` - Enables support for the LZMA compression method.
-//! - `zstd` - Enables support for the zstd compression method.
-//! - `xz` - Enables support for the xz compression method.
-//!
-//! [Read more.](https://github.com/Majored/rs-async-zip)
-
-pub mod base;
-pub mod error;
-
-#[cfg(feature = "tokio")]
-pub mod tokio;
-
-pub(crate) mod date;
-pub(crate) mod entry;
-pub(crate) mod file;
-pub(crate) mod spec;
-pub(crate) mod string;
-pub(crate) mod utils;
-
-#[cfg(test)]
-pub(crate) mod tests;
-
-pub use crate::spec::attribute::AttributeCompatibility;
-pub use crate::spec::compression::{Compression, DeflateOption};
-
-pub use crate::date::{builder::ZipDateTimeBuilder, ZipDateTime};
-pub use crate::entry::{builder::ZipEntryBuilder, StoredZipEntry, ZipEntry};
-pub use crate::file::{builder::ZipFileBuilder, ZipFile};
-
-pub use crate::string::{StringEncoding, ZipString};
diff --git a/crates/async_zip/src/spec/attribute.rs b/crates/async_zip/src/spec/attribute.rs
deleted file mode 100644
index 0764a88..0000000
--- a/crates/async_zip/src/spec/attribute.rs
+++ /dev/null
@@ -1,41 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::error::{Result, ZipError};
-
-/// An attribute host compatibility supported by this crate.
-#[non_exhaustive]
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum AttributeCompatibility {
-    Unix,
-}
-
-impl TryFrom<u16> for AttributeCompatibility {
-    type Error = ZipError;
-
-    // Convert a u16 stored with little endianness into a supported attribute host compatibility.
-    // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4422
-    fn try_from(value: u16) -> Result<Self> {
-        match value {
-            3 => Ok(AttributeCompatibility::Unix),
-            _ => Err(ZipError::AttributeCompatibilityNotSupported(value)),
-        }
-    }
-}
-
-impl From<&AttributeCompatibility> for u16 {
-    // Convert a supported attribute host compatibility into its relevant u16 stored with little endianness.
-    // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4422
-    fn from(compatibility: &AttributeCompatibility) -> Self {
-        match compatibility {
-            AttributeCompatibility::Unix => 3,
-        }
-    }
-}
-
-impl From<AttributeCompatibility> for u16 {
-    // Convert a supported attribute host compatibility into its relevant u16 stored with little endianness.
-    fn from(compatibility: AttributeCompatibility) -> Self {
-        (&compatibility).into()
-    }
-}
diff --git a/crates/async_zip/src/spec/compression.rs b/crates/async_zip/src/spec/compression.rs
deleted file mode 100644
index 01d56a7..0000000
--- a/crates/async_zip/src/spec/compression.rs
+++ /dev/null
@@ -1,111 +0,0 @@
-// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::error::{Result, ZipError};
-
-#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
-use async_compression::Level;
-
-/// A compression method supported by this crate.
-#[non_exhaustive]
-#[derive(Debug, Clone, Copy, PartialEq, Eq)]
-pub enum Compression {
-    Stored,
-    #[cfg(feature = "deflate")]
-    Deflate,
-    #[cfg(feature = "deflate64")]
-    Deflate64,
-    #[cfg(feature = "bzip2")]
-    Bz,
-    #[cfg(feature = "lzma")]
-    Lzma,
-    #[cfg(feature = "zstd")]
-    Zstd,
-    #[cfg(feature = "xz")]
-    Xz,
-}
-
-impl TryFrom<u16> for Compression {
-    type Error = ZipError;
-
-    // Convert a u16 stored with little endianness into a supported compression method.
-    // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#445
-    fn try_from(value: u16) -> Result<Self> {
-        match value {
-            0 => Ok(Compression::Stored),
-            #[cfg(feature = "deflate")]
-            8 => Ok(Compression::Deflate),
-            #[cfg(feature = "deflate64")]
-            9 => Ok(Compression::Deflate64),
-            #[cfg(feature = "bzip2")]
-            12 => Ok(Compression::Bz),
-            #[cfg(feature = "lzma")]
-            14 => Ok(Compression::Lzma),
-            #[cfg(feature = "zstd")]
-            93 => Ok(Compression::Zstd),
-            #[cfg(feature = "xz")]
-            95 => Ok(Compression::Xz),
-            _ => Err(ZipError::CompressionNotSupported(value)),
-        }
-    }
-}
-
-impl From<&Compression> for u16 {
-    // Convert a supported compression method into its relevant u16 stored with little endianness.
-    // https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#445
-    fn from(compression: &Compression) -> u16 {
-        match compression {
-            Compression::Stored => 0,
-            #[cfg(feature = "deflate")]
-            Compression::Deflate => 8,
-            #[cfg(feature = "deflate64")]
-            Compression::Deflate64 => 9,
-            #[cfg(feature = "bzip2")]
-            Compression::Bz => 12,
-            #[cfg(feature = "lzma")]
-            Compression::Lzma => 14,
-            #[cfg(feature = "zstd")]
-            Compression::Zstd => 93,
-            #[cfg(feature = "xz")]
-            Compression::Xz => 95,
-        }
-    }
-}
-
-impl From<Compression> for u16 {
-    fn from(compression: Compression) -> u16 {
-        (&compression).into()
-    }
-}
-
-/// Level of compression data should be compressed with for deflate.
-#[derive(Debug, Clone, Copy)]
-pub enum DeflateOption {
-    // Normal (-en) compression option was used.
-    Normal,
-
-    // Maximum (-exx/-ex) compression option was used.
-    Maximum,
-
-    // Fast (-ef) compression option was used.
-    Fast,
-
-    // Super Fast (-es) compression option was used.
-    Super,
-
-    /// Other implementation defined level.
-    Other(i32),
-}
-
-#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
-impl DeflateOption {
-    pub(crate) fn into_level(self) -> Level {
-        // FIXME: There's no clear documentation on what these specific levels defined in the ZIP specification relate
-        // to. We want to be compatible with any other library, and not specific to `async_compression`'s levels.
-        if let Self::Other(l) = self {
-            Level::Precise(l)
-        } else {
-            Level::Default
-        }
-    }
-}
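The conversions above are the whole wire mapping: method ids are little-endian `u16`s, and anything unlisted surfaces as `CompressionNotSupported` rather than a panic. For instance:

```rust
use async_zip::Compression;

fn main() {
    assert_eq!(u16::from(Compression::Stored), 0);
    // Method 6 (legacy Implode) is not supported by this crate.
    assert!(Compression::try_from(6u16).is_err());
}
```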
diff --git a/crates/async_zip/src/spec/consts.rs b/crates/async_zip/src/spec/consts.rs
deleted file mode 100644
index 5500a24..0000000
--- a/crates/async_zip/src/spec/consts.rs
+++ /dev/null
@@ -1,44 +0,0 @@
-// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-pub const SIGNATURE_LENGTH: usize = 4;
-
-// Local file header constants
-//
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#437
-pub const LFH_SIGNATURE: u32 = 0x4034b50;
-#[allow(dead_code)]
-pub const LFH_LENGTH: usize = 26;
-
-// Central directory header constants
-//
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4312
-pub const CDH_SIGNATURE: u32 = 0x2014b50;
-#[allow(dead_code)]
-pub const CDH_LENGTH: usize = 42;
-
-// End of central directory record constants
-//
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316
-pub const EOCDR_SIGNATURE: u32 = 0x6054b50;
-/// The minimum length of the EOCDR, excluding the signature.
-pub const EOCDR_LENGTH: usize = 18;
-
-/// The signature for the zip64 end of central directory record.
-/// Ref: https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4314
-pub const ZIP64_EOCDR_SIGNATURE: u32 = 0x06064b50;
-/// The signature for the zip64 end of central directory locator.
-/// Ref: https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4315
-pub const ZIP64_EOCDL_SIGNATURE: u32 = 0x07064b50;
-/// The length of the ZIP64 EOCDL, including the signature.
-/// The EOCDL has a fixed size, thankfully.
-pub const ZIP64_EOCDL_LENGTH: u64 = 20;
-
-/// The contents of a header field when one must reference the zip64 version instead.
-pub const NON_ZIP64_MAX_SIZE: u32 = 0xFFFFFFFF;
-/// The maximum number of files or disks in a ZIP file before it requires ZIP64.
-pub const NON_ZIP64_MAX_NUM_FILES: u16 = 0xFFFF;
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#439
-pub const DATA_DESCRIPTOR_SIGNATURE: u32 = 0x8074b50;
-pub const DATA_DESCRIPTOR_LENGTH: usize = 12;
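These magic numbers are serialised little-endian, which is where the familiar `PK` prefix in ZIP files comes from. A quick check:

```rust
fn main() {
    // The local file header signature 0x04034b50 on disk is "PK\x03\x04".
    assert_eq!(0x4034b50u32.to_le_bytes(), *b"PK\x03\x04");
}
```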
diff --git a/crates/async_zip/src/spec/extra_field.rs b/crates/async_zip/src/spec/extra_field.rs
deleted file mode 100644
index 7506e95..0000000
--- a/crates/async_zip/src/spec/extra_field.rs
+++ /dev/null
@@ -1,320 +0,0 @@
-// Copyright Cognite AS, 2023
-
-use crate::error::{Result as ZipResult, ZipError};
-use crate::spec::header::{
-    ExtraField, HeaderId, InfoZipUnicodeCommentExtraField, InfoZipUnicodePathExtraField, UnknownExtraField,
-    Zip64ExtendedInformationExtraField,
-};
-
-use super::consts::NON_ZIP64_MAX_SIZE;
-
-pub(crate) trait ExtraFieldAsBytes {
-    fn as_bytes(&self) -> Vec<u8>;
-
-    fn count_bytes(&self) -> usize;
-}
-
-impl ExtraFieldAsBytes for &[ExtraField] {
-    fn as_bytes(&self) -> Vec<u8> {
-        let mut buffer = Vec::new();
-        for field in self.iter() {
-            buffer.append(&mut field.as_bytes());
-        }
-        buffer
-    }
-
-    fn count_bytes(&self) -> usize {
-        self.iter().map(|field| field.count_bytes()).sum()
-    }
-}
-
-impl ExtraFieldAsBytes for ExtraField {
-    fn as_bytes(&self) -> Vec<u8> {
-        match self {
-            ExtraField::Zip64ExtendedInformation(field) => field.as_bytes(),
-            ExtraField::InfoZipUnicodeComment(field) => field.as_bytes(),
-            ExtraField::InfoZipUnicodePath(field) => field.as_bytes(),
-            ExtraField::Unknown(field) => field.as_bytes(),
-        }
-    }
-
-    fn count_bytes(&self) -> usize {
-        match self {
-            ExtraField::Zip64ExtendedInformation(field) => field.count_bytes(),
-            ExtraField::InfoZipUnicodeComment(field) => field.count_bytes(),
-            ExtraField::InfoZipUnicodePath(field) => field.count_bytes(),
-            ExtraField::Unknown(field) => field.count_bytes(),
-        }
-    }
-}
-
-impl ExtraFieldAsBytes for UnknownExtraField {
-    fn as_bytes(&self) -> Vec<u8> {
-        let mut bytes = Vec::new();
-        let header_id: u16 = self.header_id.into();
-        bytes.append(&mut header_id.to_le_bytes().to_vec());
-        bytes.append(&mut self.data_size.to_le_bytes().to_vec());
-        bytes.append(&mut self.content.clone());
-
-        bytes
-    }
-
-    fn count_bytes(&self) -> usize {
-        4 + self.content.len()
-    }
-}
-
-impl ExtraFieldAsBytes for Zip64ExtendedInformationExtraField {
-    fn as_bytes(&self) -> Vec<u8> {
-        let mut bytes = Vec::new();
-        let header_id: u16 = self.header_id.into();
-        bytes.append(&mut header_id.to_le_bytes().to_vec());
-        bytes.append(&mut (self.content_size() as u16).to_le_bytes().to_vec());
-        if let Some(uncompressed_size) = &self.uncompressed_size {
-            bytes.append(&mut uncompressed_size.to_le_bytes().to_vec());
-        }
-        if let Some(compressed_size) = &self.compressed_size {
-            bytes.append(&mut compressed_size.to_le_bytes().to_vec());
-        }
-        if let Some(relative_header_offset) = &self.relative_header_offset {
-            bytes.append(&mut relative_header_offset.to_le_bytes().to_vec());
-        }
-        if let Some(disk_start_number) = &self.disk_start_number {
-            bytes.append(&mut disk_start_number.to_le_bytes().to_vec());
-        }
-
-        bytes
-    }
-
-    fn count_bytes(&self) -> usize {
-        4 + self.content_size()
-    }
-}
-
-impl ExtraFieldAsBytes for InfoZipUnicodeCommentExtraField {
-    fn as_bytes(&self) -> Vec<u8> {
-        let mut bytes = Vec::new();
-        let header_id: u16 = HeaderId::INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD.into();
-        bytes.append(&mut header_id.to_le_bytes().to_vec());
-        match self {
-            InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } => {
-                let data_size: u16 = (5 + unicode.len()).try_into().unwrap();
-                bytes.append(&mut data_size.to_le_bytes().to_vec());
-                bytes.push(1);
-                bytes.append(&mut crc32.to_le_bytes().to_vec());
-                bytes.append(&mut unicode.clone());
-            }
-            InfoZipUnicodeCommentExtraField::Unknown { version, data } => {
-                let data_size: u16 = (1 + data.len()).try_into().unwrap();
-                bytes.append(&mut data_size.to_le_bytes().to_vec());
-                bytes.push(*version);
-                bytes.append(&mut data.clone());
-            }
-        }
-        bytes
-    }
-
-    fn count_bytes(&self) -> usize {
-        match self {
-            InfoZipUnicodeCommentExtraField::V1 { unicode, .. } => 9 + unicode.len(),
-            InfoZipUnicodeCommentExtraField::Unknown { data, .. } => 5 + data.len(),
-        }
-    }
-}
-
-impl ExtraFieldAsBytes for InfoZipUnicodePathExtraField {
-    fn as_bytes(&self) -> Vec<u8> {
-        let mut bytes = Vec::new();
-        let header_id: u16 = HeaderId::INFO_ZIP_UNICODE_PATH_EXTRA_FIELD.into();
-        bytes.append(&mut header_id.to_le_bytes().to_vec());
-        match self {
-            InfoZipUnicodePathExtraField::V1 { crc32, unicode } => {
-                let data_size: u16 = (5 + unicode.len()).try_into().unwrap();
-                bytes.append(&mut data_size.to_le_bytes().to_vec());
-                bytes.push(1);
-                bytes.append(&mut crc32.to_le_bytes().to_vec());
-                bytes.append(&mut unicode.clone());
-            }
-            InfoZipUnicodePathExtraField::Unknown { version, data } => {
-                let data_size: u16 = (1 + data.len()).try_into().unwrap();
-                bytes.append(&mut data_size.to_le_bytes().to_vec());
-                bytes.push(*version);
-                bytes.append(&mut data.clone());
-            }
-        }
-        bytes
-    }
-
-    fn count_bytes(&self) -> usize {
-        match self {
-            InfoZipUnicodePathExtraField::V1 { unicode, .. } => 9 + unicode.len(),
-            InfoZipUnicodePathExtraField::Unknown { data, .. } => 5 + data.len(),
-        }
-    }
-}
-
-/// Parse a zip64 extra field from bytes.
-/// The content of "data" should exclude the header.
-fn zip64_extended_information_field_from_bytes(
-    header_id: HeaderId,
-    data: &[u8],
-    uncompressed_size: u32,
-    compressed_size: u32,
-) -> ZipResult<Zip64ExtendedInformationExtraField> {
-    // slice.take is nightly-only so we'll just use an index to track the current position
-    let mut current_idx = 0;
-    let uncompressed_size = if uncompressed_size == NON_ZIP64_MAX_SIZE && data.len() >= current_idx + 8 {
-        let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap()));
-        current_idx += 8;
-        val
-    } else {
-        None
-    };
-
-    let compressed_size = if compressed_size == NON_ZIP64_MAX_SIZE && data.len() >= current_idx + 8 {
-        let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap()));
-        current_idx += 8;
-        val
-    } else {
-        None
-    };
-
-    let relative_header_offset = if data.len() >= current_idx + 8 {
-        let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap()));
-        current_idx += 8;
-        val
-    } else {
-        None
-    };
-
-    #[allow(unused_assignments)]
-    let disk_start_number = if data.len() >= current_idx + 4 {
-        let val = Some(u32::from_le_bytes(data[current_idx..current_idx + 4].try_into().unwrap()));
-        current_idx += 4;
-        val
-    } else {
-        None
-    };
-
-    Ok(Zip64ExtendedInformationExtraField {
-        header_id,
-        uncompressed_size,
-        compressed_size,
-        relative_header_offset,
-        disk_start_number,
-    })
-}
-
-fn info_zip_unicode_comment_extra_field_from_bytes(
-    _header_id: HeaderId,
-    data_size: u16,
-    data: &[u8],
-) -> ZipResult<InfoZipUnicodeCommentExtraField> {
-    if data.is_empty() {
-        return Err(ZipError::InfoZipUnicodeCommentFieldIncomplete);
-    }
-    let version = data[0];
-    match version {
-        1 => {
-            if data.len() < 5 {
-                return Err(ZipError::InfoZipUnicodeCommentFieldIncomplete);
-            }
-            let crc32 = u32::from_le_bytes(data[1..5].try_into().unwrap());
-            let unicode = data[5..(data_size as usize)].to_vec();
-            Ok(InfoZipUnicodeCommentExtraField::V1 { crc32, unicode })
-        }
-        _ => Ok(InfoZipUnicodeCommentExtraField::Unknown { version, data: data[1..(data_size as usize)].to_vec() }),
-    }
-}
-
-fn info_zip_unicode_path_extra_field_from_bytes(
-    _header_id: HeaderId,
-    data_size: u16,
-    data: &[u8],
-) -> ZipResult<InfoZipUnicodePathExtraField> {
-    if data.is_empty() {
-        return Err(ZipError::InfoZipUnicodePathFieldIncomplete);
-    }
-    let version = data[0];
-    match version {
-        1 => {
-            if data.len() < 5 {
-                return Err(ZipError::InfoZipUnicodePathFieldIncomplete);
-            }
-            let crc32 = u32::from_le_bytes(data[1..5].try_into().unwrap());
-            let unicode = data[5..(data_size as usize)].to_vec();
-            Ok(InfoZipUnicodePathExtraField::V1 { crc32, unicode })
-        }
-        _ => Ok(InfoZipUnicodePathExtraField::Unknown { version, data: data[1..(data_size as usize)].to_vec() }),
-    }
-}
-
-pub(crate) fn extra_field_from_bytes(
-    header_id: HeaderId,
-    data_size: u16,
-    data: &[u8],
-    uncompressed_size: u32,
-    compressed_size: u32,
-) -> ZipResult<ExtraField> {
-    match header_id {
-        HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD => Ok(ExtraField::Zip64ExtendedInformation(
-            zip64_extended_information_field_from_bytes(header_id, data, uncompressed_size, compressed_size)?,
-        )),
-        HeaderId::INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD => Ok(ExtraField::InfoZipUnicodeComment(
-            info_zip_unicode_comment_extra_field_from_bytes(header_id, data_size, data)?,
-        )),
-        HeaderId::INFO_ZIP_UNICODE_PATH_EXTRA_FIELD => Ok(ExtraField::InfoZipUnicodePath(
-            info_zip_unicode_path_extra_field_from_bytes(header_id, data_size, data)?,
-        )),
-        _ => Ok(ExtraField::Unknown(UnknownExtraField { header_id, data_size, content: data.to_vec() })),
-    }
-}
-
-pub struct Zip64ExtendedInformationExtraFieldBuilder {
-    field: Zip64ExtendedInformationExtraField,
-}
-
-impl Zip64ExtendedInformationExtraFieldBuilder {
-    pub fn new() -> Self {
-        Self {
-            field: Zip64ExtendedInformationExtraField {
-                header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD,
-                uncompressed_size: None,
-                compressed_size: None,
-                relative_header_offset: None,
-                disk_start_number: None,
-            },
-        }
-    }
-
-    pub fn sizes(mut self, compressed_size: u64, uncompressed_size: u64) -> Self {
-        self.field.compressed_size = Some(compressed_size);
-        self.field.uncompressed_size = Some(uncompressed_size);
-        self
-    }
-
-    pub fn relative_header_offset(mut self, relative_header_offset: u64) -> Self {
-        self.field.relative_header_offset = Some(relative_header_offset);
-        self
-    }
-
-    #[allow(dead_code)]
-    pub fn disk_start_number(mut self, disk_start_number: u32) -> Self {
-        self.field.disk_start_number = Some(disk_start_number);
-        self
-    }
-
-    pub fn eof_only(&self) -> bool {
-        (self.field.uncompressed_size.is_none() && self.field.compressed_size.is_none())
-            && (self.field.relative_header_offset.is_some() || self.field.disk_start_number.is_some())
-    }
-
-    pub fn build(self) -> ZipResult<Zip64ExtendedInformationExtraField> {
-        let field = self.field;
-
-        if field.content_size() == 0 {
-            return Err(ZipError::Zip64ExtendedFieldIncomplete);
-        }
-        Ok(field)
-    }
-}
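Putting the builder together: a ZIP64 field for an entry whose sizes overflow 32 bits, using the module's own aliases (these types are crate-internal, so this is illustrative only):

```rust
// Sizes are present, so eof_only() is false and build() cannot fail
// with Zip64ExtendedFieldIncomplete.
fn oversized_entry_field() -> ZipResult<Zip64ExtendedInformationExtraField> {
    Zip64ExtendedInformationExtraFieldBuilder::new()
        .sizes(5_368_709_120, 5_368_709_120) // 5 GiB compressed/uncompressed
        .relative_header_offset(0)
        .build()
}
```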
diff --git a/crates/async_zip/src/spec/header.rs b/crates/async_zip/src/spec/header.rs
deleted file mode 100644
index f7c4392..0000000
--- a/crates/async_zip/src/spec/header.rs
+++ /dev/null
@@ -1,161 +0,0 @@
-// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#437
-pub struct LocalFileHeader {
-    pub version: u16,
-    pub flags: GeneralPurposeFlag,
-    pub compression: u16,
-    pub mod_time: u16,
-    pub mod_date: u16,
-    pub crc: u32,
-    pub compressed_size: u32,
-    pub uncompressed_size: u32,
-    pub file_name_length: u16,
-    pub extra_field_length: u16,
-}
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#444
-#[derive(Copy, Clone)]
-pub struct GeneralPurposeFlag {
-    pub encrypted: bool,
-    pub data_descriptor: bool,
-    pub filename_unicode: bool,
-}
-
-/// 2 byte header ids
-/// Ref https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#452
-#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
-pub struct HeaderId(pub u16);
-
-impl HeaderId {
-    pub const ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD: HeaderId = HeaderId(0x0001);
-    pub const INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD: HeaderId = HeaderId(0x6375);
-    pub const INFO_ZIP_UNICODE_PATH_EXTRA_FIELD: HeaderId = HeaderId(0x7075);
-}
-
-impl From<u16> for HeaderId {
-    fn from(value: u16) -> Self {
-        HeaderId(value)
-    }
-}
-
-impl From<HeaderId> for u16 {
-    fn from(value: HeaderId) -> Self {
-        value.0
-    }
-}
-
-/// Represents each extra field.
-/// Not strictly part of the spec, but is the most useful way to represent the data.
-#[derive(Clone, Debug)]
-#[non_exhaustive]
-pub enum ExtraField {
-    Zip64ExtendedInformation(Zip64ExtendedInformationExtraField),
-    InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField),
-    InfoZipUnicodePath(InfoZipUnicodePathExtraField),
-    Unknown(UnknownExtraField),
-}
-
-/// An extended information header for Zip64.
-/// This field is used both for local file headers and central directory records.
-/// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#453
-#[derive(Clone, Debug)]
-pub struct Zip64ExtendedInformationExtraField {
-    pub header_id: HeaderId,
-    pub uncompressed_size: Option<u64>,
-    pub compressed_size: Option<u64>,
-    // While not specified in the spec, these two fields are often left out in practice.
-    pub relative_header_offset: Option<u64>,
-    pub disk_start_number: Option<u32>,
-}
-
-impl Zip64ExtendedInformationExtraField {
-    pub(crate) fn content_size(&self) -> usize {
-        self.uncompressed_size.map(|_| 8).unwrap_or_default()
-            + self.compressed_size.map(|_| 8).unwrap_or_default()
-            + self.relative_header_offset.map(|_| 8).unwrap_or_default()
-            + self.disk_start_number.map(|_| 4).unwrap_or_default()
-    }
-}
-
-/// Stores the UTF-8 version of the file comment as stored in the central directory header.
-/// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#468
-#[derive(Clone, Debug)]
-pub enum InfoZipUnicodeCommentExtraField {
-    V1 { crc32: u32, unicode: Vec<u8> },
-    Unknown { version: u8, data: Vec<u8> },
-}
-
-/// Stores the UTF-8 version of the file name field as stored in the local header and central directory header.
-/// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#469
-#[derive(Clone, Debug)]
-pub enum InfoZipUnicodePathExtraField {
-    V1 { crc32: u32, unicode: Vec<u8> },
-    Unknown { version: u8, data: Vec<u8> },
-}
-
-/// Represents any unparsed extra field.
-#[derive(Clone, Debug)]
-pub struct UnknownExtraField {
-    pub header_id: HeaderId,
-    pub data_size: u16,
-    pub content: Vec<u8>,
-}
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4312
-pub struct CentralDirectoryRecord {
-    pub v_made_by: u16,
-    pub v_needed: u16,
-    pub flags: GeneralPurposeFlag,
-    pub compression: u16,
-    pub mod_time: u16,
-    pub mod_date: u16,
-    pub crc: u32,
-    pub compressed_size: u32,
-    pub uncompressed_size: u32,
-    pub file_name_length: u16,
-    pub extra_field_length: u16,
-    pub file_comment_length: u16,
-    pub disk_start: u16,
-    pub inter_attr: u16,
-    pub exter_attr: u32,
-    pub lh_offset: u32,
-}
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316
-#[derive(Debug)]
-pub struct EndOfCentralDirectoryHeader {
-    pub(crate) disk_num: u16,
-    pub(crate) start_cent_dir_disk: u16,
-    pub(crate) num_of_entries_disk: u16,
-    pub(crate) num_of_entries: u16,
-    pub(crate) size_cent_dir: u32,
-    pub(crate) cent_dir_offset: u32,
-    pub(crate) file_comm_length: u16,
-}
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4314
-#[derive(Debug, PartialEq)]
-pub struct Zip64EndOfCentralDirectoryRecord {
-    /// The size of this Zip64EndOfCentralDirectoryRecord.
-    /// This is specified because there is a variable-length extra zip64 information sector.
-    /// However, we will gleefully ignore this sector because it is reserved for use by PKWare.
-    pub size_of_zip64_end_of_cd_record: u64,
-    pub version_made_by: u16,
-    pub version_needed_to_extract: u16,
-    pub disk_number: u32,
-    pub disk_number_start_of_cd: u32,
-    pub num_entries_in_directory_on_disk: u64,
-    pub num_entries_in_directory: u64,
-    pub directory_size: u64,
-    pub offset_of_start_of_directory: u64,
-}
-
-// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4315
-#[derive(Debug, PartialEq)]
-pub struct Zip64EndOfCentralDirectoryLocator {
-    pub number_of_disk_with_start_of_zip64_end_of_central_directory: u32,
-    pub relative_offset: u64,
-    pub total_number_of_disks: u32,
-}
diff --git a/crates/async_zip/src/spec/mod.rs b/crates/async_zip/src/spec/mod.rs
deleted file mode 100644
index 1a91ef9..0000000
--- a/crates/async_zip/src/spec/mod.rs
+++ /dev/null
@@ -1,12 +0,0 @@
-// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-pub(crate) mod attribute;
-pub(crate) mod compression;
-pub(crate) mod consts;
-pub(crate) mod extra_field;
-pub(crate) mod header;
-pub(crate) mod parse;
-pub(crate) mod version;
-
-pub use compression::Compression;
diff --git a/crates/async_zip/src/spec/parse.rs b/crates/async_zip/src/spec/parse.rs
deleted file mode 100644
index 422d468..0000000
--- a/crates/async_zip/src/spec/parse.rs
+++ /dev/null
@@ -1,345 +0,0 @@
-// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
-// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
-
-use crate::error::{Result, ZipError};
-use crate::spec::header::{
-    CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, GeneralPurposeFlag, HeaderId, LocalFileHeader,
-    Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord,
-};
-
-use futures_lite::io::{AsyncRead, AsyncReadExt};
-
-impl LocalFileHeader {
-    pub fn as_slice(&self) -> [u8; 26] {
-        let mut array = [0; 26];
-        let mut cursor = 0;
-
-        array_push!(array, cursor, self.version.to_le_bytes());
-        array_push!(array, cursor, self.flags.as_slice());
-        array_push!(array, cursor, self.compression.to_le_bytes());
-        array_push!(array, cursor, self.mod_time.to_le_bytes());
-        array_push!(array, cursor, self.mod_date.to_le_bytes());
-        array_push!(array, cursor, self.crc.to_le_bytes());
-        array_push!(array, cursor, self.compressed_size.to_le_bytes());
-        array_push!(array, cursor, self.uncompressed_size.to_le_bytes());
-        array_push!(array, cursor, self.file_name_length.to_le_bytes());
-        array_push!(array, cursor, self.extra_field_length.to_le_bytes());
-
-        array
-    }
-}
-
-impl GeneralPurposeFlag {
-    pub fn as_slice(&self) -> [u8; 2] {
-        let encrypted: u16 = match self.encrypted {
-            false => 0x0,
-            true => 0b1,
-        };
-        let data_descriptor: u16 = match self.data_descriptor {
-            false => 0x0,
-            true => 0x8,
-        };
-        let filename_unicode: u16 = match self.filename_unicode {
-            false => 0x0,
-            true => 0x800,
-        };
-
-        (encrypted | data_descriptor | filename_unicode).to_le_bytes()
-    }
-}
-
-impl CentralDirectoryRecord {
-    pub fn as_slice(&self) -> [u8; 42] {
-        let mut array = [0; 42];
-        let mut cursor = 0;
-
-        array_push!(array, cursor, self.v_made_by.to_le_bytes());
-        array_push!(array, cursor, self.v_needed.to_le_bytes());
-        array_push!(array, cursor, self.flags.as_slice());
-        array_push!(array, cursor, self.compression.to_le_bytes());
-        array_push!(array, cursor, self.mod_time.to_le_bytes());
-        array_push!(array, cursor, self.mod_date.to_le_bytes());
-        array_push!(array, cursor, self.crc.to_le_bytes());
-        array_push!(array, cursor, self.compressed_size.to_le_bytes());
-        array_push!(array, cursor, self.uncompressed_size.to_le_bytes());
-        array_push!(array, cursor, self.file_name_length.to_le_bytes());
-        array_push!(array, cursor, self.extra_field_length.to_le_bytes());
-        array_push!(array, cursor, self.file_comment_length.to_le_bytes());
-        array_push!(array, cursor, self.disk_start.to_le_bytes());
-        array_push!(array, cursor, self.inter_attr.to_le_bytes());
-        array_push!(array, cursor, self.exter_attr.to_le_bytes());
-        array_push!(array, cursor, self.lh_offset.to_le_bytes());
-
-        array
-    }
-}
-
-impl EndOfCentralDirectoryHeader {
-    pub fn as_slice(&self) -> [u8; 18] {
-        let mut array = [0; 18];
-        let mut cursor = 0;
-
-        array_push!(array, cursor, self.disk_num.to_le_bytes());
-        array_push!(array, cursor, self.start_cent_dir_disk.to_le_bytes());
-        array_push!(array, cursor, self.num_of_entries_disk.to_le_bytes());
-        array_push!(array, cursor, self.num_of_entries.to_le_bytes());
-        array_push!(array, cursor, self.size_cent_dir.to_le_bytes());
-        array_push!(array, cursor, self.cent_dir_offset.to_le_bytes());
-        array_push!(array, cursor, self.file_comm_length.to_le_bytes());
-
-        array
-    }
-}
-
-impl From<[u8; 26]> for LocalFileHeader {
-    fn from(value: [u8; 26]) -> LocalFileHeader {
-        LocalFileHeader {
-            version: u16::from_le_bytes(value[0..2].try_into().unwrap()),
-            flags: GeneralPurposeFlag::from(u16::from_le_bytes(value[2..4].try_into().unwrap())),
-            compression: u16::from_le_bytes(value[4..6].try_into().unwrap()),
-            mod_time: u16::from_le_bytes(value[6..8].try_into().unwrap()),
-            mod_date: u16::from_le_bytes(value[8..10].try_into().unwrap()),
-            crc: u32::from_le_bytes(value[10..14].try_into().unwrap()),
-            compressed_size: u32::from_le_bytes(value[14..18].try_into().unwrap()),
-            uncompressed_size: u32::from_le_bytes(value[18..22].try_into().unwrap()),
-            file_name_length: u16::from_le_bytes(value[22..24].try_into().unwrap()),
-            extra_field_length: u16::from_le_bytes(value[24..26].try_into().unwrap()),
-        }
-    }
-}
-
-impl From<u16> for GeneralPurposeFlag {
-    fn from(value: u16) -> GeneralPurposeFlag {
-        let encrypted = !matches!(value & 0x1, 0);
-        let data_descriptor = !matches!((value & 0x8) >> 3, 0);
-        let filename_unicode = !matches!((value & 0x800) >> 11, 0);
-
-        GeneralPurposeFlag { encrypted, data_descriptor, filename_unicode }
-    }
-}
-
-impl From<[u8; 42]> for CentralDirectoryRecord {
-    fn from(value: [u8; 42]) -> CentralDirectoryRecord {
-        CentralDirectoryRecord {
-            v_made_by: u16::from_le_bytes(value[0..2].try_into().unwrap()),
-            v_needed: u16::from_le_bytes(value[2..4].try_into().unwrap()),
-            flags: GeneralPurposeFlag::from(u16::from_le_bytes(value[4..6].try_into().unwrap())),
-            compression: u16::from_le_bytes(value[6..8].try_into().unwrap()),
-            mod_time: u16::from_le_bytes(value[8..10].try_into().unwrap()),
-            mod_date: u16::from_le_bytes(value[10..12].try_into().unwrap()),
-            crc: u32::from_le_bytes(value[12..16].try_into().unwrap()),
-            compressed_size: u32::from_le_bytes(value[16..20].try_into().unwrap()),
-            uncompressed_size: u32::from_le_bytes(value[20..24].try_into().unwrap()),
-            file_name_length: u16::from_le_bytes(value[24..26].try_into().unwrap()),
-            extra_field_length: u16::from_le_bytes(value[26..28].try_into().unwrap()),
-            file_comment_length: u16::from_le_bytes(value[28..30].try_into().unwrap()),
-            disk_start: u16::from_le_bytes(value[30..32].try_into().unwrap()),
-            inter_attr: u16::from_le_bytes(value[32..34].try_into().unwrap()),
-            exter_attr: u32::from_le_bytes(value[34..38].try_into().unwrap()),
-            lh_offset: u32::from_le_bytes(value[38..42].try_into().unwrap()),
-        }
-    }
-}
lh_offset: u32::from_le_bytes(value[38..42].try_into().unwrap()), - } - } -} - -impl From<[u8; 18]> for EndOfCentralDirectoryHeader { - fn from(value: [u8; 18]) -> EndOfCentralDirectoryHeader { - EndOfCentralDirectoryHeader { - disk_num: u16::from_le_bytes(value[0..2].try_into().unwrap()), - start_cent_dir_disk: u16::from_le_bytes(value[2..4].try_into().unwrap()), - num_of_entries_disk: u16::from_le_bytes(value[4..6].try_into().unwrap()), - num_of_entries: u16::from_le_bytes(value[6..8].try_into().unwrap()), - size_cent_dir: u32::from_le_bytes(value[8..12].try_into().unwrap()), - cent_dir_offset: u32::from_le_bytes(value[12..16].try_into().unwrap()), - file_comm_length: u16::from_le_bytes(value[16..18].try_into().unwrap()), - } - } -} - -impl From<[u8; 52]> for Zip64EndOfCentralDirectoryRecord { - fn from(value: [u8; 52]) -> Self { - Self { - size_of_zip64_end_of_cd_record: u64::from_le_bytes(value[0..8].try_into().unwrap()), - version_made_by: u16::from_le_bytes(value[8..10].try_into().unwrap()), - version_needed_to_extract: u16::from_le_bytes(value[10..12].try_into().unwrap()), - disk_number: u32::from_le_bytes(value[12..16].try_into().unwrap()), - disk_number_start_of_cd: u32::from_le_bytes(value[16..20].try_into().unwrap()), - num_entries_in_directory_on_disk: u64::from_le_bytes(value[20..28].try_into().unwrap()), - num_entries_in_directory: u64::from_le_bytes(value[28..36].try_into().unwrap()), - directory_size: u64::from_le_bytes(value[36..44].try_into().unwrap()), - offset_of_start_of_directory: u64::from_le_bytes(value[44..52].try_into().unwrap()), - } - } -} - -impl From<[u8; 16]> for Zip64EndOfCentralDirectoryLocator { - fn from(value: [u8; 16]) -> Self { - Self { - number_of_disk_with_start_of_zip64_end_of_central_directory: u32::from_le_bytes( - value[0..4].try_into().unwrap(), - ), - relative_offset: u64::from_le_bytes(value[4..12].try_into().unwrap()), - total_number_of_disks: u32::from_le_bytes(value[12..16].try_into().unwrap()), - } - } -} - -impl LocalFileHeader { - pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<LocalFileHeader> { - let mut buffer: [u8; 26] = [0; 26]; - reader.read_exact(&mut buffer).await?; - Ok(LocalFileHeader::from(buffer)) - } -} - -impl EndOfCentralDirectoryHeader { - pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<EndOfCentralDirectoryHeader> { - let mut buffer: [u8; 18] = [0; 18]; - reader.read_exact(&mut buffer).await?; - Ok(EndOfCentralDirectoryHeader::from(buffer)) - } -} - -impl CentralDirectoryRecord { - pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<CentralDirectoryRecord> { - let mut buffer: [u8; 42] = [0; 42]; - reader.read_exact(&mut buffer).await?; - Ok(CentralDirectoryRecord::from(buffer)) - } -} - -impl Zip64EndOfCentralDirectoryRecord { - pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<Self> { - let mut buffer: [u8; 52] = [0; 52]; - reader.read_exact(&mut buffer).await?; - Ok(Self::from(buffer)) - } - - pub fn as_bytes(&self) -> [u8; 52] { - let mut array = [0; 52]; - let mut cursor = 0; - - array_push!(array, cursor, self.size_of_zip64_end_of_cd_record.to_le_bytes()); - array_push!(array, cursor, self.version_made_by.to_le_bytes()); - array_push!(array, cursor, self.version_needed_to_extract.to_le_bytes()); - array_push!(array, cursor, self.disk_number.to_le_bytes()); - array_push!(array, cursor, self.disk_number_start_of_cd.to_le_bytes()); - array_push!(array, cursor, self.num_entries_in_directory_on_disk.to_le_bytes()); - array_push!(array, cursor, self.num_entries_in_directory.to_le_bytes()); - array_push!(array, cursor, self.directory_size.to_le_bytes()); - array_push!(array, cursor, 
self.offset_of_start_of_directory.to_le_bytes()); - - array - } -} - -impl Zip64EndOfCentralDirectoryLocator { - /// Read 4 bytes from the reader and check whether its signature matches that of the EOCDL. - /// If it does, return Some(EOCDL), otherwise return None. - pub async fn try_from_reader<R: AsyncRead + Unpin>( - reader: &mut R, - ) -> Result<Option<Self>> { - let signature = { - let mut buffer = [0; 4]; - reader.read_exact(&mut buffer).await?; - u32::from_le_bytes(buffer) - }; - if signature != ZIP64_EOCDL_SIGNATURE { - return Ok(None); - } - let mut buffer: [u8; 16] = [0; 16]; - reader.read_exact(&mut buffer).await?; - Ok(Some(Self::from(buffer))) - } - - pub fn as_bytes(&self) -> [u8; 16] { - let mut array = [0; 16]; - let mut cursor = 0; - - array_push!(array, cursor, self.number_of_disk_with_start_of_zip64_end_of_central_directory.to_le_bytes()); - array_push!(array, cursor, self.relative_offset.to_le_bytes()); - array_push!(array, cursor, self.total_number_of_disks.to_le_bytes()); - - array - } -} - -/// Parse the extra fields. -pub fn parse_extra_fields(data: Vec<u8>, uncompressed_size: u32, compressed_size: u32) -> Result<Vec<ExtraField>> { - let mut cursor = 0; - let mut extra_fields = Vec::new(); - while cursor + 4 < data.len() { - let header_id: HeaderId = u16::from_le_bytes(data[cursor..cursor + 2].try_into().unwrap()).into(); - let field_size = u16::from_le_bytes(data[cursor + 2..cursor + 4].try_into().unwrap()); - if cursor + 4 + field_size as usize > data.len() { - // Report the number of data bytes actually available for this field; the previous - // calculation (data.len() - cursor - 8 - field_size) underflowed whenever this branch was taken. - return Err(ZipError::InvalidExtraFieldHeader(field_size, data.len() - cursor - 4)); - } - let data = &data[cursor + 4..cursor + 4 + field_size as usize]; - extra_fields.push(extra_field_from_bytes(header_id, field_size, data, uncompressed_size, compressed_size)?); - cursor += 4 + field_size as usize; - } - Ok(extra_fields) -} - -/// Replace elements of an array at a given cursor index for use with a zero-initialised array. -macro_rules! 
array_push { - ($arr:ident, $cursor:ident, $value:expr) => {{ - for entry in $value { - $arr[$cursor] = entry; - $cursor += 1; - } - }}; -} - -use crate::spec::consts::ZIP64_EOCDL_SIGNATURE; -use crate::spec::extra_field::extra_field_from_bytes; -pub(crate) use array_push; - -#[cfg(test)] -mod tests { - use super::*; - - #[test] - fn test_parse_zip64_eocdr() { - let eocdr: [u8; 56] = [ - 0x50, 0x4B, 0x06, 0x06, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x03, 0x2D, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, - ]; - - let without_signature: [u8; 52] = eocdr[4..56].try_into().unwrap(); - let zip64eocdr = Zip64EndOfCentralDirectoryRecord::from(without_signature); - assert_eq!( - zip64eocdr, - Zip64EndOfCentralDirectoryRecord { - size_of_zip64_end_of_cd_record: 44, - version_made_by: 798, - version_needed_to_extract: 45, - disk_number: 0, - disk_number_start_of_cd: 0, - num_entries_in_directory_on_disk: 1, - num_entries_in_directory: 1, - directory_size: 47, - offset_of_start_of_directory: 64, - } - ) - } - - #[tokio::test] - async fn test_parse_zip64_eocdl() { - let eocdl: [u8; 20] = [ - 0x50, 0x4B, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, - 0x00, 0x00, - ]; - let mut cursor = futures_lite::io::Cursor::new(eocdl); - let zip64eocdl = Zip64EndOfCentralDirectoryLocator::try_from_reader(&mut cursor).await.unwrap().unwrap(); - assert_eq!( - zip64eocdl, - Zip64EndOfCentralDirectoryLocator { - number_of_disk_with_start_of_zip64_end_of_central_directory: 0, - relative_offset: 111, - total_number_of_disks: 1, - } - ) - } -} diff --git a/crates/async_zip/src/spec/version.rs b/crates/async_zip/src/spec/version.rs deleted file mode 100644 index cf0e69d..0000000 --- a/crates/async_zip/src/spec/version.rs +++ /dev/null @@ -1,42 +0,0 @@ -// Copyright (c) 2021 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::entry::ZipEntry; -#[cfg(any( - feature = "deflate", - feature = "bzip2", - feature = "zstd", - feature = "lzma", - feature = "xz", - feature = "deflate64" -))] -use crate::spec::Compression; - -pub(crate) const SPEC_VERSION_MADE_BY: u16 = 63; - -// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#443 -pub fn as_needed_to_extract(entry: &ZipEntry) -> u16 { - let mut version = match entry.compression() { - #[cfg(feature = "deflate")] - Compression::Deflate => 20, - #[cfg(feature = "deflate64")] - Compression::Deflate64 => 21, - #[cfg(feature = "bzip2")] - Compression::Bz => 46, - #[cfg(feature = "lzma")] - Compression::Lzma => 63, - _ => 10, - }; - - if let Ok(true) = entry.dir() { - version = std::cmp::max(version, 20); - } - - version -} - -// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#442 -pub fn as_made_by() -> u16 { - // Default to UNIX mapping for the moment. - 3 << 8 | SPEC_VERSION_MADE_BY -} diff --git a/crates/async_zip/src/string.rs b/crates/async_zip/src/string.rs deleted file mode 100644 index 4bab8ed..0000000 --- a/crates/async_zip/src/string.rs +++ /dev/null @@ -1,112 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::error::{Result, ZipError}; - -/// A string encoding supported by this crate. 
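The `as_made_by()` helper in `version.rs` above packs a host-system identifier into the high byte of the u16 and the ZIP spec version into the low byte. A standalone sketch of that packing and its inverse (the `split_version_made_by` helper is illustrative, not part of the crate):

```rust
// Mirrors the `3 << 8 | SPEC_VERSION_MADE_BY` expression above (3 = UNIX host,
// 63 = spec version 6.3 encoded as major * 10 + minor).
fn split_version_made_by(v: u16) -> (u8, u8) {
    ((v >> 8) as u8, (v & 0xFF) as u8) // (host system, encoded spec version)
}

fn main() {
    let made_by = 3u16 << 8 | 63;
    assert_eq!(made_by, 831);
    assert_eq!(split_version_made_by(made_by), (3, 63));
}
```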
-#[derive(Debug, Clone, Copy)] -pub enum StringEncoding { - Utf8, - Raw, -} - -/// A string wrapper for handling different encodings. -#[derive(Debug, Clone)] -pub struct ZipString { - encoding: StringEncoding, - raw: Vec<u8>, - alternative: Option<Vec<u8>>, -} - -impl ZipString { - /// Constructs a new encoded string from its raw bytes and its encoding type. - /// - /// # Note - /// If the provided encoding is [`StringEncoding::Utf8`] but the raw bytes are not valid UTF-8 (i.e. a call to - /// `std::str::from_utf8()` fails), the encoding is defaulted back to [`StringEncoding::Raw`]. - pub fn new(raw: Vec<u8>, mut encoding: StringEncoding) -> Self { - if let StringEncoding::Utf8 = encoding { - if std::str::from_utf8(&raw).is_err() { - encoding = StringEncoding::Raw; - } - } - - Self { encoding, raw, alternative: None } - } - - /// Constructs a new encoded string from utf-8 data, with an alternative in native MBCS encoding. - pub fn new_with_alternative(utf8: String, alternative: Vec<u8>) -> Self { - Self { encoding: StringEncoding::Utf8, raw: utf8.into_bytes(), alternative: Some(alternative) } - } - - /// Returns the raw bytes for this string. - pub fn as_bytes(&self) -> &[u8] { - &self.raw - } - - /// Returns the encoding type for this string. - pub fn encoding(&self) -> StringEncoding { - self.encoding - } - - /// Returns the alternative bytes (in native MBCS encoding) for this string. - pub fn alternative(&self) -> Option<&[u8]> { - self.alternative.as_deref() - } - - /// Returns the raw bytes converted into a string slice. - /// - /// # Note - /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`]. - pub fn as_str(&self) -> Result<&str> { - if !matches!(self.encoding, StringEncoding::Utf8) { - return Err(ZipError::StringNotUtf8); - } - - // SAFETY: - // "The bytes passed in must be valid UTF-8." - // - // This function will error if self.encoding is not StringEncoding::Utf8. - // - // self.encoding is only ever StringEncoding::Utf8 if this variant was provided to the constructor AND the - // call to `std::str::from_utf8()` within the constructor succeeded. Mutable access to the inner vector is - // never given and no method implemented on this type mutates the inner vector. - - Ok(unsafe { std::str::from_utf8_unchecked(&self.raw) }) - } - - /// Returns the raw bytes converted to an owned string. - /// - /// # Note - /// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`]. - pub fn into_string(self) -> Result<String> { - if !matches!(self.encoding, StringEncoding::Utf8) { - return Err(ZipError::StringNotUtf8); - } - - // SAFETY: See above. - Ok(unsafe { String::from_utf8_unchecked(self.raw) }) - } - - /// Returns the alternative bytes (in native MBCS encoding) converted to an owned vector. - pub fn into_alternative(self) -> Option<Vec<u8>> { - self.alternative - } - - /// Returns whether this string is encoded as utf-8 without an alternative. 
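An aside on the fallback rule `ZipString::new` applies above: bytes claimed to be UTF-8 that fail validation silently degrade to `Raw`. A dependency-free sketch of just that check (`is_valid_utf8` is written only for this example):

```rust
// The same validation ZipString::new performs before trusting a claimed Utf8 encoding.
fn is_valid_utf8(raw: &[u8]) -> bool {
    std::str::from_utf8(raw).is_ok()
}

fn main() {
    // "中文.txt" encoded as UTF-8 passes validation.
    assert!(is_valid_utf8("中文.txt".as_bytes()));
    // The same name encoded as GBK (0xD6 0xD0 0xCE 0xC4) is not valid UTF-8,
    // so a ZipString built from it would fall back to StringEncoding::Raw.
    assert!(!is_valid_utf8(&[0xD6, 0xD0, 0xCE, 0xC4, b'.', b't', b'x', b't']));
}
```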
- pub fn is_utf8_without_alternative(&self) -> bool { - matches!(self.encoding, StringEncoding::Utf8) && self.alternative.is_none() - } -} - -impl From<String> for ZipString { - fn from(value: String) -> Self { - Self { encoding: StringEncoding::Utf8, raw: value.into_bytes(), alternative: None } - } -} - -impl From<&str> for ZipString { - fn from(value: &str) -> Self { - Self { encoding: StringEncoding::Utf8, raw: value.as_bytes().to_vec(), alternative: None } - } -} diff --git a/crates/async_zip/src/tests/combined/mod.rs b/crates/async_zip/src/tests/combined/mod.rs deleted file mode 100644 index d17ab7d..0000000 --- a/crates/async_zip/src/tests/combined/mod.rs +++ /dev/null @@ -1,2 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) diff --git a/crates/async_zip/src/tests/mod.rs b/crates/async_zip/src/tests/mod.rs deleted file mode 100644 index 35ecf91..0000000 --- a/crates/async_zip/src/tests/mod.rs +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -pub(crate) mod combined; -pub(crate) mod read; -pub(crate) mod spec; -pub(crate) mod write; - -use std::sync::Once; -static ENV_LOGGER: Once = Once::new(); - -/// Initialize the env logger for any tests that require it. -/// Safe to call multiple times. -fn init_logger() { - ENV_LOGGER.call_once(|| env_logger::Builder::from_default_env().format_module_path(true).init()); -} diff --git a/crates/async_zip/src/tests/read/compression/bzip2.data b/crates/async_zip/src/tests/read/compression/bzip2.data deleted file mode 100644 index bff81d5..0000000 Binary files a/crates/async_zip/src/tests/read/compression/bzip2.data and /dev/null differ diff --git a/crates/async_zip/src/tests/read/compression/deflate.data b/crates/async_zip/src/tests/read/compression/deflate.data deleted file mode 100644 index 283d32b..0000000 Binary files a/crates/async_zip/src/tests/read/compression/deflate.data and /dev/null differ diff --git a/crates/async_zip/src/tests/read/compression/lzma.data b/crates/async_zip/src/tests/read/compression/lzma.data deleted file mode 100644 index 08f95f7..0000000 Binary files a/crates/async_zip/src/tests/read/compression/lzma.data and /dev/null differ diff --git a/crates/async_zip/src/tests/read/compression/mod.rs b/crates/async_zip/src/tests/read/compression/mod.rs deleted file mode 100644 index eadd8d6..0000000 --- a/crates/async_zip/src/tests/read/compression/mod.rs +++ /dev/null @@ -1,46 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::base::read::io::compressed::CompressedReader; -use crate::spec::Compression; - -compressed_test_helper!(stored_test, Compression::Stored, "foo bar", "foo bar"); - -#[cfg(feature = "deflate")] -compressed_test_helper!(deflate_test, Compression::Deflate, "foo bar", include_bytes!("deflate.data")); - -#[cfg(feature = "bzip2")] -compressed_test_helper!(bz_test, Compression::Bz, "foo bar", include_bytes!("bzip2.data")); - -#[cfg(feature = "lzma")] -compressed_test_helper!(lzma_test, Compression::Lzma, "foo bar", include_bytes!("lzma.data")); - -#[cfg(feature = "zstd")] -compressed_test_helper!(zstd_test, Compression::Zstd, "foo bar", include_bytes!("zstd.data")); - -#[cfg(feature = "xz")] -compressed_test_helper!(xz_test, Compression::Xz, "foo bar", include_bytes!("xz.data")); - -/// A helper macro for 
generating a CompressedReader test using a specific compression method. -macro_rules! compressed_test_helper { - ($name:ident, $typ:expr, $data_raw:expr, $data:expr) => { - #[cfg(test)] - #[tokio::test] - async fn $name() { - use futures_lite::io::{AsyncReadExt, Cursor}; - - let data = $data; - let data_raw = $data_raw; - - let cursor = Cursor::new(data); - let mut reader = CompressedReader::new(cursor, $typ); - - let mut read_data = String::new(); - reader.read_to_string(&mut read_data).await.expect("read into CompressedReader failed"); - - assert_eq!(read_data, data_raw); - } - }; -} - -use compressed_test_helper; diff --git a/crates/async_zip/src/tests/read/compression/xz.data b/crates/async_zip/src/tests/read/compression/xz.data deleted file mode 100644 index 058526b..0000000 Binary files a/crates/async_zip/src/tests/read/compression/xz.data and /dev/null differ diff --git a/crates/async_zip/src/tests/read/compression/zstd.data b/crates/async_zip/src/tests/read/compression/zstd.data deleted file mode 100644 index beaa09f..0000000 Binary files a/crates/async_zip/src/tests/read/compression/zstd.data and /dev/null differ diff --git a/crates/async_zip/src/tests/read/locator/empty-buffer-boundary.zip b/crates/async_zip/src/tests/read/locator/empty-buffer-boundary.zip deleted file mode 100644 index b6b7174..0000000 Binary files a/crates/async_zip/src/tests/read/locator/empty-buffer-boundary.zip and /dev/null differ diff --git a/crates/async_zip/src/tests/read/locator/empty-with-max-comment.zip b/crates/async_zip/src/tests/read/locator/empty-with-max-comment.zip deleted file mode 100644 index fc1f498..0000000 Binary files a/crates/async_zip/src/tests/read/locator/empty-with-max-comment.zip and /dev/null differ diff --git a/crates/async_zip/src/tests/read/locator/empty.zip b/crates/async_zip/src/tests/read/locator/empty.zip deleted file mode 100644 index 15cb0ec..0000000 Binary files a/crates/async_zip/src/tests/read/locator/empty.zip and /dev/null differ diff --git a/crates/async_zip/src/tests/read/locator/mod.rs b/crates/async_zip/src/tests/read/locator/mod.rs deleted file mode 100644 index d08950c..0000000 --- a/crates/async_zip/src/tests/read/locator/mod.rs +++ /dev/null @@ -1,64 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -#[test] -fn search_one_byte_test() { - let buffer: &[u8] = &[0x0, 0x0, 0x0, 0x0, 0x0, 0x0]; - let signature: &[u8] = &[0x1]; - - let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature); - assert!(matched.is_none()); - - let buffer: &[u8] = &[0x2, 0x1, 0x0, 0x0, 0x0, 0x0]; - let signature: &[u8] = &[0x1]; - - let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature); - assert!(matched.is_some()); - assert_eq!(1, matched.unwrap()); -} - -#[test] -fn search_two_byte_test() { - let buffer: &[u8] = &[0x2, 0x1, 0x0, 0x0, 0x0, 0x0]; - let signature: &[u8] = &[0x2, 0x1]; - - let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature); - assert!(matched.is_some()); - assert_eq!(1, matched.unwrap()); -} - -#[tokio::test] -async fn locator_empty_test() { - use futures_lite::io::Cursor; - - let data = &include_bytes!("empty.zip"); - let mut cursor = Cursor::new(data); - let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await; - - assert!(eocdr.is_ok()); - assert_eq!(eocdr.unwrap(), 4); -} - -#[tokio::test] -async fn locator_empty_max_comment_test() { - use futures_lite::io::Cursor; - - 
let data = &include_bytes!("empty-with-max-comment.zip"); - let mut cursor = Cursor::new(data); - let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await; - - assert!(eocdr.is_ok()); - assert_eq!(eocdr.unwrap(), 4); -} - -#[tokio::test] -async fn locator_buffer_boundary_test() { - use futures_lite::io::Cursor; - - let data = &include_bytes!("empty-buffer-boundary.zip"); - let mut cursor = Cursor::new(data); - let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await; - - assert!(eocdr.is_ok()); - assert_eq!(eocdr.unwrap(), 4); -} diff --git a/crates/async_zip/src/tests/read/mod.rs b/crates/async_zip/src/tests/read/mod.rs deleted file mode 100644 index 9c5f507..0000000 --- a/crates/async_zip/src/tests/read/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -pub(crate) mod compression; -pub(crate) mod locator; -pub(crate) mod zip64; diff --git a/crates/async_zip/src/tests/read/zip64/mod.rs b/crates/async_zip/src/tests/read/zip64/mod.rs deleted file mode 100644 index 758d410..0000000 --- a/crates/async_zip/src/tests/read/zip64/mod.rs +++ /dev/null @@ -1,107 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// Copyright (c) 2023 Cognite AS -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use futures_lite::io::AsyncReadExt; - -use crate::tests::init_logger; - -const ZIP64_ZIP_CONTENTS: &str = "Hello World!\n"; - -/// Tests opening and reading a zip64 archive. -/// It contains one file named "-" with a zip64 extended field header. -#[tokio::test] -async fn test_read_zip64_archive_mem() { - use crate::base::read::mem::ZipFileReader; - init_logger(); - - let data = include_bytes!("zip64.zip").to_vec(); - - let reader = ZipFileReader::new(data).await.unwrap(); - let mut entry_reader = reader.reader_without_entry(0).await.unwrap(); - - let mut read_data = String::new(); - entry_reader.read_to_string(&mut read_data).await.expect("read failed"); - - assert_eq!( - read_data.chars().count(), - ZIP64_ZIP_CONTENTS.chars().count(), - "{read_data:?} != {ZIP64_ZIP_CONTENTS:?}" - ); - assert_eq!(read_data, ZIP64_ZIP_CONTENTS); -} - -/// Like test_read_zip64_archive_mem() but for the streaming version. -#[tokio::test] -async fn test_read_zip64_archive_stream() { - use crate::base::read::stream::ZipFileReader; - init_logger(); - - let data = include_bytes!("zip64.zip").to_vec(); - - let reader = ZipFileReader::new(data.as_slice()); - let mut entry_reader = reader.next_without_entry().await.unwrap().unwrap(); - - let mut read_data = String::new(); - entry_reader.reader_mut().read_to_string(&mut read_data).await.expect("read failed"); - - assert_eq!( - read_data.chars().count(), - ZIP64_ZIP_CONTENTS.chars().count(), - "{read_data:?} != {ZIP64_ZIP_CONTENTS:?}" - ); - assert_eq!(read_data, ZIP64_ZIP_CONTENTS); -} - -/// Generate an example file only if it doesn't exist already. -/// The file is placed adjacent to this rs file. -#[cfg(feature = "tokio")] -fn generate_zip64many_zip() -> std::path::PathBuf { - use std::io::Write; - use zip::write::FileOptions; - - let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR")); - path.push("src/tests/read/zip64/zip64many.zip"); - - // Only recreate the zip if it doesn't already exist. 
if path.exists() { - return path; - } - - let zip_file = std::fs::File::create(&path).unwrap(); - let mut zip = zip::ZipWriter::new(zip_file); - let options = FileOptions::default().compression_method(zip::CompressionMethod::Stored); - - for i in 0..2_u32.pow(16) + 1 { - zip.start_file(format!("{i}.txt"), options).unwrap(); - zip.write_all(b"\n").unwrap(); - } - - zip.finish().unwrap(); - - path -} - -/// Test reading a generated zip64 archive that contains more than 2^16 entries. -#[cfg(feature = "tokio-fs")] -#[tokio::test] -async fn test_read_zip64_archive_many_entries() { - use crate::tokio::read::fs::ZipFileReader; - - init_logger(); - - let path = generate_zip64many_zip(); - - let reader = ZipFileReader::new(path).await.unwrap(); - - // Verify that each entry exists and has the contents "\n" - for i in 0..2_u32.pow(16) + 1 { - let entry = reader.file().entries().get(i as usize).unwrap(); - eprintln!("{:?}", entry.filename().as_bytes()); - assert_eq!(entry.filename().as_str().unwrap(), format!("{i}.txt")); - let mut entry = reader.reader_without_entry(i as usize).await.unwrap(); - let mut contents = String::new(); - entry.read_to_string(&mut contents).await.unwrap(); - assert_eq!(contents, "\n"); - } -} diff --git a/crates/async_zip/src/tests/read/zip64/zip64.zip b/crates/async_zip/src/tests/read/zip64/zip64.zip deleted file mode 100644 index b07a4d4..0000000 Binary files a/crates/async_zip/src/tests/read/zip64/zip64.zip and /dev/null differ diff --git a/crates/async_zip/src/tests/spec/date.rs b/crates/async_zip/src/tests/spec/date.rs deleted file mode 100644 index 151bde4..0000000 --- a/crates/async_zip/src/tests/spec/date.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -#[cfg(feature = "chrono")] -use chrono::{TimeZone, Utc}; - -use crate::ZipDateTimeBuilder; - -#[test] -#[cfg(feature = "chrono")] -fn date_conversion_test_chrono() { - let original_dt = Utc.timestamp_opt(1666544102, 0).unwrap(); - let zip_dt = crate::ZipDateTime::from_chrono(&original_dt); - let result_dt = zip_dt.as_chrono().single().expect("expected single unique result"); - assert_eq!(result_dt, original_dt); -} - -#[test] -fn date_conversion_test() { - let year = 2000; - let month = 9; - let day = 8; - let hour = 7; - let minute = 5; - let second = 4; - - let mut builder = ZipDateTimeBuilder::new(); - - builder = builder.year(year); - builder = builder.month(month); - builder = builder.day(day); - builder = builder.hour(hour); - builder = builder.minute(minute); - builder = builder.second(second); - - let built = builder.build(); - - assert_eq!(year, built.year()); - assert_eq!(month, built.month()); - assert_eq!(day, built.day()); - assert_eq!(hour, built.hour()); - assert_eq!(minute, built.minute()); - assert_eq!(second, built.second()); -} diff --git a/crates/async_zip/src/tests/spec/mod.rs b/crates/async_zip/src/tests/spec/mod.rs deleted file mode 100644 index 162826e..0000000 --- a/crates/async_zip/src/tests/spec/mod.rs +++ /dev/null @@ -1,4 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -pub(crate) mod date; diff --git a/crates/async_zip/src/tests/write/mod.rs b/crates/async_zip/src/tests/write/mod.rs deleted file mode 100644 index 6ca7571..0000000 --- a/crates/async_zip/src/tests/write/mod.rs +++ /dev/null @@ -1,29 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT 
License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use futures_lite::io::AsyncWrite; -use std::io::Error; -use std::pin::Pin; -use std::task::{Context, Poll}; - -pub(crate) mod offset; -mod zip64; - -/// /dev/null for AsyncWrite. -/// Useful for tests that involve writing, but not reading, large amounts of data. -pub(crate) struct AsyncSink; - -// AsyncSink is always ready to receive bytes and throw them away. -impl AsyncWrite for AsyncSink { - fn poll_write(self: Pin<&mut Self>, _: &mut Context<'_>, buf: &[u8]) -> Poll<Result<usize, Error>> { - Poll::Ready(Ok(buf.len())) - } - - fn poll_flush(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), Error>> { - Poll::Ready(Ok(())) - } - - fn poll_close(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), Error>> { - Poll::Ready(Ok(())) - } -} diff --git a/crates/async_zip/src/tests/write/offset/mod.rs b/crates/async_zip/src/tests/write/offset/mod.rs deleted file mode 100644 index 5ee9811..0000000 --- a/crates/async_zip/src/tests/write/offset/mod.rs +++ /dev/null @@ -1,22 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::base::write::io::offset::AsyncOffsetWriter; - -#[tokio::test] -async fn basic() { - use futures_lite::io::AsyncWriteExt; - use futures_lite::io::Cursor; - - let mut writer = AsyncOffsetWriter::new(Cursor::new(Vec::new())); - assert_eq!(writer.offset(), 0); - - writer.write_all(b"Foo. Bar. Foo. Bar.").await.expect("failed to write data"); - assert_eq!(writer.offset(), 19); - - writer.write_all(b"Foo. Foo.").await.expect("failed to write data"); - assert_eq!(writer.offset(), 28); - - writer.write_all(b"Bar. Bar.").await.expect("failed to write data"); - assert_eq!(writer.offset(), 37); -} diff --git a/crates/async_zip/src/tests/write/zip64/mod.rs b/crates/async_zip/src/tests/write/zip64/mod.rs deleted file mode 100644 index 01f3211..0000000 --- a/crates/async_zip/src/tests/write/zip64/mod.rs +++ /dev/null @@ -1,243 +0,0 @@ -// Copyright Cognite AS, 2023 - -use crate::base::write::ZipFileWriter; -use crate::error::{Zip64ErrorCase, ZipError}; -use crate::spec::consts::NON_ZIP64_MAX_SIZE; -use crate::tests::init_logger; -use crate::tests::write::AsyncSink; -use crate::{Compression, ZipEntryBuilder}; -use std::io::Read; - -use crate::spec::header::ExtraField; -use futures_lite::io::AsyncWriteExt; - -// Useful constants for writing a large file. -const BATCH_SIZE: usize = 100_000; -const NUM_BATCHES: usize = NON_ZIP64_MAX_SIZE as usize / BATCH_SIZE + 1; -const BATCHED_FILE_SIZE: usize = NUM_BATCHES * BATCH_SIZE; - -/// Test writing a small zip64 file. -/// No zip64 extra fields will be emitted for EntryWhole. 
-/// Z64 end of directory record & locator should be emitted -#[tokio::test] -async fn test_write_zip64_file() { - init_logger(); - - let mut buffer = Vec::new(); - let mut writer = ZipFileWriter::new(&mut buffer).force_zip64(); - let entry = ZipEntryBuilder::new("file1".to_string().into(), Compression::Stored); - writer.write_entry_whole(entry, &[0, 0, 0, 0]).await.unwrap(); - let entry = ZipEntryBuilder::new("file2".to_string().into(), Compression::Stored); - let mut entry_writer = writer.write_entry_stream(entry).await.unwrap(); - entry_writer.write_all(&[0, 0, 0, 0]).await.unwrap(); - entry_writer.close().await.unwrap(); - writer.close().await.unwrap(); - - let cursor = std::io::Cursor::new(buffer); - let mut zip = zip::read::ZipArchive::new(cursor).unwrap(); - let mut file1 = zip.by_name("file1").unwrap(); - assert_eq!(file1.extra_data(), &[] as &[u8]); - let mut buffer = Vec::new(); - file1.read_to_end(&mut buffer).unwrap(); - assert_eq!(buffer.as_slice(), &[0, 0, 0, 0]); - drop(file1); - - let mut file2 = zip.by_name("file2").unwrap(); - let mut buffer = Vec::new(); - file2.read_to_end(&mut buffer).unwrap(); - assert_eq!(buffer.as_slice(), &[0, 0, 0, 0]); -} - -/// Test writing a large zip64 file. This test will use upwards of 4GB of memory. -#[tokio::test] -async fn test_write_large_zip64_file() { - init_logger(); - - // Allocate space with some extra for metadata records - let mut buffer = Vec::with_capacity(BATCHED_FILE_SIZE + 100_000); - let mut writer = ZipFileWriter::new(&mut buffer); - - // Stream-written zip files are dubiously spec-conformant. We need to specify a valid file size - // in order for rs-zip (and unzip) to correctly read these files. - let entry = ZipEntryBuilder::new("file".to_string().into(), Compression::Stored) - .size(BATCHED_FILE_SIZE as u64, BATCHED_FILE_SIZE as u64); - let mut entry_writer = writer.write_entry_stream(entry).await.unwrap(); - for _ in 0..NUM_BATCHES { - entry_writer.write_all(&[0; BATCH_SIZE]).await.unwrap(); - } - entry_writer.close().await.unwrap(); - - assert!(writer.is_zip64); - let cd_entry = writer.cd_entries.last().unwrap(); - match &cd_entry.entry.extra_fields.last().unwrap() { - ExtraField::Zip64ExtendedInformation(zip64) => { - assert_eq!(zip64.compressed_size.unwrap(), BATCHED_FILE_SIZE as u64); - assert_eq!(zip64.uncompressed_size.unwrap(), BATCHED_FILE_SIZE as u64); - } - e => panic!("Expected a Zip64 extended field, got {:?}", e), - } - assert_eq!(cd_entry.header.uncompressed_size, NON_ZIP64_MAX_SIZE); - assert_eq!(cd_entry.header.compressed_size, NON_ZIP64_MAX_SIZE); - writer.close().await.unwrap(); - - let cursor = std::io::Cursor::new(buffer); - let mut archive = zip::read::ZipArchive::new(cursor).unwrap(); - let mut file = archive.by_name("file").unwrap(); - assert_eq!(file.compression(), zip::CompressionMethod::Stored); - assert_eq!(file.size(), BATCHED_FILE_SIZE as u64); - let mut buffer = [0; 100_000]; - let mut bytes_total = 0; - loop { - let read_bytes = file.read(&mut buffer).unwrap(); - if read_bytes == 0 { - break; - } - bytes_total += read_bytes; - } - assert_eq!(bytes_total, BATCHED_FILE_SIZE); -} - -/// Test writing a file, and reading it with async-zip -#[tokio::test] -async fn test_write_large_zip64_file_self_read() { - use futures_lite::io::AsyncReadExt; - - init_logger(); - - // Allocate space with some extra for metadata records - let mut buffer = Vec::with_capacity(BATCHED_FILE_SIZE + 100_000); - let mut writer = ZipFileWriter::new(&mut buffer); - - let entry = ZipEntryBuilder::new("file".into(), 
Compression::Stored); - let mut entry_writer = writer.write_entry_stream(entry).await.unwrap(); - for _ in 0..NUM_BATCHES { - entry_writer.write_all(&[0; BATCH_SIZE]).await.unwrap(); - } - entry_writer.close().await.unwrap(); - writer.close().await.unwrap(); - - let reader = crate::base::read::mem::ZipFileReader::new(buffer).await.unwrap(); - assert!(reader.file().zip64); - assert_eq!(reader.file().entries[0].entry.filename().as_str().unwrap(), "file"); - assert_eq!(reader.file().entries[0].entry.compressed_size, BATCHED_FILE_SIZE as u64); - let mut entry = reader.reader_without_entry(0).await.unwrap(); - - let mut buffer = [0; 100_000]; - let mut bytes_total = 0; - loop { - let read_bytes = entry.read(&mut buffer).await.unwrap(); - if read_bytes == 0 { - break; - } - bytes_total += read_bytes; - } - assert_eq!(bytes_total, BATCHED_FILE_SIZE); -} - -/// Test writing a zip64 file with more than u16::MAX files. -#[tokio::test] -async fn test_write_zip64_file_many_entries() { - init_logger(); - - // The generated file will likely be ~3MB in size. - let mut buffer = Vec::with_capacity(3_500_000); - - let mut writer = ZipFileWriter::new(&mut buffer); - for i in 0..=u16::MAX as u32 + 1 { - let entry = ZipEntryBuilder::new(i.to_string().into(), Compression::Stored); - writer.write_entry_whole(entry, &[]).await.unwrap(); - } - assert!(writer.is_zip64); - writer.close().await.unwrap(); - - let cursor = std::io::Cursor::new(buffer); - let mut zip = zip::read::ZipArchive::new(cursor).unwrap(); - assert_eq!(zip.len(), u16::MAX as usize + 2); - - for i in 0..=u16::MAX as u32 + 1 { - let mut file = zip.by_name(&i.to_string()).unwrap(); - let mut buf = Vec::new(); - file.read_to_end(&mut buf).unwrap(); - } -} - -/// Tests that EntryWholeWriter switches to Zip64 mode when writing too many files for a non-Zip64 archive. -#[tokio::test] -async fn test_zip64_when_many_files_whole() { - let mut sink = AsyncSink; - let mut writer = ZipFileWriter::new(&mut sink); - for i in 0..=u16::MAX as u32 + 1 { - let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); - writer.write_entry_whole(entry, &[]).await.unwrap() - } - assert!(writer.is_zip64); - writer.close().await.unwrap(); -} - -/// Tests that EntryStreamWriter switches to Zip64 mode when writing too many files for a non-Zip64 archive. -#[tokio::test] -async fn test_zip64_when_many_files_stream() { - let mut sink = AsyncSink; - let mut writer = ZipFileWriter::new(&mut sink); - for i in 0..=u16::MAX as u32 + 1 { - let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); - let entrywriter = writer.write_entry_stream(entry).await.unwrap(); - entrywriter.close().await.unwrap(); - } - - assert!(writer.is_zip64); - writer.close().await.unwrap(); -} - -/// Tests that when force_no_zip64 is true, EntryWholeWriter errors when trying to write more than -/// u16::MAX files to a single archive. 
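The zip64 cutover exercised by these tests reduces to two classic ZIP limits: entry counts are u16 fields and per-file sizes are u32 fields in the end-of-central-directory record. A small sketch of that decision under those assumptions (`needs_zip64` is illustrative, not the crate's API; `NON_ZIP64_MAX_SIZE` above is presumed to be the u32 boundary):

```rust
// Classic (non-zip64) end-of-central-directory limits.
fn needs_zip64(num_entries: u64, largest_file_size: u64) -> bool {
    num_entries > u16::MAX as u64 || largest_file_size > u32::MAX as u64
}

fn main() {
    assert!(!needs_zip64(65_535, 1_024)); // fits the classic format
    assert!(needs_zip64(65_536, 1_024)); // too many entries
    assert!(needs_zip64(1, u32::MAX as u64 + 1)); // file larger than ~4 GiB
}
```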
-#[tokio::test] -async fn test_force_no_zip64_errors_with_too_many_files_whole() { - let mut sink = AsyncSink; - let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64(); - for i in 0..u16::MAX { - let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); - writer.write_entry_whole(entry, &[]).await.unwrap() - } - let entry = ZipEntryBuilder::new("65537".to_string().into(), Compression::Stored); - let result = writer.write_entry_whole(entry, &[]).await; - - assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)))); -} - -/// Tests that when force_no_zip64 is true, EntryStreamWriter errors when trying to write more than -/// u16::MAX files to a single archive. -#[tokio::test] -async fn test_force_no_zip64_errors_with_too_many_files_stream() { - let mut sink = AsyncSink; - let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64(); - for i in 0..u16::MAX { - let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored); - let entrywriter = writer.write_entry_stream(entry).await.unwrap(); - entrywriter.close().await.unwrap(); - } - let entry = ZipEntryBuilder::new("65537".to_string().into(), Compression::Stored); - let entrywriter = writer.write_entry_stream(entry).await.unwrap(); - let result = entrywriter.close().await; - - assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles)))); -} - -/// Tests that when force_no_zip64 is true, EntryStreamWriter errors when trying to write -/// a file larger than ~4 GiB to an archive. -#[tokio::test] -async fn test_force_no_zip64_errors_with_too_large_file_stream() { - let mut sink = AsyncSink; - let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64(); - - let entry = ZipEntryBuilder::new("-".to_string().into(), Compression::Stored); - let mut entrywriter = writer.write_entry_stream(entry).await.unwrap(); - - // Writing just over 4 GiB, 100 kB at a time - for _ in 0..NUM_BATCHES { - entrywriter.write_all(&[0; BATCH_SIZE]).await.unwrap(); - } - let result = entrywriter.close().await; - - assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile)))); -} diff --git a/crates/async_zip/src/tokio/mod.rs b/crates/async_zip/src/tokio/mod.rs deleted file mode 100644 index fbcc688..0000000 --- a/crates/async_zip/src/tokio/mod.rs +++ /dev/null @@ -1,41 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A set of [`tokio`]-specific type aliases and features. -//! -//! # Usage -//! With the `tokio` feature enabled, types from the [`base`] implementation will implement additional constructors -//! for use with [`tokio`]. These constructors internally implement conversion between the required async IO traits. -//! They are defined as: -//! - [`base::read::seek::ZipFileReader::with_tokio()`] -//! - [`base::read::stream::ZipFileReader::with_tokio()`] -//! - [`base::write::ZipFileWriter::with_tokio()`] -//! -//! As a result of Rust's type inference, we are able to reuse the [`base`] implementation's types with considerable -//! ease. There only exists one caveat with their use; the types returned by these constructors contain a wrapping -//! compatibility type provided by an external crate. These compatibility types cannot be named unless you also pull in -//! the [`tokio_util`] dependency manually. This is why we've provided type aliases within this module so that they can -//! be named without needing to pull in a separate dependency. 
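In practice, these aliases let callers name the writer type without depending on `tokio_util` themselves. A sketch of what that might look like, using the `with_tokio()` constructor named in the docs above (the exact trait bounds are assumed here, not quoted from the crate):

```rust
use async_zip::tokio::write::ZipFileWriter;

// `ZipFileWriter<tokio::fs::File>` is nameable without importing tokio_util,
// because the alias hides the internal Compat wrapper.
async fn open_writer(file: tokio::fs::File) -> ZipFileWriter<tokio::fs::File> {
    ZipFileWriter::with_tokio(file)
}
```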
- -#[cfg(doc)] -use crate::base; -#[cfg(doc)] -use tokio; -#[cfg(doc)] -use tokio_util; - -pub mod read; - -pub mod write { - //! A module which supports writing ZIP files. - - #[cfg(doc)] - use crate::base; - use tokio_util::compat::Compat; - - /// A [`tokio`]-specific type alias for [`base::write::ZipFileWriter`]; - pub type ZipFileWriter<W> = crate::base::write::ZipFileWriter<Compat<W>>; - - /// A [`tokio`]-specific type alias for [`base::write::EntryStreamWriter`]; - pub type EntryStreamWriter<'a, W> = crate::base::write::EntryStreamWriter<'a, Compat<W>>; -} diff --git a/crates/async_zip/src/tokio/read/fs.rs b/crates/async_zip/src/tokio/read/fs.rs deleted file mode 100644 index c045d39..0000000 --- a/crates/async_zip/src/tokio/read/fs.rs +++ /dev/null @@ -1,160 +0,0 @@ -// Copyright (c) 2022 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A concurrent ZIP reader which acts over a file system path. -//! -//! Concurrency is achieved as a result of: -//! - Wrapping the provided path within an [`Arc`] to allow shared ownership. -//! - Constructing a new [`File`] from the path when reading. -//! -//! ### Usage -//! Unlike the [`seek`] module, we no longer hold a mutable reference to any inner reader which, in turn, allows the -//! construction of concurrent [`ZipEntryReader`]s. Though, note that each individual [`ZipEntryReader`] cannot be sent -//! between thread boundaries due to the masked lifetime requirement. Therefore, the overarching [`ZipFileReader`] -//! should be cloned and moved into those contexts when needed. -//! -//! ### Concurrent Example -//! ```no_run -//! # use async_zip::tokio::read::fs::ZipFileReader; -//! # use async_zip::error::Result; -//! # use futures_lite::io::AsyncReadExt; -//! # -//! async fn run() -> Result<()> { -//! let reader = ZipFileReader::new("./foo.zip").await?; -//! let result = tokio::join!(read(&reader, 0), read(&reader, 1)); -//! -//! let data_0 = result.0?; -//! let data_1 = result.1?; -//! -//! // Use data within current scope. -//! -//! Ok(()) -//! } -//! -//! async fn read(reader: &ZipFileReader, index: usize) -> Result<Vec<u8>> { -//! let mut entry = reader.reader_without_entry(index).await?; -//! let mut data = Vec::new(); -//! entry.read_to_end(&mut data).await?; -//! Ok(data) -//! } -//! ``` -//! -//! ### Parallel Example -//! ```no_run -//! # use async_zip::tokio::read::fs::ZipFileReader; -//! # use async_zip::error::Result; -//! # use futures_lite::io::AsyncReadExt; -//! # -//! async fn run() -> Result<()> { -//! let reader = ZipFileReader::new("./foo.zip").await?; -//! -//! let handle_0 = tokio::spawn(read(reader.clone(), 0)); -//! let handle_1 = tokio::spawn(read(reader.clone(), 1)); -//! -//! let data_0 = handle_0.await.expect("thread panicked")?; -//! let data_1 = handle_1.await.expect("thread panicked")?; -//! -//! // Use data within current scope. -//! -//! Ok(()) -//! } -//! -//! async fn read(reader: ZipFileReader, index: usize) -> Result<Vec<u8>> { -//! let mut entry = reader.reader_without_entry(index).await?; -//! let mut data = Vec::new(); -//! entry.read_to_end(&mut data).await?; -//! Ok(data) -//! } -//! 
``` - -#[cfg(doc)] -use crate::base::read::seek; - -use crate::base::read::io::entry::{WithEntry, WithoutEntry, ZipEntryReader}; -use crate::error::{Result, ZipError}; -use crate::file::ZipFile; - -use std::path::{Path, PathBuf}; -use std::sync::Arc; - -use tokio::fs::File; -use tokio::io::BufReader; -use tokio_util::compat::{Compat, TokioAsyncReadCompatExt}; - -struct Inner { - path: PathBuf, - file: ZipFile, -} - -/// A concurrent ZIP reader which acts over a file system path. -#[derive(Clone)] -pub struct ZipFileReader { - inner: Arc<Inner>, -} - -impl ZipFileReader { - /// Constructs a new ZIP reader from a file system path. - pub async fn new
<P>(path: P) -> Result<ZipFileReader> - where - P: AsRef<Path>, - { - let file = crate::base::read::file(File::open(&path).await?.compat()).await?; - Ok(ZipFileReader::from_raw_parts(path, file)) - } - - /// Constructs a ZIP reader from a file system path and ZIP file information derived from that path. - /// - /// Providing a [`ZipFile`] that wasn't derived from that path may lead to inaccurate parsing. - pub fn from_raw_parts
<P>(path: P, file: ZipFile) -> ZipFileReader - where - P: AsRef<Path>, - { - ZipFileReader { inner: Arc::new(Inner { path: path.as_ref().to_owned(), file }) } - } - - /// Returns this ZIP file's information. - pub fn file(&self) -> &ZipFile { - &self.inner.file - } - - /// Returns the file system path provided to the reader during construction. - pub fn path(&self) -> &Path { - &self.inner.path - } - - /// Returns a new entry reader if the provided index is valid. - pub async fn reader_without_entry( - &self, - index: usize, - ) -> Result<ZipEntryReader<'static, Compat<BufReader<File>>, WithoutEntry>> { - let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - let mut fs_file = BufReader::new(File::open(&self.inner.path).await?).compat(); - - stored_entry.seek_to_data_offset(&mut fs_file).await?; - - Ok(ZipEntryReader::new_with_owned( - fs_file, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - )) - } - - /// Returns a new entry reader if the provided index is valid. - pub async fn reader_with_entry( - &self, - index: usize, - ) -> Result<ZipEntryReader<'_, Compat<BufReader<File>>, WithEntry<'_>>> { - let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?; - let mut fs_file = BufReader::new(File::open(&self.inner.path).await?).compat(); - - stored_entry.seek_to_data_offset(&mut fs_file).await?; - - let reader = ZipEntryReader::new_with_owned( - fs_file, - stored_entry.entry.compression(), - stored_entry.entry.compressed_size(), - ); - - Ok(reader.into_with_entry(stored_entry)) - } -} diff --git a/crates/async_zip/src/tokio/read/mod.rs b/crates/async_zip/src/tokio/read/mod.rs deleted file mode 100644 index c70ac27..0000000 --- a/crates/async_zip/src/tokio/read/mod.rs +++ /dev/null @@ -1,44 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -//! A module which supports reading ZIP files. - -use tokio_util::compat::Compat; - -#[cfg(feature = "tokio-fs")] -pub mod fs; -#[cfg(doc)] -use crate::base; -#[cfg(doc)] -use tokio; - -/// A [`tokio`]-specific type alias for [`base::read::ZipEntryReader`]; -pub type ZipEntryReader<'a, R, E> = crate::base::read::ZipEntryReader<'a, Compat<R>, E>; - -pub mod seek { - //! A ZIP reader which acts over a seekable source. - use tokio_util::compat::Compat; - - #[cfg(doc)] - use crate::base; - #[cfg(doc)] - use tokio; - - /// A [`tokio`]-specific type alias for [`base::read::seek::ZipFileReader`]; - pub type ZipFileReader<R> = crate::base::read::seek::ZipFileReader<Compat<R>>; -} - -pub mod stream { - //! A ZIP reader which acts over a non-seekable source. - - #[cfg(doc)] - use crate::base; - #[cfg(doc)] - use tokio; - use tokio_util::compat::Compat; - - /// A [`tokio`]-specific type alias for [`base::read::stream::Reading`]; - pub type Reading<'a, R, E> = crate::base::read::stream::Reading<'a, Compat<R>, E>; - /// A [`tokio`]-specific type alias for [`base::read::stream::Ready`]; - pub type Ready<R> = crate::base::read::stream::Ready<Compat<R>>; -} diff --git a/crates/async_zip/src/utils.rs b/crates/async_zip/src/utils.rs deleted file mode 100644 index 269d334..0000000 --- a/crates/async_zip/src/utils.rs +++ /dev/null @@ -1,18 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use crate::error::{Result, ZipError}; -use futures_lite::io::{AsyncRead, AsyncReadExt}; - -// Assert that the next four-byte signature read by a reader which impls AsyncRead matches the expected signature. 
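ZIP signatures are stored little-endian on disk, so the byte sequence `50 4B 03 04` ("PK\x03\x04", the local file header magic) decodes to the u32 constant `0x04034B50` that a parser compares against. A quick standalone check of that decoding:

```rust
fn main() {
    // "PK\x03\x04" as it appears on disk.
    let on_disk = [0x50u8, 0x4B, 0x03, 0x04];
    // The same constant as a little-endian u32, the form a signature check compares.
    assert_eq!(u32::from_le_bytes(on_disk), 0x0403_4B50);
}
```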
-pub(crate) async fn assert_signature<R: AsyncRead + Unpin>(reader: &mut R, expected: u32) -> Result<()> { - let signature = { - let mut buffer = [0; 4]; - reader.read_exact(&mut buffer).await?; - u32::from_le_bytes(buffer) - }; - match signature { - actual if actual == expected => Ok(()), - actual => Err(ZipError::UnexpectedHeaderError(actual, expected)), - } -} diff --git a/crates/async_zip/tests/common/mod.rs b/crates/async_zip/tests/common/mod.rs deleted file mode 100644 index d72f02e..0000000 --- a/crates/async_zip/tests/common/mod.rs +++ /dev/null @@ -1,99 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) -#![allow(dead_code)] - -use async_zip::base::read::mem; -use async_zip::base::read::seek; -use async_zip::base::write::ZipFileWriter; -use async_zip::Compression; -use async_zip::ZipEntryBuilder; -use futures_lite::io::AsyncWriteExt; -use tokio::fs::File; -use tokio::io::BufReader; -use tokio_util::compat::TokioAsyncReadCompatExt; - -const FOLDER_PREFIX: &str = "tests/test_inputs"; - -const FILE_LIST: &[&str] = &[ - "sample_data/alpha/back_to_front.txt", - "sample_data/alpha/front_to_back.txt", - "sample_data/numeric/forward.txt", - "sample_data/numeric/reverse.txt", -]; - -pub async fn compress_to_mem(compress: Compression) -> Vec<u8> { - let mut bytes = Vec::with_capacity(10_000); - let mut writer = ZipFileWriter::new(&mut bytes); - - for fname in FILE_LIST { - let content = tokio::fs::read(format!("{FOLDER_PREFIX}/{fname}")).await.unwrap(); - let opts = ZipEntryBuilder::new(fname.to_string().into(), compress); - - let mut entry_writer = writer.write_entry_stream(opts).await.unwrap(); - entry_writer.write_all(&content).await.unwrap(); - entry_writer.close().await.unwrap(); - } - writer.close().await.unwrap(); - bytes -} - -#[cfg(feature = "tokio-fs")] -pub async fn check_decompress_fs(fname: &str) { - use async_zip::tokio::read::fs; - let zip = fs::ZipFileReader::new(fname).await.unwrap(); - let zip_entries: Vec<_> = zip.file().entries().to_vec(); - for (idx, entry) in zip_entries.into_iter().enumerate() { - // TODO: resolve unwrap usage - if entry.dir().unwrap() { - continue; - } - // TODO: resolve unwrap usage - let fname = entry.filename().as_str().unwrap(); - let mut output = String::new(); - let mut reader = zip.reader_with_entry(idx).await.unwrap(); - let _ = reader.read_to_string_checked(&mut output).await.unwrap(); - let fs_file = format!("{FOLDER_PREFIX}/{fname}"); - let expected = tokio::fs::read_to_string(fs_file).await.unwrap(); - assert_eq!(output, expected, "for {fname}, expect zip data to match file data"); - } -} - -pub async fn check_decompress_seek(fname: &str) { - let file = BufReader::new(File::open(fname).await.unwrap()); - let mut file_compat = file.compat(); - let mut zip = seek::ZipFileReader::new(&mut file_compat).await.unwrap(); - let zip_entries: Vec<_> = zip.file().entries().to_vec(); - for (idx, entry) in zip_entries.into_iter().enumerate() { - // TODO: resolve unwrap usage - if entry.dir().unwrap() { - continue; - } - // TODO: resolve unwrap usage - let fname = entry.filename().as_str().unwrap(); - let mut output = String::new(); - let mut reader = zip.reader_with_entry(idx).await.unwrap(); - let _ = reader.read_to_string_checked(&mut output).await.unwrap(); - let fs_file = format!("tests/test_inputs/{fname}"); - let expected = tokio::fs::read_to_string(fs_file).await.unwrap(); - assert_eq!(output, expected, "for {fname}, expect zip data to match file data"); - } -} - -pub async fn 
check_decompress_mem(zip_data: Vec<u8>) { - let zip = mem::ZipFileReader::new(zip_data).await.unwrap(); - let zip_entries: Vec<_> = zip.file().entries().to_vec(); - for (idx, entry) in zip_entries.into_iter().enumerate() { - // TODO: resolve unwrap usage - if entry.dir().unwrap() { - continue; - } - // TODO: resolve unwrap usage - let fname = entry.filename().as_str().unwrap(); - let mut output = String::new(); - let mut reader = zip.reader_with_entry(idx).await.unwrap(); - let _ = reader.read_to_string_checked(&mut output).await.unwrap(); - let fs_file = format!("{FOLDER_PREFIX}/{fname}"); - let expected = tokio::fs::read_to_string(fs_file).await.unwrap(); - assert_eq!(output, expected, "for {fname}, expect zip data to match file data"); - } -} diff --git a/crates/async_zip/tests/compress_test.rs b/crates/async_zip/tests/compress_test.rs deleted file mode 100644 index 9efa7e8..0000000 --- a/crates/async_zip/tests/compress_test.rs +++ /dev/null @@ -1,81 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) - -use async_zip::{Compression, ZipEntryBuilder, ZipString}; -use futures_lite::AsyncWriteExt; - -mod common; - -#[cfg(feature = "zstd")] -#[tokio::test] -async fn zip_zstd_in_out() { - let zip_data = common::compress_to_mem(Compression::Zstd).await; - common::check_decompress_mem(zip_data).await -} - -#[cfg(feature = "deflate")] -#[tokio::test] -async fn zip_decompress_in_out() { - let zip_data = common::compress_to_mem(Compression::Deflate).await; - common::check_decompress_mem(zip_data).await -} - -#[tokio::test] -async fn zip_store_in_out() { - let zip_data = common::compress_to_mem(Compression::Stored).await; - common::check_decompress_mem(zip_data).await -} - -#[tokio::test] -async fn zip_utf8_extra_in_out_stream() { - let mut zip_bytes = Vec::with_capacity(10_000); - - { - // writing - let content = "Test".as_bytes(); - let mut writer = async_zip::base::write::ZipFileWriter::new(&mut zip_bytes); - let filename = - ZipString::new_with_alternative("\u{4E2D}\u{6587}.txt".to_string(), b"\xD6\xD0\xCe\xC4.txt".to_vec()); - let opts = ZipEntryBuilder::new(filename, Compression::Stored); - - let mut entry_writer = writer.write_entry_stream(opts).await.unwrap(); - entry_writer.write_all(content).await.unwrap(); - entry_writer.close().await.unwrap(); - - writer.close().await.unwrap(); - } - - { - // reading - let zip = async_zip::base::read::mem::ZipFileReader::new(zip_bytes).await.unwrap(); - let zip_entries: Vec<_> = zip.file().entries().to_vec(); - assert_eq!(zip_entries.len(), 1); - assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt"); - assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref())); - } -} - -#[tokio::test] -async fn zip_utf8_extra_in_out_whole() { - let mut zip_bytes = Vec::with_capacity(10_000); - - { - // writing - let content = "Test".as_bytes(); - let mut writer = async_zip::base::write::ZipFileWriter::new(&mut zip_bytes); - let filename = - ZipString::new_with_alternative("\u{4E2D}\u{6587}.txt".to_string(), b"\xD6\xD0\xCe\xC4.txt".to_vec()); - let opts = ZipEntryBuilder::new(filename, Compression::Stored); - writer.write_entry_whole(opts, content).await.unwrap(); - writer.close().await.unwrap(); - } - - { - // reading - let zip = async_zip::base::read::mem::ZipFileReader::new(zip_bytes).await.unwrap(); - let zip_entries: Vec<_> = zip.file().entries().to_vec(); - assert_eq!(zip_entries.len(), 1); - 
assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt"); - assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref())); - } -} diff --git a/crates/async_zip/tests/decompress_test.rs b/crates/async_zip/tests/decompress_test.rs deleted file mode 100644 index 8fa7acf..0000000 --- a/crates/async_zip/tests/decompress_test.rs +++ /dev/null @@ -1,89 +0,0 @@ -// Copyright (c) 2023 Harry [Majored] [hello@majored.pw] -// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE) -#![allow(dead_code)] - -use tokio::io::BufReader; -use tokio_util::compat::TokioAsyncReadCompatExt; - -mod common; - -const ZSTD_ZIP_FILE: &str = "tests/test_inputs/sample_data.zstd.zip"; -const DEFLATE_ZIP_FILE: &str = "tests/test_inputs/sample_data.deflate.zip"; -const STORE_ZIP_FILE: &str = "tests/test_inputs/sample_data.store.zip"; -const UTF8_EXTRA_ZIP_FILE: &str = "tests/test_inputs/sample_data_utf8_extra.zip"; - -#[cfg(feature = "zstd")] -#[tokio::test] -async fn decompress_zstd_zip_seek() { - common::check_decompress_seek(ZSTD_ZIP_FILE).await -} - -#[cfg(feature = "deflate")] -#[tokio::test] -async fn decompress_deflate_zip_seek() { - common::check_decompress_seek(DEFLATE_ZIP_FILE).await -} - -#[tokio::test] -async fn check_empty_zip_seek() { - let mut data: Vec = Vec::new(); - async_zip::base::write::ZipFileWriter::new(futures_lite::io::Cursor::new(&mut data)).close().await.unwrap(); - async_zip::base::read::seek::ZipFileReader::new(futures_lite::io::Cursor::new(&data)).await.unwrap(); -} - -#[tokio::test] -async fn decompress_store_zip_seek() { - common::check_decompress_seek(STORE_ZIP_FILE).await -} - -#[cfg(feature = "zstd")] -#[tokio::test] -async fn decompress_zstd_zip_mem() { - let content = tokio::fs::read(ZSTD_ZIP_FILE).await.unwrap(); - common::check_decompress_mem(content).await -} - -#[cfg(feature = "deflate")] -#[tokio::test] -async fn decompress_deflate_zip_mem() { - let content = tokio::fs::read(DEFLATE_ZIP_FILE).await.unwrap(); - common::check_decompress_mem(content).await -} - -#[tokio::test] -async fn decompress_store_zip_mem() { - let content = tokio::fs::read(STORE_ZIP_FILE).await.unwrap(); - common::check_decompress_mem(content).await -} - -#[cfg(feature = "zstd")] -#[cfg(feature = "tokio-fs")] -#[tokio::test] -async fn decompress_zstd_zip_fs() { - common::check_decompress_fs(ZSTD_ZIP_FILE).await -} - -#[cfg(feature = "deflate")] -#[cfg(feature = "tokio-fs")] -#[tokio::test] -async fn decompress_deflate_zip_fs() { - common::check_decompress_fs(DEFLATE_ZIP_FILE).await -} - -#[cfg(feature = "tokio-fs")] -#[tokio::test] -async fn decompress_store_zip_fs() { - common::check_decompress_fs(STORE_ZIP_FILE).await -} - -#[tokio::test] -async fn decompress_zip_with_utf8_extra() { - let file = BufReader::new(tokio::fs::File::open(UTF8_EXTRA_ZIP_FILE).await.unwrap()); - let mut file_compat = file.compat(); - let zip = async_zip::base::read::seek::ZipFileReader::new(&mut file_compat).await.unwrap(); - let zip_entries: Vec<_> = zip.file().entries().to_vec(); - assert_eq!(zip_entries.len(), 1); - assert_eq!(zip_entries[0].header_size(), 93); - assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt"); - assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref())); -} diff --git a/crates/async_zip/tests/test_inputs/sample_data.deflate.zip b/crates/async_zip/tests/test_inputs/sample_data.deflate.zip deleted file mode 100644 index f7cb773..0000000 Binary files 
a/crates/async_zip/tests/test_inputs/sample_data.deflate.zip and /dev/null differ diff --git a/crates/async_zip/tests/test_inputs/sample_data.store.zip b/crates/async_zip/tests/test_inputs/sample_data.store.zip deleted file mode 100644 index 1f4d53c..0000000 Binary files a/crates/async_zip/tests/test_inputs/sample_data.store.zip and /dev/null differ diff --git a/crates/async_zip/tests/test_inputs/sample_data.zstd.zip b/crates/async_zip/tests/test_inputs/sample_data.zstd.zip deleted file mode 100644 index 614293c..0000000 Binary files a/crates/async_zip/tests/test_inputs/sample_data.zstd.zip and /dev/null differ diff --git a/crates/async_zip/tests/test_inputs/sample_data/alpha/back_to_front.txt b/crates/async_zip/tests/test_inputs/sample_data/alpha/back_to_front.txt deleted file mode 100644 index 5f84448..0000000 --- a/crates/async_zip/tests/test_inputs/sample_data/alpha/back_to_front.txt +++ /dev/null @@ -1,4 +0,0 @@ -Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a -Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a -Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a -Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a diff --git a/crates/async_zip/tests/test_inputs/sample_data/alpha/front_to_back.txt b/crates/async_zip/tests/test_inputs/sample_data/alpha/front_to_back.txt deleted file mode 100644 index 3d01a65..0000000 --- a/crates/async_zip/tests/test_inputs/sample_data/alpha/front_to_back.txt +++ /dev/null @@ -1,4 +0,0 @@ -A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z -A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z -A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z -A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z diff --git a/crates/async_zip/tests/test_inputs/sample_data/numeric/forward.txt b/crates/async_zip/tests/test_inputs/sample_data/numeric/forward.txt deleted file mode 100644 index 836aa3d..0000000 --- a/crates/async_zip/tests/test_inputs/sample_data/numeric/forward.txt +++ /dev/null @@ -1 +0,0 @@ -1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32 diff --git a/crates/async_zip/tests/test_inputs/sample_data/numeric/reverse.txt b/crates/async_zip/tests/test_inputs/sample_data/numeric/reverse.txt deleted file mode 100644 index 2110808..0000000 --- a/crates/async_zip/tests/test_inputs/sample_data/numeric/reverse.txt +++ /dev/null @@ -1 +0,0 @@ -32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1 diff --git a/crates/async_zip/tests/test_inputs/sample_data_utf8_extra.zip b/crates/async_zip/tests/test_inputs/sample_data_utf8_extra.zip deleted file mode 100644 index cec7282..0000000 Binary files a/crates/async_zip/tests/test_inputs/sample_data_utf8_extra.zip and /dev/null differ diff --git a/crates/envy/Cargo.toml b/crates/envy/Cargo.toml deleted file mode 100644 index 531d999..0000000 --- a/crates/envy/Cargo.toml +++ /dev/null @@ -1,21 +0,0 @@ -[package] -name = "envy" -version = "0.4.2" -authors = ["softprops "] -description = "deserialize env vars into typesafe structs" -documentation = "https://softprops.github.io/envy" -homepage = "https://github.com/softprops/envy" -repository = 
"https://github.com/softprops/envy" -keywords = ["serde", "env"] -license = "MIT" -readme = "README.md" -edition = "2021" -categories = [ - "config" -] - -[dependencies] -serde = "1.0" - -[dev-dependencies] -serde = { version = "1.0", features = ["derive"] } diff --git a/crates/envy/src/error.rs b/crates/envy/src/error.rs deleted file mode 100644 index 3d0fec1..0000000 --- a/crates/envy/src/error.rs +++ /dev/null @@ -1,55 +0,0 @@ -//! Error types -use serde::de::Error as SerdeError; -use std::{error::Error as StdError, fmt}; - -/// Types of errors that may result from failed attempts -/// to deserialize a type from env vars -#[derive(Debug, Clone, PartialEq)] -pub enum Error { - MissingValue(String), - Custom(String), -} - -impl StdError for Error {} - -impl fmt::Display for Error { - fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result { - match self { - Error::MissingValue(field) => write!(fmt, "missing value for {}", &field), - Error::Custom(ref msg) => write!(fmt, "{}", msg), - } - } -} - -impl SerdeError for Error { - fn custom(msg: T) -> Self { - Error::Custom(format!("{}", msg)) - } - - fn missing_field(field: &'static str) -> Error { - Error::MissingValue(field.into()) - } -} - -#[cfg(test)] -mod tests { - use super::*; - - fn impl_std_error(_: E) {} - - #[test] - fn error_impl_std_error() { - impl_std_error(Error::MissingValue("FOO_BAR".into())); - impl_std_error(Error::Custom("whoops".into())) - } - - #[test] - fn error_display() { - assert_eq!( - format!("{}", Error::MissingValue("FOO_BAR".into())), - "missing value for FOO_BAR" - ); - - assert_eq!(format!("{}", Error::Custom("whoops".into())), "whoops") - } -} diff --git a/crates/envy/src/lib.rs b/crates/envy/src/lib.rs deleted file mode 100644 index 75835d9..0000000 --- a/crates/envy/src/lib.rs +++ /dev/null @@ -1,560 +0,0 @@ -//! Envy is a library for deserializing environment variables into typesafe structs -//! -//! # Examples -//! -//! A typical usecase for envy is deserializing configuration store in an process' environment into a struct -//! whose fields map to the names of env vars. -//! -//! Serde makes it easy to provide a deserializable struct with its [deriveable Deserialize](https://serde.rs/derive.html) -//! procedural macro. -//! -//! Simply ask for an instance of that struct from envy's `from_env` function. -//! -//! ```no_run -//! use serde::Deserialize; -//! -//! #[derive(Deserialize, Debug)] -//! struct Config { -//! foo: u16, -//! bar: bool, -//! baz: String, -//! boom: Option, -//! } -//! -//! match envy::from_env::() { -//! Ok(config) => println!("{:#?}", config), -//! Err(error) => eprintln!("{:#?}", error), -//! } -//! ``` -//! -//! Special treatment is given to collections. For config fields that store a `Vec` of values, -//! use an env var that uses a comma separated value. -//! -//! All serde modifiers should work as is. -//! -//! Enums with unit variants can be used as values: -//! -//! ```no_run -//! # use serde::Deserialize; -//! -//! #[derive(Deserialize, Debug, PartialEq)] -//! #[serde(rename_all = "lowercase")] -//! pub enum Size { -//! Small, -//! Medium, -//! Large, -//! } -//! -//! #[derive(Deserialize, Debug)] -//! struct Config { -//! size: Size, -//! } -//! -//! // set env var for size as `SIZE=medium` -//! match envy::from_env::() { -//! Ok(config) => println!("{:#?}", config), -//! Err(error) => eprintln!("{:#?}", error), -//! } -//! 
``` - -use serde::de::{ - self, - value::{MapDeserializer, SeqDeserializer}, - IntoDeserializer, -}; -use std::{ - borrow::Cow, - env, - iter::{empty, IntoIterator}, -}; - -// Ours -mod error; -pub use crate::error::Error; - -/// A result type specific to `envy::Error` -pub type Result = std::result::Result; - -struct Vars(Iter) -where - Iter: IntoIterator; - -struct Val(String, String); - -impl<'de> IntoDeserializer<'de, Error> for Val { - type Deserializer = Self; - - fn into_deserializer(self) -> Self::Deserializer { - self - } -} - -struct VarName(String); - -impl<'de> IntoDeserializer<'de, Error> for VarName { - type Deserializer = Self; - - fn into_deserializer(self) -> Self::Deserializer { - self - } -} - -impl> Iterator for Vars { - type Item = (VarName, Val); - - fn next(&mut self) -> Option { - self.0 - .next() - .map(|(k, v)| (VarName(k.to_lowercase()), Val(k, v))) - } -} - -macro_rules! forward_parsed_values { - ($($ty:ident => $method:ident,)*) => { - $( - fn $method(self, visitor: V) -> Result - where V: de::Visitor<'de> - { - match self.1.parse::<$ty>() { - Ok(val) => val.into_deserializer().$method(visitor), - Err(e) => Err(de::Error::custom(format_args!("{} while parsing value '{}' provided by {}", e, self.1, self.0))) - } - } - )* - } -} - -impl<'de> de::Deserializer<'de> for Val { - type Error = Error; - fn deserialize_any(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.1.into_deserializer().deserialize_any(visitor) - } - - fn deserialize_seq(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - // std::str::split doesn't work as expected for our use case: when we - // get an empty string we want to produce an empty Vec, but split would - // still yield an iterator with an empty string in it. So we need to - // special case empty strings. - if self.1.is_empty() { - SeqDeserializer::new(empty::()).deserialize_seq(visitor) - } else { - let values = self - .1 - .split(',') - .map(|v| Val(self.0.clone(), v.trim().to_owned())); - SeqDeserializer::new(values).deserialize_seq(visitor) - } - } - - fn deserialize_option(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - if self.1.is_empty() { - visitor.visit_none() - } else { - visitor.visit_some(self) - } - } - - forward_parsed_values! { - u8 => deserialize_u8, - u16 => deserialize_u16, - u32 => deserialize_u32, - u64 => deserialize_u64, - u128 => deserialize_u128, - i8 => deserialize_i8, - i16 => deserialize_i16, - i32 => deserialize_i32, - i64 => deserialize_i64, - i128 => deserialize_i128, - f32 => deserialize_f32, - f64 => deserialize_f64, - } - - fn deserialize_bool(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - if self.1 == "1" || self.1.eq_ignore_ascii_case("true") { - visitor.visit_bool(true) - } else if self.1 == "0" || self.1.eq_ignore_ascii_case("false") { - visitor.visit_bool(false) - } else { - Err(de::Error::custom(format_args!( - "error parsing boolean value: '{}'", - self.1 - ))) - } - } - - #[inline] - fn deserialize_newtype_struct(self, _: &'static str, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - fn deserialize_enum( - self, - _name: &'static str, - _variants: &'static [&'static str], - visitor: V, - ) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_enum(self.1.into_deserializer()) - } - - serde::forward_to_deserialize_any! 
{ - char str string unit - bytes byte_buf map unit_struct tuple_struct - identifier tuple ignored_any - struct - } -} - -impl<'de> de::Deserializer<'de> for VarName { - type Error = Error; - fn deserialize_any(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.0.into_deserializer().deserialize_any(visitor) - } - - #[inline] - fn deserialize_newtype_struct(self, _: &'static str, visitor: V) -> Result - where - V: serde::de::Visitor<'de>, - { - visitor.visit_newtype_struct(self) - } - - serde::forward_to_deserialize_any! { - char str string unit seq option - bytes byte_buf map unit_struct tuple_struct - identifier tuple ignored_any enum - struct bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 - } -} - -/// A deserializer for env vars -struct Deserializer<'de, Iter: Iterator> { - inner: MapDeserializer<'de, Vars, Error>, -} - -impl<'de, Iter: Iterator> Deserializer<'de, Iter> { - fn new(vars: Iter) -> Self { - Deserializer { - inner: MapDeserializer::new(Vars(vars)), - } - } -} - -impl<'de, Iter: Iterator> de::Deserializer<'de> - for Deserializer<'de, Iter> -{ - type Error = Error; - fn deserialize_any(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - self.deserialize_map(visitor) - } - - fn deserialize_map(self, visitor: V) -> Result - where - V: de::Visitor<'de>, - { - visitor.visit_map(self.inner) - } - - serde::forward_to_deserialize_any! { - bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq - bytes byte_buf unit_struct tuple_struct - identifier tuple ignored_any option newtype_struct enum - struct - } -} - -/// Deserializes a type based on information stored in env variables -pub fn from_env() -> Result -where - T: de::DeserializeOwned, -{ - from_iter(env::vars()) -} - -/// Deserializes a type based on an iterable of `(String, String)` -/// representing keys and values -pub fn from_iter(iter: Iter) -> Result -where - T: de::DeserializeOwned, - Iter: IntoIterator, -{ - T::deserialize(Deserializer::new(iter.into_iter())).map_err(|error| match error { - Error::MissingValue(value) => Error::MissingValue(value.to_uppercase()), - _ => error, - }) -} - -/// A type which filters env vars with a prefix for use as serde field inputs -/// -/// These types are created with the [prefixed](fn.prefixed.html) module function -pub struct Prefixed<'a>(Cow<'a, str>); - -impl<'a> Prefixed<'a> { - /// Deserializes a type based on prefixed env variables - pub fn from_env(&self) -> Result - where - T: de::DeserializeOwned, - { - self.from_iter(env::vars()) - } - - /// Deserializes a type based on prefixed (String, String) tuples - pub fn from_iter(&self, iter: Iter) -> Result - where - T: de::DeserializeOwned, - Iter: IntoIterator, - { - crate::from_iter(iter.into_iter().filter_map(|(k, v)| { - if k.starts_with(self.0.as_ref()) { - Some((k.trim_start_matches(self.0.as_ref()).to_owned(), v)) - } else { - None - } - })) - .map_err(|error| match error { - Error::MissingValue(value) => Error::MissingValue( - format!("{prefix}{value}", prefix = self.0, value = value).to_uppercase(), - ), - _ => error, - }) - } -} - -/// Produces an instance of `Prefixed` for prefixing env variable names -/// -/// # Example -/// -/// ```no_run -/// use serde::Deserialize; -/// -/// #[derive(Deserialize, Debug)] -/// struct Config { -/// foo: u16, -/// bar: bool, -/// baz: String, -/// boom: Option, -/// } -/// -/// // all env variables will be expected to be prefixed with APP_ -/// // i.e. 
APP_FOO, APP_BAR, etc. -/// match envy::prefixed("APP_").from_env::() { -/// Ok(config) => println!("{:#?}", config), -/// Err(error) => eprintln!("{:#?}", error), -/// } -/// ``` -pub fn prefixed<'a, C>(prefix: C) -> Prefixed<'a> -where - C: Into>, -{ - Prefixed(prefix.into()) -} - -#[cfg(test)] -mod tests { - use super::*; - use serde::Deserialize; - use std::collections::HashMap; - - #[derive(Default, Deserialize, Debug, PartialEq)] - #[serde(rename_all = "lowercase")] - pub enum Size { - Small, - #[default] - Medium, - Large, - } - - pub fn default_kaboom() -> u16 { - 8080 - } - - #[derive(Deserialize, Debug, PartialEq)] - pub struct CustomNewType(u32); - - #[derive(Deserialize, Debug, PartialEq)] - pub struct Foo { - bar: String, - baz: bool, - zoom: Option, - doom: Vec, - boom: Vec, - #[serde(default = "default_kaboom")] - kaboom: u16, - #[serde(default)] - debug_mode: bool, - #[serde(default)] - size: Size, - provided: Option, - newtype: CustomNewType, - } - - #[test] - fn deserialize_from_iter() { - let data = vec![ - (String::from("BAR"), String::from("test")), - (String::from("BAZ"), String::from("true")), - (String::from("DOOM"), String::from("1, 2, 3 ")), - // Empty string should result in empty vector. - (String::from("BOOM"), String::from("")), - (String::from("SIZE"), String::from("small")), - (String::from("PROVIDED"), String::from("test")), - (String::from("NEWTYPE"), String::from("42")), - ]; - match from_iter::<_, Foo>(data) { - Ok(actual) => assert_eq!( - actual, - Foo { - bar: String::from("test"), - baz: true, - zoom: None, - doom: vec![1, 2, 3], - boom: vec![], - kaboom: 8080, - debug_mode: false, - size: Size::Small, - provided: Some(String::from("test")), - newtype: CustomNewType(42) - } - ), - Err(e) => panic!("{:#?}", e), - } - } - - #[test] - fn fails_with_missing_value() { - let data = vec![ - (String::from("BAR"), String::from("test")), - (String::from("BAZ"), String::from("true")), - ]; - match from_iter::<_, Foo>(data) { - Ok(_) => panic!("expected failure"), - Err(e) => assert_eq!(e, Error::MissingValue("DOOM".into())), - } - } - - #[test] - fn prefixed_fails_with_missing_value() { - let data = vec![ - (String::from("PREFIX_BAR"), String::from("test")), - (String::from("PREFIX_BAZ"), String::from("true")), - ]; - - match prefixed("PREFIX_").from_iter::<_, Foo>(data) { - Ok(_) => panic!("expected failure"), - Err(e) => assert_eq!(e, Error::MissingValue("PREFIX_DOOM".into())), - } - } - - #[test] - fn fails_with_invalid_type() { - let data = vec![ - (String::from("BAR"), String::from("test")), - (String::from("BAZ"), String::from("notabool")), - (String::from("DOOM"), String::from("1,2,3")), - ]; - match from_iter::<_, Foo>(data) { - Ok(_) => panic!("expected failure"), - Err(e) => assert_eq!( - e, - Error::Custom(String::from("provided string was not `true` or `false` while parsing value \'notabool\' provided by BAZ")) - ), - } - } - - #[test] - fn deserializes_from_prefixed_fieldnames() { - let data = vec![ - (String::from("APP_BAR"), String::from("test")), - (String::from("APP_BAZ"), String::from("true")), - (String::from("APP_DOOM"), String::from("")), - (String::from("APP_BOOM"), String::from("4,5")), - (String::from("APP_SIZE"), String::from("small")), - (String::from("APP_PROVIDED"), String::from("test")), - (String::from("APP_NEWTYPE"), String::from("42")), - ]; - match prefixed("APP_").from_iter::<_, Foo>(data) { - Ok(actual) => assert_eq!( - actual, - Foo { - bar: String::from("test"), - baz: true, - zoom: None, - doom: vec![], - boom: 
vec!["4".to_string(), "5".to_string()], - kaboom: 8080, - debug_mode: false, - size: Size::Small, - provided: Some(String::from("test")), - newtype: CustomNewType(42) - } - ), - Err(e) => panic!("{:#?}", e), - } - } - - #[test] - fn prefixed_strips_prefixes() { - let mut expected = HashMap::new(); - expected.insert("foo".to_string(), "bar".to_string()); - assert_eq!( - prefixed("PRE_").from_iter(vec![("PRE_FOO".to_string(), "bar".to_string())]), - Ok(expected) - ); - } - - #[test] - fn prefixed_doesnt_parse_non_prefixed() { - let mut expected = HashMap::new(); - expected.insert("foo".to_string(), 12); - assert_eq!( - prefixed("PRE_").from_iter(vec![ - ("FOO".to_string(), "asd".to_string()), - ("PRE_FOO".to_string(), "12".to_string()) - ]), - Ok(expected) - ); - } - - #[test] - fn deserialize_optional() { - #[derive(Deserialize)] - #[serde(default)] - struct X { - val: Option, - } - - impl Default for X { - fn default() -> Self { - Self { val: Some(123) } - } - } - - let data = vec![(String::from("VAL"), String::from(""))]; - - let res = from_iter::<_, X>(data).unwrap(); - assert_eq!(res.val, None) - } -} diff --git a/resources/icon.opt.svg b/resources/icon.opt.svg deleted file mode 100644 index 63d9a3e..0000000 --- a/resources/icon.opt.svg +++ /dev/null @@ -1 +0,0 @@ - diff --git a/resources/icon.svg b/resources/icon.svg deleted file mode 100644 index 718ebc4..0000000 --- a/resources/icon.svg +++ /dev/null @@ -1,56 +0,0 @@ - - - - - - - - - - - - - diff --git a/src/app.rs b/src/app.rs deleted file mode 100644 index 27980bf..0000000 --- a/src/app.rs +++ /dev/null @@ -1,410 +0,0 @@ -use std::{ops::Bound, path::PathBuf, sync::Arc}; - -use async_zip::tokio::read::ZipEntryReader; -use axum::{ - body::Body, - extract::{Host, Request, State}, - http::{Response, Uri}, - response::{IntoResponse, Redirect}, - routing::{any, get, post}, - Form, Json, Router, -}; -use headers::HeaderMapExt; -use http::{HeaderMap, StatusCode}; -use serde::Deserialize; -use tokio::{ - fs::File, - io::{AsyncBufReadExt, AsyncReadExt, BufReader}, -}; -use tokio_util::{ - compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt}, - io::ReaderStream, -}; -use tower_http::trace::{DefaultOnResponse, TraceLayer}; - -use crate::{ - artifact_api::{Artifact, ArtifactApi, ArtifactOrRun}, - cache::{Cache, CacheEntry, GetEntryResult, GetFileResult, GetFileResultFile, IndexEntry}, - config::Config, - error::{Error, Result}, - gzip_reader::{PrecompressedGzipReader, GZIP_EXTRA_LEN}, - query::Query, - templates::{self, LinkItem}, - util::{self, InsertTypedHeader}, - App, -}; - -#[derive(Clone)] -struct AppState { - i: Arc, -} - -struct AppInner { - cfg: Config, - cache: Cache, - api: ArtifactApi, -} - -impl Default for App { - fn default() -> Self { - Self::new() - } -} - -#[derive(Deserialize)] -struct UrlForm { - url: String, -} - -impl App { - pub fn new() -> Self { - Self - } - - fn new_state(&self) -> AppState { - AppState::new() - } - - pub async fn run(&self) -> Result<()> { - let address = "0.0.0.0:3000"; - let listener = tokio::net::TcpListener::bind(address).await?; - tracing::info!("Listening on http://{address}"); - - let router = Router::new() - // Prevent search indexing since artifactview serves temporary artifacts - .route( - "/robots.txt", - get(|| async { "User-agent: *\nDisallow: /\n" }), - ) - // Put the API in the .well-known folder, since it is disabled for pages - .route("/.well-known/api/artifacts", get(Self::get_artifacts)) - .route("/.well-known/api/artifact", get(Self::get_artifact)) - 
.route("/.well-known/api/files", get(Self::get_files)) - // Prevent access to the .well-known folder since it enables abuse - // (e.g. SSL certificate registration by an attacker) - .route("/.well-known/*path", any(|| async { Error::Inaccessible })) - // Serve artifact pages - .route("/", get(Self::get_page)) - .route("/", post(Self::post_homepage)) - .fallback(get(Self::get_page)) - .with_state(self.new_state()) - // Log requests - .layer( - TraceLayer::new_for_http() - .make_span_with(|request: &Request| { - tracing::error_span!("request", url = util::full_url_from_request(request),) - }) - .on_response(DefaultOnResponse::new().level(tracing::Level::INFO)), - ); - axum::serve(listener, router).await?; - Ok(()) - } - - async fn get_page( - State(state): State, - Host(host): Host, - uri: Uri, - request: Request, - ) -> Result> { - let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - - if subdomain.is_empty() { - // Main page - if uri.path() != "/" { - return Err(Error::NotFound("path".into())); - } - Ok(Response::builder() - .typed_header(headers::ContentType::html()) - .body(templates::Index::default().to_string().into())?) - } else { - let query = Query::from_subdomain(subdomain)?; - let path = percent_encoding::percent_decode_str(uri.path()).decode_utf8_lossy(); - let hdrs = request.headers(); - - let res = state.i.cache.get_entry(&state.i.api, &query).await?; - match res { - GetEntryResult::Entry { entry, zip_path } => { - match entry.get_file(&path, uri.query().unwrap_or_default())? { - GetFileResult::File(res) => { - Self::serve_artifact_file(state, entry, zip_path, res, hdrs).await - } - GetFileResult::Listing(listing) => { - if !path.ends_with('/') { - return Ok(Redirect::to(&format!("{path}/")).into_response()); - } - - // TODO: store actual artifact names - let artifact_name = format!("A{}", query.artifact.unwrap()); - - let mut path_components = vec![ - LinkItem { - name: query.shortid(), - url: state - .i - .cfg - .url_with_subdomain(&query.subdomain_with_artifact(None)), - }, - LinkItem { - name: artifact_name.to_owned(), - url: "/".to_string(), - }, - ]; - let mut buf = String::new(); - for s in path.split('/').filter(|s| !s.is_empty()) { - buf.push('/'); - buf += s; - path_components.push(LinkItem { - name: s.to_owned(), - url: buf.clone(), - }); - } - - let tmpl = templates::Listing { - main_url: state.i.cfg.main_url(), - version: templates::Version, - artifact_name: &artifact_name, - path_components, - n_dirs: listing.n_dirs, - n_files: listing.n_files, - has_parent: listing.has_parent, - entries: listing.entries, - }; - - Ok(Response::builder() - .typed_header(headers::ContentType::html()) - .body(tmpl.to_string().into())?) - } - } - } - GetEntryResult::Artifacts(artifacts) => { - if uri.path() != "/" { - return Err(Error::NotFound("path".into())); - } - if artifacts.is_empty() { - return Err(Error::NotFound("artifacts".into())); - } - let tmpl = templates::Selection { - main_url: state.i.cfg.main_url(), - run_url: &query.forge_url(), - run_name: &query.shortid(), - artifacts: artifacts - .into_iter() - .map(|a| LinkItem::from_artifact(a, &query, &state.i.cfg)) - .collect(), - }; - Ok(Response::builder() - .typed_header(headers::ContentType::html()) - .body(tmpl.to_string().into())?) 
- } - } - } - } - - async fn post_homepage( - State(state): State, - Host(host): Host, - Form(url): Form, - ) -> Result { - let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - - if subdomain.is_empty() { - let query = Query::from_forge_url(&url.url)?; - let subdomain = query.subdomain(); - let target = format!( - "{}{}.{}", - state.i.cfg.url_proto(), - subdomain, - state.i.cfg.load().root_domain - ); - Ok(Redirect::to(&target)) - } else { - Err(Error::MethodNotAllowed) - } - } - - async fn serve_artifact_file( - state: AppState, - entry: Arc, - zip_path: PathBuf, - res: GetFileResultFile, - hdrs: &HeaderMap, - ) -> Result> { - let file = res.file; - - // Dont serve files above the configured size limit - let lim = state.i.cfg.load().max_file_size; - if lim.is_some_and(|lim| file.uncompressed_size > lim) { - return Err(Error::BadRequest( - format!( - "file too large (size: {}, limit: {})", - file.uncompressed_size, - lim.unwrap() - ) - .into(), - )); - } - - let mut resp = Response::builder() - .status(res.status) - .typed_header(headers::AcceptRanges::bytes()); - if let Some(mime) = res.mime { - resp = resp.typed_header(headers::ContentType::from(mime)); - } - if let Some(last_mod) = entry.last_modified { - resp = resp.typed_header(headers::LastModified::from(last_mod)); - } - - // handle if-(un)modified queries - if let Some(modified) = entry.last_modified { - if let Some(if_unmodified_since) = hdrs.typed_get::() { - if !if_unmodified_since.precondition_passes(modified) { - return Ok(resp - .status(StatusCode::PRECONDITION_FAILED) - .body(Body::empty())?); - } - } - if let Some(if_modified_since) = hdrs.typed_get::() { - if !if_modified_since.is_modified(modified) { - return Ok(resp.status(StatusCode::NOT_MODIFIED).body(Body::empty())?); - } - } - } - - let zip_file = File::open(&zip_path).await?; - let range = hdrs.typed_get::(); - - if matches!(file.compression, async_zip::Compression::Deflate) - && range.is_none() - && util::accepts_gzip(hdrs) - { - // Read compressed file - let reader = PrecompressedGzipReader::new(zip_file, &file).await?; - resp = resp - .typed_header(headers::ContentLength( - u64::from(file.compressed_size) + GZIP_EXTRA_LEN, - )) - .typed_header(headers::ContentEncoding::gzip()); - - Ok(resp.body(Body::from_stream(ReaderStream::new(reader)))?) 
- } else { - // Read decompressed file - let mut zip_reader = BufReader::new(zip_file); - util::seek_to_data_offset(&mut zip_reader, file.header_offset.into()).await?; - let reader = ZipEntryReader::new_with_owned( - zip_reader.compat(), - file.compression, - file.compressed_size.into(), - ); - - if let Some(rheader) = range { - let total_len = u64::from(file.uncompressed_size); - let mut ranges = rheader.satisfiable_ranges(total_len); - if let Some(range) = ranges.next() { - if ranges.next().is_some() { - return Err(Error::BadRequest( - "multipart ranges are not implemented".into(), - )); - } - let start = match range.0 { - Bound::Included(n) => n, - Bound::Excluded(n) => n + 1, - Bound::Unbounded => 0, - }; - let end = match range.1 { - Bound::Included(n) => n + 1, - Bound::Excluded(n) => n, - Bound::Unbounded => total_len, - }; - - let mut bufreader = tokio::io::BufReader::new(reader.compat()); - - // Advance the BufReader by the parsed offset - let mut to_consume = usize::try_from(start)?; - while to_consume > 0 { - let take = bufreader.fill_buf().await?.len().min(to_consume); - bufreader.consume(take); - to_consume -= take; - } - - let content_length = end - start; - - return Ok(resp - .status(StatusCode::PARTIAL_CONTENT) - .typed_header(headers::ContentLength(content_length)) - .typed_header( - headers::ContentRange::bytes(range, total_len) - .map_err(|e| Error::Internal(e.to_string().into()))?, - ) - .body(Body::from_stream(ReaderStream::new( - bufreader.take(content_length), - )))?); - } - } - Ok(resp - .typed_header(headers::ContentLength(file.uncompressed_size.into())) - .body(Body::from_stream(ReaderStream::new(reader.compat())))?) - } - } - - /// API endpoint to list artifacts of a CI run - async fn get_artifacts( - State(state): State, - Host(host): Host, - ) -> Result>> { - let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - let query = Query::from_subdomain(subdomain)?; - let artifacts = state.i.api.list(&query).await?; - Ok(Json(artifacts)) - } - - /// API endpoint to get the metadata of the current artifact - async fn get_artifact( - State(state): State, - Host(host): Host, - ) -> Result> { - let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - let query = Query::from_subdomain(subdomain)?; - - if query.artifact.is_none() { - return Err(Error::BadRequest("no artifact specified".into())); - } - - let artifact = state.i.api.fetch(&query).await?; - match artifact { - ArtifactOrRun::Artifact(artifact) => Ok(Json(artifact)), - ArtifactOrRun::Run(_) => unreachable!(), - } - } - - /// API endpoint to get a file listing - async fn get_files( - State(state): State, - Host(host): Host, - ) -> Result>> { - let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - let query = Query::from_subdomain(subdomain)?; - - if query.artifact.is_none() { - return Err(Error::BadRequest("no artifact specified".into())); - } - - let res = state.i.cache.get_entry(&state.i.api, &query).await?; - let entry = match res { - GetEntryResult::Entry { entry, .. 
} => entry, - GetEntryResult::Artifacts(_) => unreachable!(), - }; - let files = entry.get_files(); - Ok(Json(files)) - } -} - -impl AppState { - pub fn new() -> Self { - let cfg = Config::default(); - let cache = Cache::new(cfg.clone()); - let api = ArtifactApi::new(cfg.clone()); - Self { - i: Arc::new(AppInner { cfg, cache, api }), - } - } -} diff --git a/src/artifact_api.rs b/src/artifact_api.rs index e49cafa..c8ee33d 100644 --- a/src/artifact_api.rs +++ b/src/artifact_api.rs @@ -1,16 +1,10 @@ //! API client to fetch CI artifacts from Github and Forgejo -use std::{fs::File, io::Cursor, path::Path}; - -use http::header; -use reqwest::{Client, ClientBuilder, IntoUrl, RequestBuilder, Url}; +use anyhow::{anyhow, Result}; +use reqwest::{header, Client, ClientBuilder, IntoUrl, RequestBuilder}; use serde::{Deserialize, Serialize}; -use crate::{ - config::Config, - error::{Error, Result}, - query::Query, -}; +use crate::{config::Config, query::Query}; pub struct ArtifactApi { http: Client, @@ -26,11 +20,6 @@ pub struct Artifact { pub download_url: String, } -pub enum ArtifactOrRun { - Artifact(Artifact), - Run(Vec), -} - #[derive(Deserialize)] struct GithubArtifact { id: u64, @@ -72,7 +61,7 @@ impl From for Artifact { } impl ForgejoArtifact { - fn into_artifact(self, id: u64, query: &Query) -> Artifact { + fn to_artifact(self, id: u64, query: &Query) -> Artifact { Artifact { download_url: format!( "https://{}/{}/{}/actions/runs/{}/artifacts/{}", @@ -103,76 +92,26 @@ impl ArtifactApi { pub async fn list(&self, query: &Query) -> Result> { if query.is_github() { self.list_github(query).await } else { self.list_forgejo(query).await } } - pub async fn fetch(&self, query: &Query) -> Result { + pub async fn fetch(&self, query: &Query) -> Result { if query.is_github() { self.fetch_github(query).await } else { // Forgejo currently has no API for fetching single artifacts let mut artifacts = self.list_forgejo(query).await?; - - match query.artifact { - Some(artifact) => { - let i = usize::try_from(artifact)?; - if i == 0 || i > artifacts.len() { - return Err(Error::NotFound("artifact".into())); - } - Ok(ArtifactOrRun::Artifact(artifacts.swap_remove(i - 1))) - } - None => Ok(ArtifactOrRun::Run(artifacts)), + let i = usize::try_from(query.artifact)?; + if i == 0 || i > artifacts.len() { + return Err(anyhow!("Artifact not found")); } + Ok(artifacts.swap_remove(i - 1)) } } - pub async fn download(&self, artifact: &Artifact, path: &Path) -> Result<()> { - if artifact.expired { - return Err(Error::Expired); - } - - let lim = self.cfg.load().max_artifact_size; - let check_lim = |size: u64| { - if lim.is_some_and(|lim| u32::try_from(size).map(|size| size > lim).unwrap_or(true)) { - Err(Error::BadRequest( - format!( - "artifact too large (size: {}, limit: {})", - artifact.size, - lim.unwrap() - ) - .into(), - )) - } else { - Ok(()) - } - }; - check_lim(artifact.size)?; - - let url = Url::parse(&artifact.download_url)?; - let req = if url.domain() == Some("api.github.com") { - self.get_github(url) - } else { - self.http.get(url) - }; - - let resp = req.send().await?.error_for_status()?; - - if let Some(act_len) = resp.content_length() { - check_lim(act_len)?; - } - - let tmp_path = path.with_extension(format!("tmp.{:x}", rand::random::())); - let mut file = File::create(&tmp_path)?; - let mut content = Cursor::new(resp.bytes().await?); - std::io::copy(&mut content, &mut file)?; - std::fs::rename(&tmp_path, path)?; - tracing::info!("Downloaded artifact from {}", 
artifact.download_url); - Ok(()) - } - async fn list_forgejo(&self, query: &Query) -> Result> { let url = format!( "https://{}/{}/{}/actions/runs/{}/artifacts", @@ -192,7 +131,7 @@ impl ArtifactApi { .artifacts .into_iter() .enumerate() - .map(|(i, a)| a.into_artifact(i as u64 + 1, query)) + .map(|(i, a)| a.to_artifact(i as u64 + 1, query)) .collect::>(); Ok(artifacts) @@ -215,25 +154,21 @@ impl ArtifactApi { Ok(resp.artifacts.into_iter().map(Artifact::from).collect()) } - async fn fetch_github(&self, query: &Query) -> Result { - match query.artifact { - Some(artifact) => { - let url = format!( - "https://api.github.com/repos/{}/{}/actions/artifacts/{}", - query.user, query.repo, artifact - ); + async fn fetch_github(&self, query: &Query) -> Result { + let url = format!( + "https://api.github.com/repos/{}/{}/actions/artifacts/{}", + query.user, query.repo, query.artifact + ); - let artifact = self - .get_github(url) - .send() - .await? - .error_for_status()? - .json::() - .await?; - Ok(ArtifactOrRun::Artifact(artifact.into())) - } - None => Ok(ArtifactOrRun::Run(self.list_github(query).await?)), - } + let artifact = self + .get_github(url) + .send() + .await? + .error_for_status()? + .json::() + .await?; + + Ok(artifact.into()) } fn get_github(&self, url: U) -> RequestBuilder { @@ -250,7 +185,7 @@ impl ArtifactApi { mod tests { use crate::{config::Config, query::Query}; - use super::{ArtifactApi, ArtifactOrRun}; + use super::ArtifactApi; #[tokio::test] async fn fetch_forgejo() { @@ -259,22 +194,14 @@ mod tests { user: "HSA".to_owned(), repo: "Visitenbuch".to_owned(), run: 32, - artifact: Some(1), + artifact: 1, }; let api = ArtifactApi::new(Config::default()); let res = api.fetch(&query).await.unwrap(); - - if let ArtifactOrRun::Artifact(res) = res { - assert_eq!(res.name, "playwright-report"); - assert_eq!( - res.download_url, - "https://code.thetadev.de/HSA/Visitenbuch/actions/runs/32/artifacts/playwright-report" - ); - assert_eq!(res.id, 1); - assert_eq!(res.size, 574292); - } else { - panic!("got run"); - } + assert_eq!(res.name, "playwright-report"); + assert_eq!(res.download_url, "https://code.thetadev.de/HSA/Visitenbuch/actions/runs/32/artifacts/playwright-report"); + assert_eq!(res.id, 1); + assert_eq!(res.size, 574292); } #[tokio::test] @@ -284,21 +211,13 @@ mod tests { user: "actions".to_owned(), repo: "upload-artifact".to_owned(), run: 8805345396, - artifact: Some(1440556464), + artifact: 1440556464, }; let api = ArtifactApi::new(Config::default()); let res = api.fetch(&query).await.unwrap(); - - if let ArtifactOrRun::Artifact(res) = res { - assert_eq!(res.name, "Artifact-Wildcard-macos-latest"); - assert_eq!( - res.download_url, - "https://api.github.com/repos/actions/upload-artifact/actions/artifacts/1440556464/zip" - ); - assert_eq!(res.id, 1440556464); - assert_eq!(res.size, 334); - } else { - panic!("got run"); - } + assert_eq!(res.name, "Artifact-Wildcard-macos-latest"); + assert_eq!(res.download_url, "https://api.github.com/repos/actions/upload-artifact/actions/artifacts/1440556464/zip"); + assert_eq!(res.id, 1440556464); + assert_eq!(res.size, 334); } } diff --git a/src/cache.rs b/src/cache.rs deleted file mode 100644 index 0c5a5bf..0000000 --- a/src/cache.rs +++ /dev/null @@ -1,317 +0,0 @@ -use std::{ - borrow::Cow, - collections::{BTreeMap, HashMap}, - path::{Path, PathBuf}, - sync::Arc, - time::{Duration, SystemTime}, -}; - -use async_zip::{tokio::read::fs::ZipFileReader, Compression}; -use http::StatusCode; -use mime::Mime; -use path_macro::path; -use 
quick_cache::sync::Cache as QuickCache; -use serde::Serialize; -use serde_hex::{SerHex, Strict}; - -use crate::{ - artifact_api::{Artifact, ArtifactApi, ArtifactOrRun}, - config::Config, - error::{Error, Result}, - query::Query, - util, -}; - -pub struct Cache { - cfg: Config, - qc: QuickCache<[u8; 16], Arc>, -} - -pub struct CacheEntry { - pub files: HashMap, - pub last_modified: Option, -} - -#[derive(Clone)] -pub struct FileEntry { - pub header_offset: u32, - pub uncompressed_size: u32, - pub compressed_size: u32, - pub crc32: u32, - pub compression: Compression, -} - -pub enum GetEntryResult { - Entry { - entry: Arc, - zip_path: PathBuf, - }, - Artifacts(Vec), -} - -pub enum GetFileResult { - File(GetFileResultFile), - Listing(Listing), -} - -pub struct GetFileResultFile { - pub file: FileEntry, - pub mime: Option, - pub status: StatusCode, -} - -#[derive(Serialize)] -pub struct IndexEntry { - pub name: String, - pub size: u32, - #[serde(with = "SerHex::")] - pub crc32: u32, -} - -pub struct Listing { - pub entries: Vec, - pub n_files: usize, - pub n_dirs: usize, - pub has_parent: bool, -} - -pub struct ListingEntry { - pub name: String, - pub url: String, - pub size: u32, - pub crc32: String, - pub is_dir: bool, -} - -impl Cache { - pub fn new(cfg: Config) -> Self { - Self { - cfg, - qc: QuickCache::new(50), - } - } - - pub fn get_path(&self, query: &Query) -> PathBuf { - path!(self.cfg.load().cache_dir / format!("{}.zip", hex::encode(query.siphash()))) - } - - pub async fn get_entry(&self, api: &ArtifactApi, query: &Query) -> Result { - if query.artifact.is_some() { - let hash = query.siphash(); - let zip_path = path!(self.cfg.load().cache_dir / format!("{}.zip", hex::encode(hash))); - if !zip_path.is_file() { - let artifact = api.fetch(query).await?; - let artifact = match artifact { - ArtifactOrRun::Artifact(artifact) => artifact, - ArtifactOrRun::Run(_) => unreachable!(), - }; - api.download(&artifact, &zip_path).await?; - } - - let timeout = self - .cfg - .load() - .zip_timeout_ms - .map(|t| Duration::from_millis(t.into())); - let mut entry = self - .qc - .get_or_insert_async(&hash, async { - Ok::<_, Error>(Arc::new(CacheEntry::new(&zip_path, timeout).await?)) - }) - .await?; - - // Verify if the cached entry is fresh - let meta = tokio::fs::metadata(&zip_path).await?; - if meta.modified().ok() != entry.last_modified { - tracing::info!("cached file {zip_path:?} changed"); - entry = Arc::new(CacheEntry::new(&zip_path, timeout).await?); - self.qc.insert(hash, entry.clone()); - } - Ok(GetEntryResult::Entry { entry, zip_path }) - } else { - let run = api.fetch(query).await?; - let artifacts = match run { - ArtifactOrRun::Artifact(_) => unreachable!(), - ArtifactOrRun::Run(run) => run, - }; - - Ok(GetEntryResult::Artifacts(artifacts)) - } - } -} - -impl CacheEntry { - async fn new(zip_path: &Path, timeout: Option) -> Result { - let meta = tokio::fs::metadata(&zip_path).await?; - let zip_fut = ZipFileReader::new(&zip_path); - let zip = match timeout { - Some(timeout) => tokio::time::timeout(timeout, zip_fut).await??, - None => zip_fut.await?, - }; - - Ok(Self { - files: zip - .file() - .entries() - .iter() - .filter_map(|entry| { - Some(( - entry.filename().as_str().ok()?.to_owned(), - FileEntry { - header_offset: entry.header_offset().try_into().ok()?, - uncompressed_size: entry.uncompressed_size().try_into().ok()?, - compressed_size: entry.compressed_size().try_into().ok()?, - crc32: entry.crc32(), - compression: entry.compression(), - }, - )) - }) - .collect(), - last_modified: 
meta.modified().ok(), - }) - } - - pub fn get_file(&self, path: &str, url_query: &str) -> Result { - let path = path.trim_start_matches('/'); - let mut index_path: Option> = None; - - if path.is_empty() { - // Special case: open index.html directly - index_path = Some("index.html".into()); - } - // Attempt to access the following pages - // 1. Site path directly - // 2. Site path + `/index.html` - else if let Some(file) = self.files.get(path) { - return Ok(GetFileResult::File(GetFileResultFile { - file: file.clone(), - mime: util::path_mime(path), - status: StatusCode::OK, - })); - } else if util::site_path_ext(path).is_none() { - index_path = Some(format!("{path}/index.html").into()); - } - - if let Some(file) = index_path - .and_then(|p: Cow| self.files.get(p.as_ref())) - .or_else(|| self.files.get("200.html")) - { - // index.html or SPA entrypoint - return Ok(GetFileResult::File(GetFileResultFile { - file: file.clone(), - mime: Some(mime::TEXT_HTML), - status: StatusCode::OK, - })); - } - - // Directory listing - let path_as_dir: Cow = if path.is_empty() || path.ends_with('/') { - path.into() - } else { - format!("{path}/").into() - }; - if self - .files - .keys() - .any(|n| n.starts_with(path_as_dir.as_ref())) - { - let mut rev = false; - let mut col = b'N'; - for (k, v) in url::form_urlencoded::parse(url_query.as_bytes()) { - if k == "C" && !v.is_empty() { - col = v.as_bytes()[0]; - } else if k == "O" { - rev = v == "D"; - } - } - return Ok(GetFileResult::Listing(self.get_listing( - &path_as_dir, - col, - rev, - ))); - } else if let Some(file) = self.files.get("404.html") { - // Custom 404 error page - return Ok(GetFileResult::File(GetFileResultFile { - file: file.clone(), - mime: Some(mime::TEXT_HTML), - status: StatusCode::NOT_FOUND, - })); - } - - Err(Error::NotFound("requested file".into())) - } - - pub fn get_files(&self) -> Vec { - self.files - .iter() - .map(|(n, entry)| IndexEntry { - name: n.to_owned(), - size: entry.uncompressed_size, - crc32: entry.crc32, - }) - .collect() - } - - fn get_listing(&self, path: &str, col: u8, rev: bool) -> Listing { - let entries = self - .files - .iter() - .filter_map(|(n, entry)| { - n.strip_prefix(path).map(|n| { - let n = n.split_inclusive('/').next().unwrap(); - (n, entry) - }) - }) - .collect::>(); - - // Put directories first - let mut directories = Vec::new(); - let mut files = Vec::new(); - - let entries_iter: Box> = if col == b'N' && rev { - Box::new(entries.into_iter().rev()) - } else { - Box::new(entries.into_iter()) - }; - - for (n, entry) in entries_iter { - if n.ends_with('/') { - directories.push(ListingEntry { - name: n.to_owned(), - url: format!("{n}{path}"), - size: 0, - crc32: "-".to_string(), - is_dir: true, - }); - } else { - files.push(ListingEntry { - name: n.to_owned(), - url: format!("{n}{path}"), - size: entry.uncompressed_size, - crc32: hex::encode(entry.crc32.to_le_bytes()), - is_dir: false, - }); - } - } - - // Sort by size - if col == b'S' { - if rev { - files.sort_by(|a, b| b.size.cmp(&a.size)); - } else { - files.sort_by_key(|f| f.size); - } - } - - let n_dirs = directories.len(); - let n_files = files.len(); - directories.append(&mut files); - - Listing { - entries: directories, - n_dirs, - n_files, - has_parent: !path.is_empty(), - } - } -} diff --git a/src/config.rs b/src/config.rs index e24657d..5e241e8 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,106 +1,35 @@ -use std::{ - path::{Path, PathBuf}, - sync::Arc, -}; +use std::sync::Arc; -use serde::{Deserialize, Serialize}; - -use crate::error::{Error, 
Result}; +use arc_swap::{ArcSwap, Guard}; #[derive(Clone)] pub struct Config { - inner: Arc, + inner: Arc>, } -struct ConfigInner { - data: ConfigData, - main_url: String, -} - -#[derive(Debug, Serialize, Deserialize)] -#[serde(default)] +#[derive(Default)] pub struct ConfigData { - pub cache_dir: PathBuf, - pub root_domain: String, - pub no_https: bool, - pub max_artifact_size: Option, - pub max_file_size: Option, - pub max_age_h: Option, - pub zip_timeout_ms: Option, pub github_token: Option, } -impl Default for ConfigData { - fn default() -> Self { - Self { - cache_dir: Path::new("/tmp/artifactview").into(), - root_domain: "localhost:3000".to_string(), - no_https: false, - max_artifact_size: Some(100_000_000), - max_file_size: Some(100_000_000), - max_age_h: Some(12), - zip_timeout_ms: Some(1000), - github_token: None, - } - } -} - impl Default for Config { fn default() -> Self { - Self::new().expect("Could not initialize config") - } -} - -impl ConfigData { - fn url_proto(&self) -> &'static str { - if self.no_https { - "http://" - } else { - "https://" - } + Self::new(ConfigData::default()) } } impl Config { - pub fn new() -> Result { - let data = - envy::from_env::().map_err(|e| Error::Internal(e.to_string().into()))?; - Self::from_data(data) + pub fn new(data: ConfigData) -> Self { + Self { + inner: Arc::new(ArcSwap::from_pointee(data)), + } } - pub fn from_data(data: ConfigData) -> Result { - Self::before_update(&data)?; - Ok(Self { - inner: Arc::new(ConfigInner { - main_url: format!("{}{}", data.url_proto(), data.root_domain), - data, - }), - }) + pub fn update(&self, data: ConfigData) { + self.inner.swap(Arc::new(data)); } - fn before_update(data: &ConfigData) -> Result<()> { - std::fs::create_dir_all(&data.cache_dir)?; - Ok(()) - } - - pub fn load(&self) -> &ConfigData { - &self.inner.data - } - - pub fn url_proto(&self) -> &'static str { - self.inner.data.url_proto() - } - - pub fn url_with_subdomain(&self, subdomain: &str) -> String { - format!( - "{}{}.{}", - self.url_proto(), - subdomain, - self.inner.data.root_domain - ) - } - - pub fn main_url(&self) -> &str { - &self.inner.main_url + pub fn load(&self) -> Guard> { + self.inner.load() } } diff --git a/src/error.rs b/src/error.rs deleted file mode 100644 index a00c8ba..0000000 --- a/src/error.rs +++ /dev/null @@ -1,88 +0,0 @@ -use std::borrow::Cow; - -use axum::{ - body::Body, - response::{IntoResponse, Response}, -}; -use http::StatusCode; - -use crate::{templates, util::InsertTypedHeader}; - -pub type Result = core::result::Result; - -#[derive(Debug, thiserror::Error)] -pub enum Error { - #[error("http client error: {0}")] - HttpClient(Cow<'static, str>, StatusCode), - #[error("http server error: {0}")] - Http(#[from] http::Error), - #[error("io: {0}")] - Io(#[from] std::io::Error), - #[error("zip: {0}")] - Zip(#[from] async_zip::error::ZipError), - #[error("internal error: {0}")] - Internal(Cow<'static, str>), - - #[error("invalid request: {0}")] - BadRequest(Cow<'static, str>), - #[error("expected URL format: -------")] - InvalidUrl, - #[error("{0} not found")] - NotFound(Cow<'static, str>), - #[error("this path cannot be accessed for security reasons")] - Inaccessible, - #[error("this artifact has already expired")] - Expired, - #[error("timeout")] - Timeout(#[from] tokio::time::error::Elapsed), - #[error("method not allowed")] - MethodNotAllowed, -} - -impl From for Error { - fn from(value: reqwest::Error) -> Self { - Self::HttpClient( - value.to_string().into(), - 
value.status().unwrap_or(StatusCode::INTERNAL_SERVER_ERROR), - ) - } -} - -impl From for Error { - fn from(value: std::num::TryFromIntError) -> Self { - Self::Internal(value.to_string().into()) - } -} - -impl From for Error { - fn from(value: url::ParseError) -> Self { - Self::Internal(value.to_string().into()) - } -} - -impl Error { - pub fn status(&self) -> StatusCode { - match self { - Error::BadRequest(_) | Error::InvalidUrl => StatusCode::BAD_REQUEST, - Error::NotFound(_) | Error::Inaccessible | Error::Expired => StatusCode::NOT_FOUND, - Error::HttpClient(_, status) => *status, - Error::MethodNotAllowed => StatusCode::METHOD_NOT_ALLOWED, - _ => StatusCode::INTERNAL_SERVER_ERROR, - } - } -} - -impl IntoResponse for Error { - fn into_response(self) -> axum::response::Response { - let status = self.status(); - let tmpl = templates::Error { - msg: &self.to_string(), - status: status.as_u16(), - }; - Response::builder() - .status(self.status()) - .typed_header(headers::ContentType::html()) - .body(Body::from(tmpl.to_string())) - .unwrap() - } -} diff --git a/src/gzip_reader.rs b/src/gzip_reader.rs deleted file mode 100644 index 9bbc6fb..0000000 --- a/src/gzip_reader.rs +++ /dev/null @@ -1,65 +0,0 @@ -use std::task::Poll; - -use pin_project::pin_project; -use tokio::{ - fs::File, - io::{AsyncRead, AsyncReadExt, BufReader, Take}, -}; -use tokio_util::bytes::BufMut; - -use crate::{cache::FileEntry, error::Result, util}; - -#[pin_project] -pub struct PrecompressedGzipReader { - #[pin] - reader: Take>, - crc: u32, - uncompressed_size: u32, - state: State, -} - -pub const GZIP_EXTRA_LEN: u64 = 18; - -enum State { - Header, - Body, - Finished, -} - -impl PrecompressedGzipReader { - pub async fn new(file: File, entry: &FileEntry) -> Result { - let mut reader = BufReader::new(file); - util::seek_to_data_offset(&mut reader, entry.header_offset.into()).await?; - Ok(Self { - reader: reader.take(entry.compressed_size.into()), - crc: entry.crc32, - uncompressed_size: entry.uncompressed_size, - state: State::Header, - }) - } -} - -impl AsyncRead for PrecompressedGzipReader { - fn poll_read( - mut self: std::pin::Pin<&mut Self>, - cx: &mut std::task::Context<'_>, - buf: &mut tokio::io::ReadBuf<'_>, - ) -> Poll> { - match self.state { - State::Header => { - buf.put_slice(&[0x1f, 0x8b, 0x08, 0, 0, 0, 0, 0, 0, 0xff]); - self.state = State::Body; - } - State::Body => { - std::task::ready!(self.as_mut().project().reader.poll_read(cx, buf))?; - if self.reader.limit() == 0 { - buf.put_u32_le(self.crc); - buf.put_u32_le(self.uncompressed_size); - self.state = State::Finished; - } - } - State::Finished => {} - } - Poll::Ready(Ok(())) - } -} diff --git a/src/lib.rs b/src/lib.rs index f6cdddd..5c5a809 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,13 +1,4 @@ -#![allow(dead_code)] - -mod app; mod artifact_api; -mod cache; mod config; -mod error; -mod gzip_reader; mod query; -mod templates; -mod util; - -pub struct App; +mod storage; diff --git a/src/main.rs b/src/main.rs index 6309057..e7a11a9 100644 --- a/src/main.rs +++ b/src/main.rs @@ -1,13 +1,3 @@ -use artifactview::App; - -#[tokio::main] -async fn main() { - _ = dotenvy::dotenv(); - if std::env::var_os("RUST_LOG").is_none() { - std::env::set_var("RUST_LOG", "info"); - } - tracing_subscriber::fmt::init(); - - let app = App::new(); - app.run().await.unwrap() +fn main() { + println!("Hello, world!"); } diff --git a/src/query.rs b/src/query.rs index c3de5ce..933df96 100644 --- a/src/query.rs +++ b/src/query.rs @@ -1,13 +1,8 @@ -use std::{fmt::Write, hash::Hash}; - 
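The `PrecompressedGzipReader` removed above relies on gzip's simple framing: a member is just a 10-byte header, the raw deflate stream, and an 8-byte trailer (CRC32 of the uncompressed data, then its length mod 2^32, both little-endian). A zip entry compressed with Deflate stores exactly that raw deflate stream, and the CRC is already recorded in the archive, so the entry can be reframed as gzip without recompressing; this is also where `GZIP_EXTRA_LEN = 18` (10 header + 8 trailer bytes) comes from. A standalone sketch of the framing, checked with flate2 (assumes the flate2 and crc32fast crates, both already in the dependency tree before this change; this is not code from this repository):

```rust
use std::io::{Read, Write};

use flate2::{read::GzDecoder, write::DeflateEncoder, Compression};

fn main() {
    let data = b"hello hello hello";

    // Raw deflate stream, standing in for a zip entry's compressed bytes.
    let mut enc = DeflateEncoder::new(Vec::new(), Compression::default());
    enc.write_all(data).unwrap();
    let deflate = enc.finish().unwrap();

    // 10-byte header: magic, method 8 (deflate), no flags, mtime 0, XFL 0, OS 0xff.
    let mut gz = vec![0x1f, 0x8b, 0x08, 0, 0, 0, 0, 0, 0, 0xff];
    gz.extend_from_slice(&deflate);
    // 8-byte trailer: CRC32 of the uncompressed data, then its size mod 2^32.
    gz.extend_from_slice(&crc32fast::hash(data).to_le_bytes());
    gz.extend_from_slice(&(data.len() as u32).to_le_bytes());

    // The framed bytes decode as a regular gzip stream.
    let mut out = Vec::new();
    GzDecoder::new(gz.as_slice()).read_to_end(&mut out).unwrap();
    assert_eq!(out, data);
}
```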
+use anyhow::{anyhow, Result}; use once_cell::sync::Lazy; use regex::{Captures, Regex}; -use siphasher::sip128::{Hasher128, SipHasher}; -use url::Url; -use crate::error::{Error, Result}; - -#[derive(Debug, PartialEq, Eq, Hash)] +#[derive(Debug, PartialEq, Eq)] pub struct Query { /// Forge host pub host: String, @@ -18,114 +13,39 @@ pub struct Query { /// CI run id pub run: u64, /// Artifact id (unique for every run) - pub artifact: Option, + pub artifact: u64, } -static RE_REPO_NAME: Lazy = Lazy::new(|| Regex::new("^[A-z0-9\\-_\\.]+$").unwrap()); - impl Query { - pub fn from_subdomain(subdomain: &str) -> Result { + pub fn from_domain(domain: &str) -> Result { + let (subdomain, _) = domain + .split_once('.') + .ok_or_else(|| anyhow!("no subdomain"))?; + + let emsg = "expected URL format: -------"; + let segments = subdomain.split("--").collect::>(); if segments.len() != 4 { - return Err(Error::InvalidUrl); + return Err(anyhow!(emsg)); } - let run_and_artifact = segments[3].split('-').collect::>(); - if run_and_artifact.is_empty() || run_and_artifact.len() > 2 { - return Err(Error::InvalidUrl); - } + let (run, artifact) = segments[3] + .split_once('-') + .ok_or(anyhow!(emsg))?; Ok(Self { - host: Self::decode_domain(segments[0], '.'), - user: Self::decode_domain(segments[1], '-'), - repo: Self::decode_domain(segments[2], '-'), - run: run_and_artifact[0].parse().ok().ok_or(Error::InvalidUrl)?, - artifact: match run_and_artifact.get(1) { - Some(x) => Some(x.parse().ok().ok_or(Error::InvalidUrl)?), - None => None, - }, + host: Self::decode_domain(&segments[0], '.'), + user: Self::decode_domain(&segments[1], '-'), + repo: Self::decode_domain(&segments[2], '-'), + run: run.parse().ok().ok_or(anyhow!(emsg))?, + artifact: artifact.parse().ok().ok_or(anyhow!(emsg))?, }) } - pub fn from_forge_url(url: &str) -> Result { - let url = Url::parse(url).map_err(|_| Error::BadRequest("invalid URL".into()))?; - - let host = url.domain().ok_or(Error::BadRequest("no domain".into()))?; - let mut path_segs = url - .path_segments() - .ok_or(Error::BadRequest("no URL path".into()))?; - let user = path_segs - .next() - .ok_or(Error::BadRequest("no username".into()))?; - let repo = path_segs - .next() - .ok_or(Error::BadRequest("no repository".into()))?; - - if !path_segs.next().is_some_and(|s| s == "actions") - || !path_segs.next().is_some_and(|s| s == "runs") - { - return Err(Error::BadRequest("invalid Actions URL".into())); - } - if !RE_REPO_NAME.is_match(user) { - return Err(Error::BadRequest("invalid username".into())); - } - if !RE_REPO_NAME.is_match(repo) { - return Err(Error::BadRequest("invalid repository name".into())); - } - - let run = path_segs - .next() - .and_then(|s| s.parse::().ok()) - .ok_or(Error::BadRequest("no run ID".into()))?; - - Ok(Self { - host: host.to_owned(), - user: user.to_owned(), - repo: repo.to_owned(), - run, - artifact: None, - }) - } - - pub fn subdomain(&self) -> String { - self.subdomain_with_artifact(self.artifact) - } - - pub fn subdomain_with_artifact(&self, artifact: Option) -> String { - let mut res = format!( - "{}--{}--{}--{}", - Self::encode_domain(&self.host, '.'), - Self::encode_domain(&self.user, '-'), - Self::encode_domain(&self.repo, '-'), - self.run, - ); - if let Some(artifact) = artifact { - write!(res, "-{artifact}").unwrap(); - } - res - } - - pub fn shortid(&self) -> String { - format!("{}/{}#{}", self.user, self.repo, self.run) - } - - pub fn forge_url(&self) -> String { - format!( - "https://{}/{}/{}/actions/runs/{}", - self.host, self.user, self.repo, 
self.run - ) - } - pub fn is_github(&self) -> bool { self.host == "github.com" } - pub fn siphash(&self) -> [u8; 16] { - let mut h = SipHasher::new(); - self.hash(&mut h); - h.finish128().as_bytes() - } - fn encode_domain(s: &str, bias: char) -> String { // Check if the character at the given position is in the middle of the string // and it is not followed by escape seq numbers or further escapable characters @@ -134,7 +54,7 @@ impl Query { return false; } let next_char = s[pos..].chars().nth(1).unwrap(); - !('0'..='2').contains(&next_char) && !matches!(next_char, '-' | '.' | '_') + !('0'..='2').contains(&next_char) && !matches!(next_char, '-'|'.'|'_') }; // Escape dashes @@ -186,10 +106,14 @@ impl Query { let repl2 = if bias == '-' { repl } else { - SINGLE_DASHES.replace_all(&repl, |c: &Captures| bias.to_string() + &c[1]) + SINGLE_DASHES.replace_all(&repl, |c: &Captures| { + bias.to_string() + &c[1] + }) }; - repl2.replace('\0', "-") + let repl3 = repl2.replace('\0', "-"); + + repl3 } } @@ -197,8 +121,8 @@ impl Query { mod tests { use super::Query; - use proptest::prelude::*; use rstest::rstest; + use proptest::prelude::*; #[rstest] #[case("_h--de.x-u", '#', "-2h-1-1de-0x-1u")] @@ -229,9 +153,9 @@ mod tests { } #[test] - fn query_from_subdomain() { - let d1 = "github-com--thetadev--newpipe-extractor--14-123"; - let query = Query::from_subdomain(d1).unwrap(); + fn query_from_domain() { + let d1 = "github-com--thetadev--newpipe-extractor--14-123.example.com"; + let query = Query::from_domain(d1).unwrap(); assert_eq!( query, Query { @@ -239,22 +163,8 @@ mod tests { user: "thetadev".to_owned(), repo: "newpipe-extractor".to_owned(), run: 14, - artifact: Some(123), + artifact: 123, } ); - assert_eq!(query.subdomain(), d1); - } - - #[test] - fn siphash() { - let q = Query { - host: "github.com".to_owned(), - user: "thetadev".to_owned(), - repo: "newpipe-extractor".to_owned(), - run: 14, - artifact: Some(123), - }; - let hash = q.siphash(); - assert_eq!(hex::encode(hash), "e523468ef42c848155a43f40895dff5a"); } } diff --git a/src/storage.rs b/src/storage.rs new file mode 100644 index 0000000..e69de29 diff --git a/src/templates.rs b/src/templates.rs deleted file mode 100644 index cde8110..0000000 --- a/src/templates.rs +++ /dev/null @@ -1,60 +0,0 @@ -use crate::{artifact_api::Artifact, cache::ListingEntry, config::Config, query::Query}; -use yarte::{Render, Template}; - -#[derive(Default)] -pub struct Version; - -impl Render for Version { - fn render(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { - f.write_str(env!("CARGO_PKG_VERSION")) - } -} - -#[derive(Template, Default)] -#[template(path = "index")] -pub struct Index { - pub version: Version, -} - -#[derive(Template)] -#[template(path = "error")] -pub struct Error<'a> { - pub msg: &'a str, - pub status: u16, -} - -#[derive(Template)] -#[template(path = "selection")] -pub struct Selection<'a> { - pub main_url: &'a str, - pub run_url: &'a str, - pub run_name: &'a str, - pub artifacts: Vec, -} - -#[derive(Template)] -#[template(path = "listing")] -pub struct Listing<'a> { - pub main_url: &'a str, - pub version: Version, - pub artifact_name: &'a str, - pub path_components: Vec, - pub n_dirs: usize, - pub n_files: usize, - pub has_parent: bool, - pub entries: Vec, -} - -pub struct LinkItem { - pub name: String, - pub url: String, -} - -impl LinkItem { - pub fn from_artifact(artifact: Artifact, query: &Query, cfg: &Config) -> Self { - Self { - name: artifact.name, - url: 
-            url: cfg.url_with_subdomain(&query.subdomain_with_artifact(Some(artifact.id))),
-        }
-    }
-}
diff --git a/src/util.rs b/src/util.rs
deleted file mode 100644
index 685dfb2..0000000
--- a/src/util.rs
+++ /dev/null
@@ -1,165 +0,0 @@
-use std::io::SeekFrom;
-
-use async_zip::error::ZipError;
-use axum::{extract::Request, http::HeaderMap};
-use headers::{Header, HeaderMapExt};
-use http::header;
-use mime_guess::Mime;
-use tokio::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt};
-
-use crate::error::{Error, Result};
-
-pub trait InsertTypedHeader {
-    /// Inserts a typed header into this response.
-    fn typed_header<T: Header>(self, header: T) -> Self;
-}
-
-impl InsertTypedHeader for axum::http::response::Builder {
-    fn typed_header<T: Header>(mut self, header: T) -> Self {
-        if let Some(headers) = self.headers_mut() {
-            headers.typed_insert(header);
-        }
-        self
-    }
-}
-
-pub fn accepts_gzip(headers: &HeaderMap) -> bool {
-    headers
-        .get(header::ACCEPT_ENCODING)
-        .and_then(|h| h.to_str().ok())
-        .map(|h| {
-            h.split(',').any(|val| {
-                val.split(';')
-                    .next()
-                    .map(|v| {
-                        let vt = v.trim();
-                        vt.eq_ignore_ascii_case("gzip") || vt == "*"
-                    })
-                    .unwrap_or_default()
-            })
-        })
-        .unwrap_or_default()
-}
-
-/// Seek to the contained compressed data within a zip file
-pub async fn seek_to_data_offset<R: AsyncRead + AsyncSeek + Unpin>(
-    reader: &mut R,
-    header_offset: u64,
-) -> core::result::Result<(), ZipError> {
-    const LFH_SIGNATURE: u32 = 0x4034b50;
-
-    // Seek to the header
-    reader.seek(SeekFrom::Start(header_offset)).await?;
-
-    // Check the signature
-    let signature = {
-        let mut buffer = [0; 4];
-        reader.read_exact(&mut buffer).await?;
-        u32::from_le_bytes(buffer)
-    };
-
-    match signature {
-        LFH_SIGNATURE => (),
-        actual => return Err(ZipError::UnexpectedHeaderError(actual, LFH_SIGNATURE)),
-    };
-
-    // Skip the local file header and trailing data
-    let mut header_data: [u8; 26] = [0; 26];
-    reader.read_exact(&mut header_data).await?;
-    let file_name_length = u16::from_le_bytes(header_data[22..24].try_into().unwrap());
-    let extra_field_length = u16::from_le_bytes(header_data[24..26].try_into().unwrap());
-
-    let trailing_size = (file_name_length as i64) + (extra_field_length as i64);
-    reader.seek(SeekFrom::Current(trailing_size)).await?;
-
-    Ok(())
-}
-
-/// Return the file extension of a website path
-pub fn site_path_ext(path: &str) -> Option<&str> {
-    let mut parts = path.split('.').rev();
-    parts
-        .next()
-        .filter(|ext| !ext.contains('/') && parts.next().is_some())
-}
-
-/// Get the MIME type of a website path from its file extension
-pub fn path_mime(path: &str) -> Option<Mime> {
-    site_path_ext(path).and_then(|ext| mime_guess::from_ext(ext).first())
-}
-
-pub fn full_url_from_request(request: &Request) -> String {
-    let uri = request.uri();
-    if let Some(host) = host_from_request(request) {
-        format!("{}{}", host, uri.path())
-    } else {
-        uri.to_string()
-    }
-}
-
-fn host_from_request(request: &Request) -> Option<&str> {
-    parse_forwarded(request.headers())
-        .or_else(|| {
-            request
-                .headers()
-                .get("X-Forwarded-Host")
-                .and_then(|host| host.to_str().ok())
-        })
-        .or_else(|| {
-            request
-                .headers()
-                .get(http::header::HOST)
-                .and_then(|host| host.to_str().ok())
-        })
-}
-
-fn parse_forwarded(headers: &HeaderMap) -> Option<&str> {
-    // if there are multiple `Forwarded` headers, `HeaderMap::get` will return the first one
-    let forwarded_values = headers.get(header::FORWARDED)?.to_str().ok()?;
-
-    // get the first set of values
-    let first_value = forwarded_values.split(',').next()?;
-
-    // find the value of the `host` field
-    first_value.split(';').find_map(|pair| {
-        let (key, value) = pair.split_once('=')?;
-        key.trim()
-            .eq_ignore_ascii_case("host")
-            .then(|| value.trim().trim_matches('"'))
-    })
-}
-
-pub fn get_subdomain<'a>(host: &'a str, root_domain: &str) -> Result<&'a str> {
-    let stripped = host.strip_suffix(root_domain).ok_or(Error::BadRequest(
-        "host does not end with configured ROOT_DOMAIN".into(),
-    ))?;
-    Ok(stripped.trim_end_matches('.'))
-}
-
-#[cfg(test)]
-mod tests {
-    use http::{header, HeaderMap};
-    use rstest::rstest;
-
-    #[rstest]
-    #[case("", false)]
-    #[case("br", false)]
-    #[case("gzip", true)]
-    #[case("GZIP", true)]
-    #[case("*", true)]
-    #[case("deflate, gzip;q=1.0, *;q=0.5", true)]
-    fn accepts_gzip(#[case] val: &str, #[case] expect: bool) {
-        let mut hdrs = HeaderMap::new();
-        hdrs.insert(header::ACCEPT_ENCODING, val.try_into().unwrap());
-
-        assert_eq!(super::accepts_gzip(&hdrs), expect);
-    }
-
-    #[rstest]
-    #[case("localhost", Some(""))]
-    #[case("test.localhost", Some("test"))]
-    #[case("example.com", None)]
-    fn get_subdomain(#[case] host: &str, #[case] expect: Option<&str>) {
-        assert_eq!(super::get_subdomain(host, "localhost").ok(), expect);
-    }
-}
diff --git a/templates/error.hbs b/templates/error.hbs
deleted file mode 100644
index 63f7d40..0000000
--- a/templates/error.hbs
+++ /dev/null
@@ -1,40 +0,0 @@
 [HTML markup lost in extraction; only the recoverable template text follows]
-Artifactview
-Error {{status}}
-{{msg}}
diff --git a/templates/index.hbs b/templates/index.hbs
deleted file mode 100644
index 8cbf827..0000000
--- a/templates/index.hbs
+++ /dev/null
@@ -1,98 +0,0 @@
 [HTML markup lost in extraction; only the recoverable template text follows]
-Artifactview
-Enter a GitHub/Gitea/Forgejo Actions run url to browse CI artifacts
diff --git a/templates/listing.hbs b/templates/listing.hbs
deleted file mode 100644
index 85b9f10..0000000
--- a/templates/listing.hbs
+++ /dev/null
@@ -1,82 +0,0 @@
 [HTML markup lost in extraction; only the recoverable template text follows]
-Index of {{artifact_name}}
-{{#each path_components}}{{this.name}} /{{/each}}
-{{n_dirs}} directories
-{{n_files}} files
-Name ↓ | Size ↓ | CRC32
-{{#if has_parent}}
-Parent directory
-{{/if}}
-{{#each entries}}
-{{this.name}}
-{{#if this.is_dir}}—{{else}}{{this.size}}{{/if}}
-{{#if this.is_dir}}—{{else}}{{this.crc32}}{{/if}}
-{{/each}}
diff --git a/templates/selection.hbs b/templates/selection.hbs
deleted file mode 100644
index be1b878..0000000
--- a/templates/selection.hbs
+++ /dev/null
@@ -1,49 +0,0 @@
 [HTML markup lost in extraction; only the recoverable template text follows]
-Artifactview
-CI artifacts for {{run_name}}:
-{{#each artifacts}}
-{{this.name}}
-{{/each}}