Compare commits

...

14 commits

113 changed files with 13652 additions and 105 deletions

6
.env.example Normal file
View file

@@ -0,0 +1,6 @@
CACHE_DIR=/tmp/artifactview
MAX_ARTIFACT_SIZE=100000000
MAX_AGE_H=12
# If you only want to access public repositories,
# create a fine-grained token with Public Repositories (read-only) access
GITHUB_TOKEN=github_pat_123456

1
.gitignore vendored
View file

@@ -1 +1,2 @@
/target
/.env

12
.pre-commit-config.yaml Normal file
View file

@@ -0,0 +1,12 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.3.0
hooks:
- id: end-of-file-fixer
- repo: https://github.com/cathiele/pre-commit-rust
rev: v0.1.0
hooks:
- id: cargo-fmt
- id: cargo-clippy
args: ["--all", "--tests", "--", "-D", "warnings"]

1333
Cargo.lock generated

File diff suppressed because it is too large

View file

@@ -4,15 +4,47 @@ version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = "1.0.86"
arc-swap = "1.7.1"
async_zip = { path = "crates/async_zip", features = ["tokio", "tokio-fs", "deflate"] }
axum = { version = "0.7.5", features = ["http2"] }
axum-extra = { version = "0.9.3", features = ["typed-header"] }
dotenvy = "0.15.7"
envy = { path = "crates/envy" }
flate2 = "1.0.30"
futures-lite = "2.3.0"
headers = "0.4.0"
hex = "0.4.3"
http = "1.1.0"
mime = "0.3.17"
mime_guess = "2.0.4"
once_cell = "1.19.0"
path_macro = "1.0.0"
percent-encoding = "2.3.1"
pin-project = "1.1.5"
quick_cache = "0.5.1"
rand = "0.8.5"
regex = "1.10.4"
reqwest = { version = "0.12.4", features = ["json"] }
serde = { version = "1.0.203", features = ["derive"] }
serde-env = "0.1.1"
serde-hex = "0.1.0"
serde_json = "1.0.117"
tokio = {version = "1.37.0", features = ["macros"]}
siphasher = "1.0.1"
thiserror = "1.0.61"
tokio = { version = "1.37.0", features = ["macros", "fs", "rt-multi-thread"] }
tokio-util = { version = "0.7.11", features = ["io"] }
tower-http = { version = "0.5.2", features = ["trace"] }
tracing = "0.1.40"
tracing-subscriber = "0.3.18"
url = "2.5.0"
yarte = "0.15.7"
[build-dependencies]
yarte_helpers = "0.15.8"
[dev-dependencies]
proptest = "1.4.0"
rstest = { version = "0.19.0", default-features = false }
[workspace]
members = [".", "crates/*"]
resolver = "2"

30
Justfile Normal file
View file

@@ -0,0 +1,30 @@
test:
cargo test
release:
#!/usr/bin/env bash
set -e
CRATE="artifactview"
CHANGELOG="CHANGELOG.md"
VERSION=$(cargo pkgid --package "$CRATE" | tr '#@' '\n' | tail -n 1)
TAG="v${VERSION}"
echo "Releasing $TAG:"
if git rev-parse "$TAG" >/dev/null 2>&1; then echo "version tag $TAG already exists"; exit 1; fi
CLIFF_ARGS="--tag '${TAG}' --unreleased"
echo "git-cliff $CLIFF_ARGS"
if [ -f "$CHANGELOG" ]; then
eval "git-cliff $CLIFF_ARGS --prepend '$CHANGELOG'"
else
eval "git-cliff $CLIFF_ARGS --output '$CHANGELOG'"
fi
git add "$CHANGELOG"
git commit -m "chore(release): release $CRATE v$VERSION"
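# Use the newest changelog section (the text after the first '## ' heading) as the annotated tag message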
awk 'BEGIN{RS="(^|\n)## [^\n]+\n*"} NR==2 { print }' "$CHANGELOG" | git tag -as -F - --cleanup whitespace "$TAG"
echo "🚀 Run 'git push origin $TAG' to publish"

View file

@@ -1,4 +1,4 @@
# artifactview
# Artifactview
View CI build artifacts from Forgejo/GitHub using your web browser.
@@ -20,4 +20,21 @@ status code 404 if no file was found.
Artifactview accepts URLs in the given format: `<HOST>--<USER>--<REPO>--<RUN>-<ARTIFACT>.example.com`
Example: `github-com--theta-dev--example-project--4-11.example.com`
Example: `https://github-com--theta-dev--example-project--4-11.example.com`
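As a rough illustration of how such a host name decomposes, here is a minimal, hypothetical sketch (not code from this repository; the actual resolver may handle hosts or names containing extra hyphens differently):

```rust
/// Hypothetical illustration only: split a subdomain of the form
/// `<HOST>--<USER>--<REPO>--<RUN>-<ARTIFACT>` into its parts.
fn split_subdomain(subdomain: &str) -> Option<(String, String, String, u64, u64)> {
    let mut parts = subdomain.split("--");
    // "github-com" -> "github.com"; hosts containing literal hyphens are not handled here.
    let host = parts.next()?.replace('-', ".");
    let user = parts.next()?.to_owned();
    let repo = parts.next()?.to_owned();
    // The last segment holds "<RUN>-<ARTIFACT>", e.g. "4-11".
    let (run, artifact) = parts.next()?.split_once('-')?;
    Some((host, user, repo, run.parse().ok()?, artifact.parse().ok()?))
}

// split_subdomain("github-com--theta-dev--example-project--4-11")
//   == Some(("github.com".into(), "theta-dev".into(), "example-project".into(), 4, 11))
```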
## Security considerations
It is recommended to use the whitelist feature to limit Artifactview to accessing only trusted
servers, users, and organizations.
Since many
[well-known URIs](https://www.iana.org/assignments/well-known-uris/well-known-uris.xhtml)
are used to configure security-relevant properties of a website or to attest
ownership of a website (such as `.well-known/acme-challenge` for issuing TLS certificates),
Artifactview serves no files from the `.well-known` folder.
There is a configurable limit for both the maximum downloaded artifact size and the
maximum size of individual files to be served (100 MB by default).
Additionally, there is a configurable timeout for the ZIP file indexing operation.
These measures should protect the server against denial-of-service attacks such as
filling up the server's drive or uploading zip bombs.
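For illustration, such limits could sit alongside the existing settings in the environment file. Note that only `MAX_ARTIFACT_SIZE` is taken from `.env.example` above; the other variable names below are hypothetical placeholders, not documented options:

```
# Illustrative only: MAX_ARTIFACT_SIZE appears in .env.example,
# the remaining variable names are hypothetical placeholders.
MAX_ARTIFACT_SIZE=100000000     # cap downloaded artifacts at 100 MB
#MAX_FILE_SIZE=100000000        # hypothetical per-file serving limit
#ZIP_INDEX_TIMEOUT_S=10         # hypothetical timeout for zip indexing
#WHITELIST=github.com/theta-dev # hypothetical trusted servers/users
```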

3
build.rs Normal file
View file

@@ -0,0 +1,3 @@
fn main() {
yarte_helpers::recompile::when_changed();
}

View file

@@ -0,0 +1 @@
{"v":1}

View file

@@ -0,0 +1,6 @@
{
"git": {
"sha1": "e4ee7a521f624aea3c2c3eef6b78fb1ec057504b"
},
"path_in_vcs": ""
}

12
crates/async_zip/.github/dependabot.yml vendored Normal file
View file

@@ -0,0 +1,12 @@
version: 2
updates:
- package-ecosystem: "github-actions"
# Workflow files stored in the
# default location of `.github/workflows`
directory: "/"
schedule:
interval: "daily"
- package-ecosystem: "cargo"
directory: "/"
schedule:
interval: "daily"

View file

@@ -0,0 +1,20 @@
name: clippy (Linux)
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run clippy
run: cargo clippy --all-features -- -D clippy::all

View file

@@ -0,0 +1,20 @@
name: rustfmt (Linux)
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Run rustfmt
run: cargo fmt --check

View file

@@ -0,0 +1,51 @@
name: Test (Linux)
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Test [no features]
run: cargo test --verbose
- name: Test ['chrono' feature]
run: cargo test --verbose --features chrono
- name: Test ['tokio' feature]
run: cargo test --verbose --features tokio
- name: Test ['tokio-fs' feature]
run: cargo test --verbose --features tokio-fs
- name: Test ['deflate' feature]
run: cargo test --verbose --features deflate
- name: Test ['bzip2' feature]
run: cargo test --verbose --features bzip2
- name: Test ['lzma' feature]
run: cargo test --verbose --features lzma
- name: Test ['zstd' feature]
run: cargo test --verbose --features zstd
- name: Test ['xz' feature]
run: cargo test --verbose --features xz
- name: Test ['deflate64' feature]
run: cargo test --verbose --features deflate64
- name: Test ['full' feature]
run: cargo test --verbose --features full

View file

@@ -0,0 +1,24 @@
name: typos (Linux)
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
- name: Install typos
run: cargo install typos-cli
- name: Run typos
run: typos --format brief

View file

@@ -0,0 +1,24 @@
name: Build (WASM)
on:
push:
branches: [ main ]
pull_request:
branches: [ main ]
env:
CARGO_TERM_COLOR: always
jobs:
build:
name: Build ['full-wasm' feature] on ${{ matrix.target }}
runs-on: ubuntu-latest
strategy:
matrix:
target:
- wasm32-wasi
- wasm32-unknown-unknown
steps:
- uses: actions/checkout@v4
- run: rustup target add ${{ matrix.target }}
- run: cargo build --verbose --target ${{ matrix.target }} --features full-wasm

15
crates/async_zip/.gitignore vendored Normal file
View file

@@ -0,0 +1,15 @@
# Generated by Cargo
# will have compiled files and executables
/target/
/examples/**/target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
/Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
/examples/**/*.rs.bk
# Ignore generated zip test file that is large
/src/tests/read/zip64/zip64many.zip

View file

@@ -0,0 +1,63 @@
[package]
name = "async_zip"
version = "0.0.17"
edition = "2021"
authors = ["Harry [hello@majored.pw]"]
repository = "https://github.com/Majored/rs-async-zip"
description = "An asynchronous ZIP archive reading/writing crate."
readme = "README.md"
license = "MIT"
documentation = "https://docs.rs/async_zip/"
homepage = "https://github.com/Majored/rs-async-zip"
keywords = ["async", "zip", "archive", "tokio"]
categories = ["asynchronous", "compression"]
[features]
full = ["chrono", "tokio-fs", "deflate", "bzip2", "lzma", "zstd", "xz", "deflate64"]
# All features that are compatible with WASM
full-wasm = ["chrono", "deflate", "zstd"]
tokio = ["dep:tokio", "tokio-util", "tokio/io-util"]
tokio-fs = ["tokio/fs"]
deflate = ["async-compression/deflate"]
bzip2 = ["async-compression/bzip2"]
lzma = ["async-compression/lzma"]
zstd = ["async-compression/zstd"]
xz = ["async-compression/xz"]
deflate64 = ["async-compression/deflate64"]
[package.metadata.docs.rs]
all-features = true
# defines the configuration attribute `docsrs`
rustdoc-args = ["--cfg", "docsrs"]
[dependencies]
crc32fast = "1"
futures-lite = { version = "2.1.0", default-features = false, features = ["std"] }
pin-project = "1"
thiserror = "1"
async-compression = { version = "0.4.2", default-features = false, features = ["futures-io"], optional = true }
chrono = { version = "0.4", default-features = false, features = ["clock"], optional = true }
tokio = { version = "1", default-features = false, optional = true }
tokio-util = { version = "0.7", features = ["compat"], optional = true }
[dev-dependencies]
# tests
tokio = { version = "1", features = ["full"] }
tokio-util = { version = "0.7", features = ["compat"] }
env_logger = "0.11.2"
zip = "0.6.3"
# shared across multiple examples
# anyhow = "1"
# sanitize-filename = "0.5"
# actix_multipart
# actix-web = "4"
# actix-multipart = "0.6"
# futures = "0.3"
# derive_more = "0.99"
# uuid = { version = "1", features = ["v4", "serde"] }

22
crates/async_zip/LICENSE Normal file
View file

@@ -0,0 +1,22 @@
MIT License
Copyright (c) 2021 Harry
Copyright (c) 2023 Cognite AS
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View file

@@ -0,0 +1,81 @@
# async_zip
[![Crates.io](https://img.shields.io/crates/v/async_zip?style=flat-square)](https://crates.io/crates/async_zip)
[![Crates.io](https://img.shields.io/crates/d/async_zip?style=flat-square)](https://crates.io/crates/async_zip)
[![docs.rs](https://img.shields.io/docsrs/async_zip?style=flat-square)](https://docs.rs/async_zip/)
[![GitHub Workflow Status (branch)](https://img.shields.io/github/actions/workflow/status/Majored/rs-async-zip/ci-linux.yml?branch=main&style=flat-square)](https://github.com/Majored/rs-async-zip/actions?query=branch%3Amain)
[![GitHub](https://img.shields.io/github/license/Majored/rs-async-zip?style=flat-square)](https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
An asynchronous ZIP archive reading/writing crate.
## Features
- A base implementation atop `futures`'s IO traits.
- An extended implementation atop `tokio`'s IO traits.
- Support for Stored, Deflate, bzip2, LZMA, zstd, and xz compression methods.
- Various different reading approaches (seek, stream, filesystem, in-memory buffer, etc).
- Support for writing complete data (u8 slices) or streams using data descriptors.
- Initial support for ZIP64 reading and writing.
- Aims for reasonable [specification](https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md) compliance.
## Installation & Basic Usage
```toml
[dependencies]
async_zip = { version = "0.0.17", features = ["full"] }
```
A (soon to be) extensive list of [examples](https://github.com/Majored/rs-async-zip/tree/main/examples) can be found under the `/examples` directory.
### Feature Flags
- `full` - Enables all below features.
- `full-wasm` - Enables all below features that are compatible with WASM.
- `chrono` - Enables support for parsing dates via `chrono`.
- `tokio` - Enables support for the `tokio` implementation module.
- `tokio-fs` - Enables support for the `tokio::fs` reading module.
- `deflate` - Enables support for the Deflate compression method.
- `bzip2` - Enables support for the bzip2 compression method.
- `lzma` - Enables support for the LZMA compression method.
- `zstd` - Enables support for the zstd compression method.
- `xz` - Enables support for the xz compression method.
### Reading
```rust
use tokio::{io::BufReader, fs::File};
use async_zip::tokio::read::seek::ZipFileReader;
...
let mut file = BufReader::new(File::open("./Archive.zip").await?);
let mut zip = ZipFileReader::with_tokio(&mut file).await?;
let mut string = String::new();
let mut reader = zip.reader_with_entry(0).await?;
reader.read_to_string_checked(&mut string).await?;
println!("{}", string);
```
### Writing
```rust
use async_zip::tokio::write::ZipFileWriter;
use async_zip::{Compression, ZipEntryBuilder};
use tokio::fs::File;
...
let mut file = File::create("foo.zip").await?;
let mut writer = ZipFileWriter::with_tokio(&mut file);
let data = b"This is an example file.";
let builder = ZipEntryBuilder::new("bar.txt".into(), Compression::Deflate);
writer.write_entry_whole(builder, data).await?;
writer.close().await?;
```
## Contributions
Whilst I will be continuing to maintain this crate myself, reasonable specification compliance is a huge undertaking for a single individual. As such, contributions will always be encouraged and appreciated.
No contribution guidelines exist but additions should be developed with readability in mind, with appropriate comments, and make use of `rustfmt`.
## Issues & Support
Whether you're wanting to report a bug you've come across during use of this crate or are seeking general help/assistance, please utilise the [issues tracker](https://github.com/Majored/rs-async-zip/issues) and provide as much detail as possible (eg. recreation steps).
I try to respond to issues within a reasonable timeframe.

File diff suppressed because it is too large

View file

@@ -0,0 +1,2 @@
max_width = 120
use_small_heuristics = "Max"

View file

@@ -0,0 +1,7 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A base runtime-agnostic implementation using `futures`'s IO types.
pub mod read;
pub mod write;

View file

@@ -0,0 +1,68 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// Copyright (c) 2023 Cognite AS
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::spec::header::{EndOfCentralDirectoryHeader, Zip64EndOfCentralDirectoryRecord};
/// Combines all the fields in EOCDR and Zip64EOCDR into one struct.
#[derive(Debug)]
pub struct CombinedCentralDirectoryRecord {
pub version_made_by: Option<u16>,
pub version_needed_to_extract: Option<u16>,
pub disk_number: u32,
pub disk_number_start_of_cd: u32,
pub num_entries_in_directory_on_disk: u64,
pub num_entries_in_directory: u64,
pub directory_size: u64,
pub offset_of_start_of_directory: u64,
pub file_comment_length: u16,
}
impl CombinedCentralDirectoryRecord {
/// Combine an EOCDR with an optional Zip64EOCDR.
///
/// Fields that are set to their max value in the EOCDR will be overwritten by the contents of
/// the corresponding Zip64EOCDR field.
pub fn combine(eocdr: EndOfCentralDirectoryHeader, zip64eocdr: Zip64EndOfCentralDirectoryRecord) -> Self {
let mut combined = Self::from(&eocdr);
if eocdr.disk_num == u16::MAX {
combined.disk_number = zip64eocdr.disk_number;
}
if eocdr.start_cent_dir_disk == u16::MAX {
combined.disk_number_start_of_cd = zip64eocdr.disk_number_start_of_cd;
}
if eocdr.num_of_entries_disk == u16::MAX {
combined.num_entries_in_directory_on_disk = zip64eocdr.num_entries_in_directory_on_disk;
}
if eocdr.num_of_entries == u16::MAX {
combined.num_entries_in_directory = zip64eocdr.num_entries_in_directory;
}
if eocdr.size_cent_dir == u32::MAX {
combined.directory_size = zip64eocdr.directory_size;
}
if eocdr.cent_dir_offset == u32::MAX {
combined.offset_of_start_of_directory = zip64eocdr.offset_of_start_of_directory;
}
combined.version_made_by = Some(zip64eocdr.version_made_by);
combined.version_needed_to_extract = Some(zip64eocdr.version_needed_to_extract);
combined
}
}
// An implementation for the case of no zip64EOCDR.
impl From<&EndOfCentralDirectoryHeader> for CombinedCentralDirectoryRecord {
fn from(header: &EndOfCentralDirectoryHeader) -> Self {
Self {
version_made_by: None,
version_needed_to_extract: None,
disk_number: header.disk_num as u32,
disk_number_start_of_cd: header.start_cent_dir_disk as u32,
num_entries_in_directory_on_disk: header.num_of_entries_disk as u64,
num_entries_in_directory: header.num_of_entries as u64,
directory_size: header.size_cent_dir as u64,
offset_of_start_of_directory: header.cent_dir_offset as u64,
file_comment_length: header.file_comm_length,
}
}
}

View file

@@ -0,0 +1,103 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::spec::Compression;
use std::pin::Pin;
use std::task::{Context, Poll};
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
use async_compression::futures::bufread;
use futures_lite::io::{AsyncBufRead, AsyncRead};
use pin_project::pin_project;
/// A wrapping reader which holds concrete types for all respective compression method readers.
#[pin_project(project = CompressedReaderProj)]
pub(crate) enum CompressedReader<R> {
Stored(#[pin] R),
#[cfg(feature = "deflate")]
Deflate(#[pin] bufread::DeflateDecoder<R>),
#[cfg(feature = "deflate64")]
Deflate64(#[pin] bufread::Deflate64Decoder<R>),
#[cfg(feature = "bzip2")]
Bz(#[pin] bufread::BzDecoder<R>),
#[cfg(feature = "lzma")]
Lzma(#[pin] bufread::LzmaDecoder<R>),
#[cfg(feature = "zstd")]
Zstd(#[pin] bufread::ZstdDecoder<R>),
#[cfg(feature = "xz")]
Xz(#[pin] bufread::XzDecoder<R>),
}
impl<R> CompressedReader<R>
where
R: AsyncBufRead + Unpin,
{
/// Constructs a new wrapping reader from a generic [`AsyncBufRead`] implementer.
pub(crate) fn new(reader: R, compression: Compression) -> Self {
match compression {
Compression::Stored => CompressedReader::Stored(reader),
#[cfg(feature = "deflate")]
Compression::Deflate => CompressedReader::Deflate(bufread::DeflateDecoder::new(reader)),
#[cfg(feature = "deflate64")]
Compression::Deflate64 => CompressedReader::Deflate64(bufread::Deflate64Decoder::new(reader)),
#[cfg(feature = "bzip2")]
Compression::Bz => CompressedReader::Bz(bufread::BzDecoder::new(reader)),
#[cfg(feature = "lzma")]
Compression::Lzma => CompressedReader::Lzma(bufread::LzmaDecoder::new(reader)),
#[cfg(feature = "zstd")]
Compression::Zstd => CompressedReader::Zstd(bufread::ZstdDecoder::new(reader)),
#[cfg(feature = "xz")]
Compression::Xz => CompressedReader::Xz(bufread::XzDecoder::new(reader)),
}
}
/// Consumes this reader and returns the inner value.
pub(crate) fn into_inner(self) -> R {
match self {
CompressedReader::Stored(inner) => inner,
#[cfg(feature = "deflate")]
CompressedReader::Deflate(inner) => inner.into_inner(),
#[cfg(feature = "deflate64")]
CompressedReader::Deflate64(inner) => inner.into_inner(),
#[cfg(feature = "bzip2")]
CompressedReader::Bz(inner) => inner.into_inner(),
#[cfg(feature = "lzma")]
CompressedReader::Lzma(inner) => inner.into_inner(),
#[cfg(feature = "zstd")]
CompressedReader::Zstd(inner) => inner.into_inner(),
#[cfg(feature = "xz")]
CompressedReader::Xz(inner) => inner.into_inner(),
}
}
}
impl<R> AsyncRead for CompressedReader<R>
where
R: AsyncBufRead + Unpin,
{
fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
match self.project() {
CompressedReaderProj::Stored(inner) => inner.poll_read(c, b),
#[cfg(feature = "deflate")]
CompressedReaderProj::Deflate(inner) => inner.poll_read(c, b),
#[cfg(feature = "deflate64")]
CompressedReaderProj::Deflate64(inner) => inner.poll_read(c, b),
#[cfg(feature = "bzip2")]
CompressedReaderProj::Bz(inner) => inner.poll_read(c, b),
#[cfg(feature = "lzma")]
CompressedReaderProj::Lzma(inner) => inner.poll_read(c, b),
#[cfg(feature = "zstd")]
CompressedReaderProj::Zstd(inner) => inner.poll_read(c, b),
#[cfg(feature = "xz")]
CompressedReaderProj::Xz(inner) => inner.poll_read(c, b),
}
}
}

View file

@@ -0,0 +1,128 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::read::io::{compressed::CompressedReader, hashed::HashedReader, owned::OwnedReader};
use crate::entry::ZipEntry;
use crate::error::{Result, ZipError};
use crate::spec::Compression;
use std::pin::Pin;
use std::task::{Context, Poll};
use futures_lite::io::{AsyncBufRead, AsyncRead, AsyncReadExt, Take};
use pin_project::pin_project;
/// A type which encodes that [`ZipEntryReader`] has associated entry data.
pub struct WithEntry<'a>(OwnedEntry<'a>);
/// A type which encodes that [`ZipEntryReader`] has no associated entry data.
pub struct WithoutEntry;
/// A ZIP entry reader which may implement decompression.
#[pin_project]
pub struct ZipEntryReader<'a, R, E> {
#[pin]
reader: HashedReader<CompressedReader<Take<OwnedReader<'a, R>>>>,
entry: E,
}
impl<'a, R> ZipEntryReader<'a, R, WithoutEntry>
where
R: AsyncBufRead + Unpin,
{
/// Constructs a new entry reader from its required parameters (incl. an owned R).
pub fn new_with_owned(reader: R, compression: Compression, size: u64) -> Self {
let reader = HashedReader::new(CompressedReader::new(OwnedReader::Owned(reader).take(size), compression));
Self { reader, entry: WithoutEntry }
}
/// Constructs a new entry reader from its required parameters (incl. a mutable borrow of an R).
pub(crate) fn new_with_borrow(reader: &'a mut R, compression: Compression, size: u64) -> Self {
let reader = HashedReader::new(CompressedReader::new(OwnedReader::Borrow(reader).take(size), compression));
Self { reader, entry: WithoutEntry }
}
pub(crate) fn into_with_entry(self, entry: &'a ZipEntry) -> ZipEntryReader<'a, R, WithEntry<'a>> {
ZipEntryReader { reader: self.reader, entry: WithEntry(OwnedEntry::Borrow(entry)) }
}
pub(crate) fn into_with_entry_owned(self, entry: ZipEntry) -> ZipEntryReader<'a, R, WithEntry<'a>> {
ZipEntryReader { reader: self.reader, entry: WithEntry(OwnedEntry::Owned(entry)) }
}
}
impl<'a, R, E> AsyncRead for ZipEntryReader<'a, R, E>
where
R: AsyncBufRead + Unpin,
{
fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
self.project().reader.poll_read(c, b)
}
}
impl<'a, R, E> ZipEntryReader<'a, R, E>
where
R: AsyncBufRead + Unpin,
{
/// Computes and returns the CRC32 hash of bytes read by this reader so far.
///
/// This hash should only be computed once EOF has been reached.
pub fn compute_hash(&mut self) -> u32 {
self.reader.swap_and_compute_hash()
}
/// Consumes this reader and returns the inner value.
pub(crate) fn into_inner(self) -> R {
self.reader.into_inner().into_inner().into_inner().owned_into_inner()
}
}
impl<R> ZipEntryReader<'_, R, WithEntry<'_>>
where
R: AsyncBufRead + Unpin,
{
/// Returns an immutable reference to the associated entry data.
pub fn entry(&self) -> &'_ ZipEntry {
self.entry.0.entry()
}
/// Reads all bytes until EOF has been reached, appending them to buf, and verifies the CRC32 values.
///
/// This is a helper function synonymous to [`AsyncReadExt::read_to_end()`].
pub async fn read_to_end_checked(&mut self, buf: &mut Vec<u8>) -> Result<usize> {
let read = self.read_to_end(buf).await?;
if self.compute_hash() == self.entry.0.entry().crc32() {
Ok(read)
} else {
Err(ZipError::CRC32CheckError)
}
}
/// Reads all bytes until EOF has been reached, placing them into buf, and verifies the CRC32 values.
///
/// This is a helper function synonymous to [`AsyncReadExt::read_to_string()`].
pub async fn read_to_string_checked(&mut self, buf: &mut String) -> Result<usize> {
let read = self.read_to_string(buf).await?;
if self.compute_hash() == self.entry.0.entry().crc32() {
Ok(read)
} else {
Err(ZipError::CRC32CheckError)
}
}
}
enum OwnedEntry<'a> {
Owned(ZipEntry),
Borrow(&'a ZipEntry),
}
impl<'a> OwnedEntry<'a> {
pub fn entry(&self) -> &'_ ZipEntry {
match self {
OwnedEntry::Owned(entry) => entry,
OwnedEntry::Borrow(entry) => entry,
}
}
}

View file

@@ -0,0 +1,56 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::read::io::poll_result_ok;
use std::pin::Pin;
use std::task::{ready, Context, Poll};
use crc32fast::Hasher;
use futures_lite::io::AsyncRead;
use pin_project::pin_project;
/// A wrapping reader which computes the CRC32 hash of data read via [`AsyncRead`].
#[pin_project]
pub(crate) struct HashedReader<R> {
#[pin]
pub(crate) reader: R,
pub(crate) hasher: Hasher,
}
impl<R> HashedReader<R>
where
R: AsyncRead + Unpin,
{
/// Constructs a new wrapping reader from a generic [`AsyncRead`] implementer.
pub(crate) fn new(reader: R) -> Self {
Self { reader, hasher: Hasher::default() }
}
/// Swaps the internal hasher and returns the computed CRC32 hash.
///
/// The internal hasher is taken and replaced with a newly-constructed one. As a result, this method should only be
/// called once EOF has been reached and it's known that no more data will be read, else the computed hash(es) won't
/// accurately represent the data read in.
pub(crate) fn swap_and_compute_hash(&mut self) -> u32 {
std::mem::take(&mut self.hasher).finalize()
}
/// Consumes this reader and returns the inner value.
pub(crate) fn into_inner(self) -> R {
self.reader
}
}
impl<R> AsyncRead for HashedReader<R>
where
R: AsyncRead + Unpin,
{
fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
let project = self.project();
let written = poll_result_ok!(ready!(project.reader.poll_read(c, b)));
project.hasher.update(&b[..written]);
Poll::Ready(Ok(written))
}
}

View file

@@ -0,0 +1,96 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! <https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316>
//!
//! As with other ZIP libraries, we face the predicament that the end of central directory record may contain a
//! variable-length file comment. As a result, we cannot just make the assumption that the start of this record is
//! 18 bytes (the length of the EOCDR) offset from the end of the data - we must locate it ourselves.
//!
//! The `zip-rs` crate handles this by reading in reverse from the end of the data. This involves seeking backwards
//! by a single byte each iteration and reading 4 bytes into a u32. Whether this is performant/acceptable within a
//! non-async context, I'm unsure, but it isn't desirable within an async context, especially since we cannot just
//! place a [`BufReader`] in front of the upstream reader (as its internal buffer is invalidated on each seek).
//!
//! Reading in reverse is still desirable as the use of file comments is limited and they're unlikely to be large.
//!
//! The below method is one that compromises on these two contention points. Please submit an issue or PR if you know
//! of a better algorithm for this (and have tested/verified its performance).
#[cfg(doc)]
use futures_lite::io::BufReader;
use crate::error::{Result as ZipResult, ZipError};
use crate::spec::consts::{EOCDR_LENGTH, EOCDR_SIGNATURE, SIGNATURE_LENGTH};
use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom};
/// The buffer size used when locating the EOCDR, equal to 2KiB.
const BUFFER_SIZE: usize = 2048;
/// The upper bound of where the EOCDR signature cannot be located.
const EOCDR_UPPER_BOUND: u64 = EOCDR_LENGTH as u64;
/// The lower bound of where the EOCDR signature cannot be located.
const EOCDR_LOWER_BOUND: u64 = EOCDR_UPPER_BOUND + SIGNATURE_LENGTH as u64 + u16::MAX as u64;
/// Locate the `end of central directory record` offset, if one exists.
/// The returned offset excludes the signature (4 bytes)
///
/// This method involves buffered reading in reverse and reverse linear searching along those buffers for the EOCDR
/// signature. As a result of this buffered approach, we reduce seeks when compared to `zip-rs`'s method by a factor
/// of the buffer size. We also then don't have to do individual u32 reads against the upstream reader.
///
/// Whilst I haven't done any in-depth benchmarks, when reading a ZIP file with the maximum length comment, this method
/// saw a reduction in location time by a factor of 500 when compared with the `zip-rs` method.
pub async fn eocdr<R>(mut reader: R) -> ZipResult<u64>
where
R: AsyncRead + AsyncSeek + Unpin,
{
let length = reader.seek(SeekFrom::End(0)).await?;
let signature = &EOCDR_SIGNATURE.to_le_bytes();
let mut buffer: [u8; BUFFER_SIZE] = [0; BUFFER_SIZE];
let mut position = length.saturating_sub((EOCDR_LENGTH + BUFFER_SIZE) as u64);
reader.seek(SeekFrom::Start(position)).await?;
loop {
let read = reader.read(&mut buffer).await?;
if let Some(match_index) = reverse_search_buffer(&buffer[..read], signature) {
return Ok(position + (match_index + 1) as u64);
}
// If we hit the start of the data or the lower bound, we're unable to locate the EOCDR.
if position == 0 || position <= length.saturating_sub(EOCDR_LOWER_BOUND) {
return Err(ZipError::UnableToLocateEOCDR);
}
// To handle the case where the EOCDR signature crosses buffer boundaries, we simply overlap reads by the
// signature length. This significantly reduces the complexity of handling partial matches with very little
// overhead.
position = position.saturating_sub((BUFFER_SIZE - SIGNATURE_LENGTH) as u64);
reader.seek(SeekFrom::Start(position)).await?;
}
}
/// A naive reverse linear search along the buffer for the specified signature bytes.
///
/// This is already surprisingly performant. For instance, using memchr::memchr() to match for the first byte of the
/// signature, and then manual byte comparisons for the remaining signature bytes was actually slower by a factor of
/// 2.25. This method was explored as tokio's `read_until()` implementation uses memchr::memchr().
pub(crate) fn reverse_search_buffer(buffer: &[u8], signature: &[u8]) -> Option<usize> {
'outer: for index in (0..buffer.len()).rev() {
for (signature_index, signature_byte) in signature.iter().rev().enumerate() {
if let Some(next_index) = index.checked_sub(signature_index) {
if buffer[next_index] != *signature_byte {
continue 'outer;
}
} else {
break 'outer;
}
}
return Some(index);
}
None
}
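// Illustrative usage sketch (not part of the original file): reverse_search_buffer
// returns the index of the *last* byte of the right-most occurrence of `signature`,
// which is why the caller above adds 1 to obtain the offset just past the signature.
#[cfg(test)]
mod reverse_search_sketch {
    use super::reverse_search_buffer;

    #[test]
    fn finds_last_byte_of_rightmost_match() {
        let buffer = [0u8, 1, 2, 3, 4, 2, 3, 4];
        // The right-most `[2, 3, 4]` ends at index 7.
        assert_eq!(reverse_search_buffer(&buffer, &[2, 3, 4]), Some(7));
        // No occurrence at all yields None.
        assert_eq!(reverse_search_buffer(&buffer, &[9, 9]), None);
    }
}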

View file

@@ -0,0 +1,88 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod combined_record;
pub(crate) mod compressed;
pub(crate) mod entry;
pub(crate) mod hashed;
pub(crate) mod locator;
pub(crate) mod owned;
use std::{
future::Future,
io::ErrorKind,
pin::Pin,
task::{ready, Context, Poll},
};
pub use combined_record::CombinedCentralDirectoryRecord;
use futures_lite::io::AsyncBufRead;
use pin_project::pin_project;
use crate::{
spec::consts::{DATA_DESCRIPTOR_LENGTH, DATA_DESCRIPTOR_SIGNATURE, SIGNATURE_LENGTH},
string::{StringEncoding, ZipString},
};
use futures_lite::io::{AsyncRead, AsyncReadExt};
/// Read and return a dynamic length string from a reader which impls AsyncRead.
pub(crate) async fn read_string<R>(reader: R, length: usize, encoding: StringEncoding) -> std::io::Result<ZipString>
where
R: AsyncRead + Unpin,
{
Ok(ZipString::new(read_bytes(reader, length).await?, encoding))
}
/// Read and return a dynamic length vector of bytes from a reader which impls AsyncRead.
pub(crate) async fn read_bytes<R>(reader: R, length: usize) -> std::io::Result<Vec<u8>>
where
R: AsyncRead + Unpin,
{
let mut buffer = Vec::with_capacity(length);
reader.take(length as u64).read_to_end(&mut buffer).await?;
Ok(buffer)
}
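/// A future which consumes a data descriptor from the wrapped reader: `DATA_DESCRIPTOR_LENGTH` bytes, plus
/// `SIGNATURE_LENGTH` more when the optional data descriptor signature is present.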
#[pin_project]
pub(crate) struct ConsumeDataDescriptor<'a, R>(#[pin] pub(crate) &'a mut R);
impl<R> Future for ConsumeDataDescriptor<'_, R>
where
R: AsyncBufRead + Unpin,
{
type Output = std::io::Result<()>;
fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<()>> {
let mut project = self.project();
let data = poll_result_ok!(ready!(project.0.as_mut().poll_fill_buf(cx)));
let signature = data.get(0..4).ok_or(ErrorKind::UnexpectedEof)?;
let mut consumed = DATA_DESCRIPTOR_LENGTH;
if signature == DATA_DESCRIPTOR_SIGNATURE.to_le_bytes() {
consumed += SIGNATURE_LENGTH;
}
if consumed > data.len() {
return Poll::Ready(Err(ErrorKind::UnexpectedEof.into()));
}
project.0.as_mut().consume(consumed);
Poll::Ready(Ok(()))
}
}
/// A macro that returns the inner value of an Ok or early-returns in the case of an Err.
///
/// This is almost identical to the ? operator but handles the situation when a Result is used in combination with
/// Poll (eg. tokio's IO traits such as AsyncRead).
macro_rules! poll_result_ok {
($poll:expr) => {
match $poll {
Ok(inner) => inner,
Err(err) => return Poll::Ready(Err(err)),
}
};
}
use poll_result_ok;

View file

@@ -0,0 +1,62 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use std::pin::Pin;
use std::task::{Context, Poll};
use futures_lite::io::{AsyncBufRead, AsyncRead};
use pin_project::pin_project;
/// A wrapping reader which holds an owned R or a mutable borrow to R.
///
/// This is used to represent whether the supplied reader can be acted on concurrently or not (with an owned value
/// suggesting that R implements some method of synchronisation & cloning).
#[pin_project(project = OwnedReaderProj)]
pub(crate) enum OwnedReader<'a, R> {
Owned(#[pin] R),
Borrow(#[pin] &'a mut R),
}
impl<'a, R> OwnedReader<'a, R>
where
R: AsyncBufRead + Unpin,
{
/// Consumes an owned reader and returns the inner value.
pub(crate) fn owned_into_inner(self) -> R {
match self {
OwnedReader::Owned(inner) => inner,
OwnedReader::Borrow(_) => panic!("not OwnedReader::Owned value"),
}
}
}
impl<'a, R> AsyncBufRead for OwnedReader<'a, R>
where
R: AsyncBufRead + Unpin,
{
fn poll_fill_buf(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<std::io::Result<&[u8]>> {
match self.project() {
OwnedReaderProj::Owned(inner) => inner.poll_fill_buf(cx),
OwnedReaderProj::Borrow(inner) => inner.poll_fill_buf(cx),
}
}
fn consume(self: Pin<&mut Self>, amt: usize) {
match self.project() {
OwnedReaderProj::Owned(inner) => inner.consume(amt),
OwnedReaderProj::Borrow(inner) => inner.consume(amt),
}
}
}
impl<'a, R> AsyncRead for OwnedReader<'a, R>
where
R: AsyncBufRead + Unpin,
{
fn poll_read(self: Pin<&mut Self>, c: &mut Context<'_>, b: &mut [u8]) -> Poll<std::io::Result<usize>> {
match self.project() {
OwnedReaderProj::Owned(inner) => inner.poll_read(c, b),
OwnedReaderProj::Borrow(inner) => inner.poll_read(c, b),
}
}
}

View file

@@ -0,0 +1,147 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A concurrent ZIP reader which acts over an owned vector of bytes.
//!
//! Concurrency is achieved as a result of:
//! - Wrapping the provided vector of bytes within an [`Arc`] to allow shared ownership.
//! - Wrapping this [`Arc`] around a [`Cursor`] when reading (as the [`Arc`] can deref and coerce into a `&[u8]`).
//!
//! ### Usage
//! Unlike the [`seek`] module, we no longer hold a mutable reference to any inner reader which in turn, allows the
//! construction of concurrent [`ZipEntryReader`]s. Though, note that each individual [`ZipEntryReader`] cannot be sent
//! between thread boundaries due to the masked lifetime requirement. Therefore, the overarching [`ZipFileReader`]
//! should be cloned and moved into those contexts when needed.
//!
//! ### Concurrent Example
//! ```no_run
//! # use async_zip::base::read::mem::ZipFileReader;
//! # use async_zip::error::Result;
//! # use futures_lite::io::AsyncReadExt;
//! #
//! async fn run() -> Result<()> {
//! let reader = ZipFileReader::new(Vec::new()).await?;
//! let result = tokio::join!(read(&reader, 0), read(&reader, 1));
//!
//! let data_0 = result.0?;
//! let data_1 = result.1?;
//!
//! // Use data within current scope.
//!
//! Ok(())
//! }
//!
//! async fn read(reader: &ZipFileReader, index: usize) -> Result<Vec<u8>> {
//! let mut entry = reader.reader_without_entry(index).await?;
//! let mut data = Vec::new();
//! entry.read_to_end(&mut data).await?;
//! Ok(data)
//! }
//! ```
//!
//! ### Parallel Example
//! ```no_run
//! # use async_zip::base::read::mem::ZipFileReader;
//! # use async_zip::error::Result;
//! # use futures_lite::io::AsyncReadExt;
//! #
//! async fn run() -> Result<()> {
//! let reader = ZipFileReader::new(Vec::new()).await?;
//!
//! let handle_0 = tokio::spawn(read(reader.clone(), 0));
//! let handle_1 = tokio::spawn(read(reader.clone(), 1));
//!
//! let data_0 = handle_0.await.expect("thread panicked")?;
//! let data_1 = handle_1.await.expect("thread panicked")?;
//!
//! // Use data within current scope.
//!
//! Ok(())
//! }
//!
//! async fn read(reader: ZipFileReader, index: usize) -> Result<Vec<u8>> {
//! let mut entry = reader.reader_without_entry(index).await?;
//! let mut data = Vec::new();
//! entry.read_to_end(&mut data).await?;
//! Ok(data)
//! }
//! ```
#[cfg(doc)]
use crate::base::read::seek;
use crate::base::read::io::entry::ZipEntryReader;
use crate::error::{Result, ZipError};
use crate::file::ZipFile;
use std::sync::Arc;
use futures_lite::io::Cursor;
use super::io::entry::{WithEntry, WithoutEntry};
struct Inner {
data: Vec<u8>,
file: ZipFile,
}
// A concurrent ZIP reader which acts over an owned vector of bytes.
#[derive(Clone)]
pub struct ZipFileReader {
inner: Arc<Inner>,
}
impl ZipFileReader {
/// Constructs a new ZIP reader from an owned vector of bytes.
pub async fn new(data: Vec<u8>) -> Result<ZipFileReader> {
let file = crate::base::read::file(Cursor::new(&data)).await?;
Ok(ZipFileReader::from_raw_parts(data, file))
}
/// Constructs a ZIP reader from an owned vector of bytes and ZIP file information derived from those bytes.
///
/// Providing a [`ZipFile`] that wasn't derived from those bytes may lead to inaccurate parsing.
pub fn from_raw_parts(data: Vec<u8>, file: ZipFile) -> ZipFileReader {
ZipFileReader { inner: Arc::new(Inner { data, file }) }
}
/// Returns this ZIP file's information.
pub fn file(&self) -> &ZipFile {
&self.inner.file
}
/// Returns the raw bytes provided to the reader during construction.
pub fn data(&self) -> &[u8] {
&self.inner.data
}
/// Returns a new entry reader if the provided index is valid.
pub async fn reader_without_entry(&self, index: usize) -> Result<ZipEntryReader<Cursor<&[u8]>, WithoutEntry>> {
let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
let mut cursor = Cursor::new(&self.inner.data[..]);
stored_entry.seek_to_data_offset(&mut cursor).await?;
Ok(ZipEntryReader::new_with_owned(
cursor,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
))
}
/// Returns a new entry reader if the provided index is valid.
pub async fn reader_with_entry(&self, index: usize) -> Result<ZipEntryReader<Cursor<&[u8]>, WithEntry<'_>>> {
let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
let mut cursor = Cursor::new(&self.inner.data[..]);
stored_entry.seek_to_data_offset(&mut cursor).await?;
let reader = ZipEntryReader::new_with_owned(
cursor,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
);
Ok(reader.into_with_entry(stored_entry))
}
}

View file

@@ -0,0 +1,320 @@
// Copyright (c) 2022-2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A module which supports reading ZIP files.
pub mod mem;
pub mod seek;
pub mod stream;
pub(crate) mod io;
use crate::ZipString;
// Re-exported as part of the public API.
pub use crate::base::read::io::entry::WithEntry;
pub use crate::base::read::io::entry::WithoutEntry;
pub use crate::base::read::io::entry::ZipEntryReader;
use crate::date::ZipDateTime;
use crate::entry::{StoredZipEntry, ZipEntry};
use crate::error::{Result, ZipError};
use crate::file::ZipFile;
use crate::spec::attribute::AttributeCompatibility;
use crate::spec::consts::LFH_LENGTH;
use crate::spec::consts::{CDH_SIGNATURE, LFH_SIGNATURE, NON_ZIP64_MAX_SIZE, SIGNATURE_LENGTH, ZIP64_EOCDL_LENGTH};
use crate::spec::header::InfoZipUnicodeCommentExtraField;
use crate::spec::header::InfoZipUnicodePathExtraField;
use crate::spec::header::{
CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, LocalFileHeader,
Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord, Zip64ExtendedInformationExtraField,
};
use crate::spec::Compression;
use crate::string::StringEncoding;
use crate::base::read::io::CombinedCentralDirectoryRecord;
use crate::spec::parse::parse_extra_fields;
use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom};
pub(crate) async fn file<R>(mut reader: R) -> Result<ZipFile>
where
R: AsyncRead + AsyncSeek + Unpin,
{
// First find and parse the EOCDR.
let eocdr_offset = crate::base::read::io::locator::eocdr(&mut reader).await?;
reader.seek(SeekFrom::Start(eocdr_offset)).await?;
let eocdr = EndOfCentralDirectoryHeader::from_reader(&mut reader).await?;
let comment = io::read_string(&mut reader, eocdr.file_comm_length.into(), crate::StringEncoding::Utf8).await?;
// Check the 20 bytes before the EOCDR for the Zip64 EOCDL, plus an extra 4 bytes because the offset
// does not include the signature. If the EOCDL exists, we are dealing with a Zip64 file.
let (eocdr, zip64) = match eocdr_offset.checked_sub(ZIP64_EOCDL_LENGTH + SIGNATURE_LENGTH as u64) {
None => (CombinedCentralDirectoryRecord::from(&eocdr), false),
Some(offset) => {
reader.seek(SeekFrom::Start(offset)).await?;
let zip64_locator = Zip64EndOfCentralDirectoryLocator::try_from_reader(&mut reader).await?;
match zip64_locator {
Some(locator) => {
reader.seek(SeekFrom::Start(locator.relative_offset + SIGNATURE_LENGTH as u64)).await?;
let zip64_eocdr = Zip64EndOfCentralDirectoryRecord::from_reader(&mut reader).await?;
(CombinedCentralDirectoryRecord::combine(eocdr, zip64_eocdr), true)
}
None => (CombinedCentralDirectoryRecord::from(&eocdr), false),
}
}
};
// Outdated feature so unlikely to ever make it into this crate.
if eocdr.disk_number != eocdr.disk_number_start_of_cd
|| eocdr.num_entries_in_directory != eocdr.num_entries_in_directory_on_disk
{
return Err(ZipError::FeatureNotSupported("Spanned/split files"));
}
// Find and parse the central directory.
reader.seek(SeekFrom::Start(eocdr.offset_of_start_of_directory)).await?;
let entries = crate::base::read::cd(reader, eocdr.num_entries_in_directory, zip64).await?;
Ok(ZipFile { entries, comment, zip64 })
}
pub(crate) async fn cd<R>(mut reader: R, num_of_entries: u64, zip64: bool) -> Result<Vec<StoredZipEntry>>
where
R: AsyncRead + Unpin,
{
let num_of_entries = num_of_entries.try_into().map_err(|_| ZipError::TargetZip64NotSupported)?;
let mut entries = Vec::with_capacity(num_of_entries);
for _ in 0..num_of_entries {
let entry = cd_record(&mut reader, zip64).await?;
entries.push(entry);
}
Ok(entries)
}
pub(crate) fn get_zip64_extra_field(extra_fields: &[ExtraField]) -> Option<&Zip64ExtendedInformationExtraField> {
for field in extra_fields {
if let ExtraField::Zip64ExtendedInformation(zip64field) = field {
return Some(zip64field);
}
}
None
}
pub(crate) fn get_zip64_extra_field_mut(
extra_fields: &mut [ExtraField],
) -> Option<&mut Zip64ExtendedInformationExtraField> {
for field in extra_fields {
if let ExtraField::Zip64ExtendedInformation(zip64field) = field {
return Some(zip64field);
}
}
None
}
fn get_combined_sizes(
uncompressed_size: u32,
compressed_size: u32,
extra_field: &Option<&Zip64ExtendedInformationExtraField>,
) -> Result<(u64, u64)> {
let mut uncompressed_size = uncompressed_size as u64;
let mut compressed_size = compressed_size as u64;
if let Some(extra_field) = extra_field {
if let Some(s) = extra_field.uncompressed_size {
uncompressed_size = s;
}
if let Some(s) = extra_field.compressed_size {
compressed_size = s;
}
}
Ok((uncompressed_size, compressed_size))
}
pub(crate) async fn cd_record<R>(mut reader: R, _zip64: bool) -> Result<StoredZipEntry>
where
R: AsyncRead + Unpin,
{
crate::utils::assert_signature(&mut reader, CDH_SIGNATURE).await?;
let header = CentralDirectoryRecord::from_reader(&mut reader).await?;
let header_size = (SIGNATURE_LENGTH + LFH_LENGTH) as u64;
let trailing_size = header.file_name_length as u64 + header.extra_field_length as u64;
let filename_basic = io::read_bytes(&mut reader, header.file_name_length.into()).await?;
let compression = Compression::try_from(header.compression)?;
let extra_field = io::read_bytes(&mut reader, header.extra_field_length.into()).await?;
let extra_fields = parse_extra_fields(extra_field, header.uncompressed_size, header.compressed_size)?;
let comment_basic = io::read_bytes(reader, header.file_comment_length.into()).await?;
let zip64_extra_field = get_zip64_extra_field(&extra_fields);
let (uncompressed_size, compressed_size) =
get_combined_sizes(header.uncompressed_size, header.compressed_size, &zip64_extra_field)?;
let mut file_offset = header.lh_offset as u64;
if let Some(zip64_extra_field) = zip64_extra_field {
if file_offset == NON_ZIP64_MAX_SIZE as u64 {
if let Some(offset) = zip64_extra_field.relative_header_offset {
file_offset = offset;
}
}
}
let filename = detect_filename(filename_basic, header.flags.filename_unicode, extra_fields.as_ref());
let comment = detect_comment(comment_basic, header.flags.filename_unicode, extra_fields.as_ref());
let entry = ZipEntry {
filename,
compression,
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
compression_level: async_compression::Level::Default,
attribute_compatibility: AttributeCompatibility::Unix,
// FIXME: Default to Unix for the moment
crc32: header.crc,
uncompressed_size,
compressed_size,
last_modification_date: ZipDateTime { date: header.mod_date, time: header.mod_time },
internal_file_attribute: header.inter_attr,
external_file_attribute: header.exter_attr,
extra_fields,
comment,
data_descriptor: header.flags.data_descriptor,
};
Ok(StoredZipEntry { entry, file_offset, header_size: header_size + trailing_size })
}
pub(crate) async fn lfh<R>(mut reader: R) -> Result<Option<ZipEntry>>
where
R: AsyncRead + Unpin,
{
let signature = {
let mut buffer = [0; 4];
reader.read_exact(&mut buffer).await?;
u32::from_le_bytes(buffer)
};
match signature {
actual if actual == LFH_SIGNATURE => (),
actual if actual == CDH_SIGNATURE => return Ok(None),
actual => return Err(ZipError::UnexpectedHeaderError(actual, LFH_SIGNATURE)),
};
let header = LocalFileHeader::from_reader(&mut reader).await?;
let filename_basic = io::read_bytes(&mut reader, header.file_name_length.into()).await?;
let compression = Compression::try_from(header.compression)?;
let extra_field = io::read_bytes(&mut reader, header.extra_field_length.into()).await?;
let extra_fields = parse_extra_fields(extra_field, header.uncompressed_size, header.compressed_size)?;
let zip64_extra_field = get_zip64_extra_field(&extra_fields);
let (uncompressed_size, compressed_size) =
get_combined_sizes(header.uncompressed_size, header.compressed_size, &zip64_extra_field)?;
if header.flags.data_descriptor && compression == Compression::Stored {
return Err(ZipError::FeatureNotSupported(
"stream reading entries with data descriptors & Stored compression mode",
));
}
if header.flags.encrypted {
return Err(ZipError::FeatureNotSupported("encryption"));
}
let filename = detect_filename(filename_basic, header.flags.filename_unicode, extra_fields.as_ref());
let entry = ZipEntry {
filename,
compression,
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
compression_level: async_compression::Level::Default,
attribute_compatibility: AttributeCompatibility::Unix,
// FIXME: Default to Unix for the moment
crc32: header.crc,
uncompressed_size,
compressed_size,
last_modification_date: ZipDateTime { date: header.mod_date, time: header.mod_time },
internal_file_attribute: 0,
external_file_attribute: 0,
extra_fields,
comment: String::new().into(),
data_descriptor: header.flags.data_descriptor,
};
Ok(Some(entry))
}
fn detect_comment(basic: Vec<u8>, basic_is_utf8: bool, extra_fields: &[ExtraField]) -> ZipString {
if basic_is_utf8 {
ZipString::new(basic, StringEncoding::Utf8)
} else {
let unicode_extra = extra_fields.iter().find_map(|field| match field {
ExtraField::InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField::V1 { crc32, unicode }) => {
if *crc32 == crc32fast::hash(&basic) {
Some(std::string::String::from_utf8(unicode.clone()))
} else {
None
}
}
_ => None,
});
if let Some(Ok(s)) = unicode_extra {
ZipString::new_with_alternative(s, basic)
} else {
// Do not treat as UTF-8 if UTF-8 flags are not set,
// some string in MBCS may be valid UTF-8 in form, but they are not in truth.
if basic.is_ascii() {
// SAFETY:
// a valid ASCII string is always a valid UTF-8 string
unsafe { std::string::String::from_utf8_unchecked(basic).into() }
} else {
ZipString::new(basic, StringEncoding::Raw)
}
}
}
}
fn detect_filename(basic: Vec<u8>, basic_is_utf8: bool, extra_fields: &[ExtraField]) -> ZipString {
if basic_is_utf8 {
ZipString::new(basic, StringEncoding::Utf8)
} else {
let unicode_extra = extra_fields.iter().find_map(|field| match field {
ExtraField::InfoZipUnicodePath(InfoZipUnicodePathExtraField::V1 { crc32, unicode }) => {
if *crc32 == crc32fast::hash(&basic) {
Some(std::string::String::from_utf8(unicode.clone()))
} else {
None
}
}
_ => None,
});
if let Some(Ok(s)) = unicode_extra {
ZipString::new_with_alternative(s, basic)
} else {
// Do not treat as UTF-8 if UTF-8 flags are not set,
// some string in MBCS may be valid UTF-8 in form, but they are not in truth.
if basic.is_ascii() {
// SAFETY:
// a valid ASCII string is always a valid UTF-8 string
unsafe { std::string::String::from_utf8_unchecked(basic).into() }
} else {
ZipString::new(basic, StringEncoding::Raw)
}
}
}
}

View file

@@ -0,0 +1,140 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A ZIP reader which acts over a seekable source.
//!
//! ### Example
//! ```no_run
//! # use async_zip::base::read::seek::ZipFileReader;
//! # use async_zip::error::Result;
//! # use futures_lite::io::AsyncReadExt;
//! # use tokio::fs::File;
//! # use tokio_util::compat::TokioAsyncReadCompatExt;
//! # use tokio::io::BufReader;
//! #
//! async fn run() -> Result<()> {
//! let mut data = BufReader::new(File::open("./foo.zip").await?);
//! let mut reader = ZipFileReader::new(data.compat()).await?;
//!
//! let mut data = Vec::new();
//! let mut entry = reader.reader_without_entry(0).await?;
//! entry.read_to_end(&mut data).await?;
//!
//! // Use data within current scope.
//!
//! Ok(())
//! }
//! ```
use crate::base::read::io::entry::ZipEntryReader;
use crate::error::{Result, ZipError};
use crate::file::ZipFile;
#[cfg(feature = "tokio")]
use crate::tokio::read::seek::ZipFileReader as TokioZipFileReader;
use futures_lite::io::{AsyncBufRead, AsyncSeek};
#[cfg(feature = "tokio")]
use tokio_util::compat::{Compat, TokioAsyncReadCompatExt};
use super::io::entry::{WithEntry, WithoutEntry};
/// A ZIP reader which acts over a seekable source.
#[derive(Clone)]
pub struct ZipFileReader<R> {
reader: R,
file: ZipFile,
}
impl<R> ZipFileReader<R>
where
R: AsyncBufRead + AsyncSeek + Unpin,
{
/// Constructs a new ZIP reader from a seekable source.
pub async fn new(mut reader: R) -> Result<ZipFileReader<R>> {
let file = crate::base::read::file(&mut reader).await?;
Ok(ZipFileReader::from_raw_parts(reader, file))
}
/// Constructs a ZIP reader from a seekable source and ZIP file information derived from that source.
///
/// Providing a [`ZipFile`] that wasn't derived from that source may lead to inaccurate parsing.
pub fn from_raw_parts(reader: R, file: ZipFile) -> ZipFileReader<R> {
ZipFileReader { reader, file }
}
/// Returns this ZIP file's information.
pub fn file(&self) -> &ZipFile {
&self.file
}
/// Returns a mutable reference to the inner seekable source.
///
/// Swapping the source (eg. via std::mem operations) may lead to inaccurate parsing.
pub fn inner_mut(&mut self) -> &mut R {
&mut self.reader
}
/// Returns the inner seekable source by consuming self.
pub fn into_inner(self) -> R {
self.reader
}
/// Returns a new entry reader if the provided index is valid.
pub async fn reader_without_entry(&mut self, index: usize) -> Result<ZipEntryReader<'_, R, WithoutEntry>> {
let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
stored_entry.seek_to_data_offset(&mut self.reader).await?;
Ok(ZipEntryReader::new_with_borrow(
&mut self.reader,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
))
}
/// Returns a new entry reader if the provided index is valid.
pub async fn reader_with_entry(&mut self, index: usize) -> Result<ZipEntryReader<'_, R, WithEntry<'_>>> {
let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
stored_entry.seek_to_data_offset(&mut self.reader).await?;
let reader = ZipEntryReader::new_with_borrow(
&mut self.reader,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
);
Ok(reader.into_with_entry(stored_entry))
}
/// Returns a new entry reader if the provided index is valid.
/// Consumes self
pub async fn into_entry<'a>(mut self, index: usize) -> Result<ZipEntryReader<'a, R, WithoutEntry>>
where
R: 'a,
{
let stored_entry = self.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
stored_entry.seek_to_data_offset(&mut self.reader).await?;
Ok(ZipEntryReader::new_with_owned(
self.reader,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
))
}
}
#[cfg(feature = "tokio")]
impl<R> ZipFileReader<Compat<R>>
where
R: tokio::io::AsyncBufRead + tokio::io::AsyncSeek + Unpin,
{
/// Constructs a new tokio-specific ZIP reader from a seekable source.
pub async fn with_tokio(reader: R) -> Result<TokioZipFileReader<R>> {
let mut reader = reader.compat();
let file = crate::base::read::file(&mut reader).await?;
Ok(ZipFileReader::from_raw_parts(reader, file))
}
}

View file

@@ -0,0 +1,174 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A ZIP reader which acts over a non-seekable source.
//!
//! # API Design
//! As opposed to other readers provided by this crate, it's important that the data of an entry is fully read before
//! the following entry is read. This is a result of not being able to seek forwards or backwards, so we must end
//! up at the start of the next entry.
//!
//! **We encode this invariant within Rust's type system so that it can be enforced at compile time.**
//!
//! This requires that any transition methods between these encoded types consume the reader and provide a new owned
//! reader back. This is certainly something to keep in mind when working with this reader, but idiomatic code can
//! still be produced nevertheless.
//!
//! # Considerations
//! As the central directory of a ZIP archive is stored at the end of it, a non-seekable reader doesn't have access
//! to it. We have to rely on information provided within the local file header which may not be accurate or complete.
//! This results in:
//! - The inability to read ZIP entries using the combination of a data descriptor and the Stored compression method.
//! - No file comment being available (defaults to an empty string).
//! - No internal or external file attributes being available (defaults to 0).
//! - The extra field data potentially being inconsistent with what's stored in the central directory.
//! - None of the following being available when the entry was written with a data descriptor (defaults to 0):
//! - CRC
//! - compressed size
//! - uncompressed size
//!
//! # Example
//! ```no_run
//! # use futures_lite::io::Cursor;
//! # use async_zip::error::Result;
//! # use async_zip::base::read::stream::ZipFileReader;
//! #
//! # async fn run() -> Result<()> {
//! let mut zip = ZipFileReader::new(Cursor::new([0; 0]));
//!
//! // Print the name of every file in a ZIP archive.
//! while let Some(entry) = zip.next_with_entry().await? {
//! println!("File: {}", entry.reader().entry().filename().as_str().unwrap());
//! zip = entry.skip().await?;
//! }
//! #
//! # Ok(())
//! # }
//! ```
use super::io::ConsumeDataDescriptor;
use crate::base::read::io::entry::ZipEntryReader;
use crate::error::Result;
use crate::error::ZipError;
#[cfg(feature = "tokio")]
use crate::tokio::read::stream::Ready as TokioReady;
use futures_lite::io::AsyncBufRead;
use futures_lite::io::AsyncReadExt;
#[cfg(feature = "tokio")]
use tokio_util::compat::TokioAsyncReadCompatExt;
use super::io::entry::WithEntry;
use super::io::entry::WithoutEntry;
/// A type which encodes that [`ZipFileReader`] is ready to open a new entry.
pub struct Ready<R>(R);
/// A type which encodes that [`ZipFileReader`] is currently reading an entry.
pub struct Reading<'a, R, E>(ZipEntryReader<'a, R, E>, bool);
/// A ZIP reader which acts over a non-seekable source.
///
/// See the [module-level docs](.) for more information.
#[derive(Clone)]
pub struct ZipFileReader<S>(S);
impl<'a, R> ZipFileReader<Ready<R>>
where
R: AsyncBufRead + Unpin + 'a,
{
/// Constructs a new ZIP reader from a non-seekable source.
pub fn new(reader: R) -> Self {
Self(Ready(reader))
}
    /// Opens the next entry for reading if the central directory hasn't yet been reached.
pub async fn next_without_entry(mut self) -> Result<Option<ZipFileReader<Reading<'a, R, WithoutEntry>>>> {
let entry = match crate::base::read::lfh(&mut self.0 .0).await? {
Some(entry) => entry,
None => return Ok(None),
};
let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size };
let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length);
Ok(Some(ZipFileReader(Reading(reader, entry.data_descriptor))))
}
    /// Opens the next entry for reading if the central directory hasn't yet been reached.
pub async fn next_with_entry(mut self) -> Result<Option<ZipFileReader<Reading<'a, R, WithEntry<'a>>>>> {
let entry = match crate::base::read::lfh(&mut self.0 .0).await? {
Some(entry) => entry,
None => return Ok(None),
};
let length = if entry.data_descriptor { u64::MAX } else { entry.compressed_size };
let reader = ZipEntryReader::new_with_owned(self.0 .0, entry.compression, length);
let data_descriptor = entry.data_descriptor;
Ok(Some(ZipFileReader(Reading(reader.into_with_entry_owned(entry), data_descriptor))))
}
    /// Consumes the `ZipFileReader`, returning the original `reader`.
pub async fn into_inner(self) -> R {
self.0 .0
}
}
#[cfg(feature = "tokio")]
impl<R> ZipFileReader<TokioReady<R>>
where
R: tokio::io::AsyncBufRead + Unpin,
{
/// Constructs a new tokio-specific ZIP reader from a non-seekable source.
pub fn with_tokio(reader: R) -> ZipFileReader<TokioReady<R>> {
Self(Ready(reader.compat()))
}
}
impl<'a, R, E> ZipFileReader<Reading<'a, R, E>>
where
R: AsyncBufRead + Unpin,
{
/// Returns an immutable reference to the inner entry reader.
pub fn reader(&self) -> &ZipEntryReader<'a, R, E> {
&self.0 .0
}
/// Returns a mutable reference to the inner entry reader.
pub fn reader_mut(&mut self) -> &mut ZipEntryReader<'a, R, E> {
&mut self.0 .0
}
/// Converts the reader back into the Ready state if EOF has been reached.
pub async fn done(mut self) -> Result<ZipFileReader<Ready<R>>> {
if self.0 .0.read(&mut [0; 1]).await? != 0 {
return Err(ZipError::EOFNotReached);
}
let mut inner = self.0 .0.into_inner();
// Has data descriptor.
if self.0 .1 {
ConsumeDataDescriptor(&mut inner).await?;
}
Ok(ZipFileReader(Ready(inner)))
}
/// Reads until EOF and converts the reader back into the Ready state.
pub async fn skip(mut self) -> Result<ZipFileReader<Ready<R>>> {
while self.0 .0.read(&mut [0; 2048]).await? != 0 {}
let mut inner = self.0 .0.into_inner();
// Has data descriptor.
if self.0 .1 {
ConsumeDataDescriptor(&mut inner).await?;
}
Ok(ZipFileReader(Ready(inner)))
}
}
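// Illustrative usage sketch, not part of the upstream source: read the contents of every entry
// from a non-seekable source, handing the reader back through `done()` so the typestate above is
// respected. The `source` parameter is hypothetical.
async fn read_all(source: impl AsyncBufRead + Unpin) -> Result<()> {
    let mut zip = ZipFileReader::new(source);
    while let Some(mut reading) = zip.next_with_entry().await? {
        let name = reading.reader().entry().filename().as_str().unwrap_or("<non-utf8>").to_owned();
        let mut data = Vec::new();
        reading.reader_mut().read_to_end(&mut data).await?;
        println!("{name}: {} bytes", data.len());
        zip = reading.done().await?;
    }
    Ok(())
}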

View file

@ -0,0 +1,137 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::write::io::offset::AsyncOffsetWriter;
use crate::spec::Compression;
use std::io::Error;
use std::pin::Pin;
use std::task::{Context, Poll};
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
use async_compression::futures::write;
use futures_lite::io::AsyncWrite;
pub enum CompressedAsyncWriter<'b, W: AsyncWrite + Unpin> {
Stored(ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter<W>>),
#[cfg(feature = "deflate")]
Deflate(write::DeflateEncoder<ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter<W>>>),
#[cfg(feature = "bzip2")]
Bz(write::BzEncoder<ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter<W>>>),
#[cfg(feature = "lzma")]
Lzma(write::LzmaEncoder<ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter<W>>>),
#[cfg(feature = "zstd")]
Zstd(write::ZstdEncoder<ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter<W>>>),
#[cfg(feature = "xz")]
Xz(write::XzEncoder<ShutdownIgnoredWriter<&'b mut AsyncOffsetWriter<W>>>),
}
impl<'b, W: AsyncWrite + Unpin> CompressedAsyncWriter<'b, W> {
pub fn from_raw(writer: &'b mut AsyncOffsetWriter<W>, compression: Compression) -> Self {
match compression {
Compression::Stored => CompressedAsyncWriter::Stored(ShutdownIgnoredWriter(writer)),
#[cfg(feature = "deflate")]
Compression::Deflate => {
CompressedAsyncWriter::Deflate(write::DeflateEncoder::new(ShutdownIgnoredWriter(writer)))
}
#[cfg(feature = "deflate64")]
Compression::Deflate64 => panic!("writing deflate64 is not supported"),
#[cfg(feature = "bzip2")]
Compression::Bz => CompressedAsyncWriter::Bz(write::BzEncoder::new(ShutdownIgnoredWriter(writer))),
#[cfg(feature = "lzma")]
Compression::Lzma => CompressedAsyncWriter::Lzma(write::LzmaEncoder::new(ShutdownIgnoredWriter(writer))),
#[cfg(feature = "zstd")]
Compression::Zstd => CompressedAsyncWriter::Zstd(write::ZstdEncoder::new(ShutdownIgnoredWriter(writer))),
#[cfg(feature = "xz")]
Compression::Xz => CompressedAsyncWriter::Xz(write::XzEncoder::new(ShutdownIgnoredWriter(writer))),
}
}
pub fn into_inner(self) -> &'b mut AsyncOffsetWriter<W> {
match self {
CompressedAsyncWriter::Stored(inner) => inner.into_inner(),
#[cfg(feature = "deflate")]
CompressedAsyncWriter::Deflate(inner) => inner.into_inner().into_inner(),
#[cfg(feature = "bzip2")]
CompressedAsyncWriter::Bz(inner) => inner.into_inner().into_inner(),
#[cfg(feature = "lzma")]
CompressedAsyncWriter::Lzma(inner) => inner.into_inner().into_inner(),
#[cfg(feature = "zstd")]
CompressedAsyncWriter::Zstd(inner) => inner.into_inner().into_inner(),
#[cfg(feature = "xz")]
CompressedAsyncWriter::Xz(inner) => inner.into_inner().into_inner(),
}
}
}
impl<'b, W: AsyncWrite + Unpin> AsyncWrite for CompressedAsyncWriter<'b, W> {
fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<std::result::Result<usize, Error>> {
match *self {
CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_write(cx, buf),
#[cfg(feature = "deflate")]
CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_write(cx, buf),
#[cfg(feature = "bzip2")]
CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_write(cx, buf),
#[cfg(feature = "lzma")]
CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_write(cx, buf),
#[cfg(feature = "zstd")]
CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_write(cx, buf),
#[cfg(feature = "xz")]
CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_write(cx, buf),
}
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
match *self {
CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_flush(cx),
#[cfg(feature = "deflate")]
CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_flush(cx),
#[cfg(feature = "bzip2")]
CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_flush(cx),
#[cfg(feature = "lzma")]
CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_flush(cx),
#[cfg(feature = "zstd")]
CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_flush(cx),
#[cfg(feature = "xz")]
CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_flush(cx),
}
}
fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
match *self {
CompressedAsyncWriter::Stored(ref mut inner) => Pin::new(inner).poll_close(cx),
#[cfg(feature = "deflate")]
CompressedAsyncWriter::Deflate(ref mut inner) => Pin::new(inner).poll_close(cx),
#[cfg(feature = "bzip2")]
CompressedAsyncWriter::Bz(ref mut inner) => Pin::new(inner).poll_close(cx),
#[cfg(feature = "lzma")]
CompressedAsyncWriter::Lzma(ref mut inner) => Pin::new(inner).poll_close(cx),
#[cfg(feature = "zstd")]
CompressedAsyncWriter::Zstd(ref mut inner) => Pin::new(inner).poll_close(cx),
#[cfg(feature = "xz")]
CompressedAsyncWriter::Xz(ref mut inner) => Pin::new(inner).poll_close(cx),
}
}
}
pub struct ShutdownIgnoredWriter<W: AsyncWrite + Unpin>(W);
impl<W: AsyncWrite + Unpin> ShutdownIgnoredWriter<W> {
pub fn into_inner(self) -> W {
self.0
}
}
impl<W: AsyncWrite + Unpin> AsyncWrite for ShutdownIgnoredWriter<W> {
fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<std::result::Result<usize, Error>> {
Pin::new(&mut self.0).poll_write(cx, buf)
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
Pin::new(&mut self.0).poll_flush(cx)
}
fn poll_close(self: Pin<&mut Self>, _: &mut Context) -> Poll<std::result::Result<(), Error>> {
Poll::Ready(Ok(()))
}
}

View file

@ -0,0 +1,272 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::write::compressed_writer::CompressedAsyncWriter;
use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut;
use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut;
use crate::base::write::io::offset::AsyncOffsetWriter;
use crate::base::write::CentralDirectoryEntry;
use crate::base::write::ZipFileWriter;
use crate::entry::ZipEntry;
use crate::error::{Result, Zip64ErrorCase, ZipError};
use crate::spec::extra_field::ExtraFieldAsBytes;
use crate::spec::header::InfoZipUnicodeCommentExtraField;
use crate::spec::header::InfoZipUnicodePathExtraField;
use crate::spec::header::{
CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, HeaderId, LocalFileHeader,
Zip64ExtendedInformationExtraField,
};
use crate::string::StringEncoding;
use std::io::Error;
use std::pin::Pin;
use std::task::{Context, Poll};
use crate::base::read::get_zip64_extra_field_mut;
use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE};
use crc32fast::Hasher;
use futures_lite::io::{AsyncWrite, AsyncWriteExt};
/// An entry writer which supports the streaming of data (i.e. writing data whose size is unknown, or data produced at runtime).
///
/// # Note
/// - This writer cannot be manually constructed; instead, use [`ZipFileWriter::write_entry_stream()`].
/// - [`EntryStreamWriter::close()`] must be called before a stream writer goes out of scope.
/// - Utilities for working with [`AsyncWrite`] values are provided by [`AsyncWriteExt`].
pub struct EntryStreamWriter<'b, W: AsyncWrite + Unpin> {
writer: AsyncOffsetWriter<CompressedAsyncWriter<'b, W>>,
cd_entries: &'b mut Vec<CentralDirectoryEntry>,
entry: ZipEntry,
hasher: Hasher,
lfh: LocalFileHeader,
lfh_offset: u64,
data_offset: u64,
force_no_zip64: bool,
    /// Used to write back to the parent writer whether zip64 ended up being required.
is_zip64: &'b mut bool,
}
impl<'b, W: AsyncWrite + Unpin> EntryStreamWriter<'b, W> {
pub(crate) async fn from_raw(
writer: &'b mut ZipFileWriter<W>,
mut entry: ZipEntry,
) -> Result<EntryStreamWriter<'b, W>> {
let lfh_offset = writer.writer.offset();
let lfh = EntryStreamWriter::write_lfh(writer, &mut entry).await?;
let data_offset = writer.writer.offset();
let force_no_zip64 = writer.force_no_zip64;
let cd_entries = &mut writer.cd_entries;
let is_zip64 = &mut writer.is_zip64;
let writer = AsyncOffsetWriter::new(CompressedAsyncWriter::from_raw(&mut writer.writer, entry.compression()));
Ok(EntryStreamWriter {
writer,
cd_entries,
entry,
lfh,
lfh_offset,
data_offset,
hasher: Hasher::new(),
force_no_zip64,
is_zip64,
})
}
async fn write_lfh(writer: &'b mut ZipFileWriter<W>, entry: &mut ZipEntry) -> Result<LocalFileHeader> {
// Always emit a zip64 extended field, even if we don't need it, because we *might* need it.
        // If we are forcing no zip64, we will have to error later if the file is too large.
let (lfh_compressed, lfh_uncompressed) = if !writer.force_no_zip64 {
if !writer.is_zip64 {
writer.is_zip64 = true;
}
entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(Zip64ExtendedInformationExtraField {
header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD,
uncompressed_size: Some(entry.uncompressed_size),
compressed_size: Some(entry.compressed_size),
relative_header_offset: None,
disk_start_number: None,
}));
(NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE)
} else {
if entry.compressed_size > NON_ZIP64_MAX_SIZE as u64 || entry.uncompressed_size > NON_ZIP64_MAX_SIZE as u64
{
return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
}
(entry.compressed_size as u32, entry.uncompressed_size as u32)
};
let utf8_without_alternative =
entry.filename().is_utf8_without_alternative() && entry.comment().is_utf8_without_alternative();
if !utf8_without_alternative {
if matches!(entry.filename().encoding(), StringEncoding::Utf8) {
let u_file_name = entry.filename().as_bytes().to_vec();
if !u_file_name.is_empty() {
let basic_crc32 =
crc32fast::hash(entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes()));
let upath_field = get_or_put_info_zip_unicode_path_extra_field_mut(entry.extra_fields.as_mut());
if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field {
*crc32 = basic_crc32;
*unicode = u_file_name;
}
}
}
if matches!(entry.comment().encoding(), StringEncoding::Utf8) {
let u_comment = entry.comment().as_bytes().to_vec();
if !u_comment.is_empty() {
let basic_crc32 =
crc32fast::hash(entry.comment().alternative().unwrap_or_else(|| entry.comment().as_bytes()));
let ucom_field = get_or_put_info_zip_unicode_comment_extra_field_mut(entry.extra_fields.as_mut());
if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field {
*crc32 = basic_crc32;
*unicode = u_comment;
}
}
}
}
let filename_basic = entry.filename().alternative().unwrap_or_else(|| entry.filename().as_bytes());
let lfh = LocalFileHeader {
compressed_size: lfh_compressed,
uncompressed_size: lfh_uncompressed,
compression: entry.compression().into(),
crc: entry.crc32,
extra_field_length: entry
.extra_fields()
.count_bytes()
.try_into()
.map_err(|_| ZipError::ExtraFieldTooLarge)?,
file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?,
mod_time: entry.last_modification_date().time,
mod_date: entry.last_modification_date().date,
version: crate::spec::version::as_needed_to_extract(entry),
flags: GeneralPurposeFlag {
data_descriptor: true,
encrypted: false,
filename_unicode: utf8_without_alternative,
},
};
writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?;
writer.writer.write_all(&lfh.as_slice()).await?;
writer.writer.write_all(filename_basic).await?;
writer.writer.write_all(&entry.extra_fields().as_bytes()).await?;
Ok(lfh)
}
/// Consumes this entry writer and completes all closing tasks.
///
/// This includes:
/// - Finalising the CRC32 hash value for the written data.
/// - Calculating the compressed and uncompressed byte sizes.
/// - Constructing a central directory header.
/// - Pushing that central directory header to the [`ZipFileWriter`]'s store.
///
/// Failure to call this function before going out of scope would result in a corrupted ZIP file.
pub async fn close(mut self) -> Result<()> {
self.writer.close().await?;
let crc = self.hasher.finalize();
let uncompressed_size = self.writer.offset();
let inner_writer = self.writer.into_inner().into_inner();
let compressed_size = inner_writer.offset() - self.data_offset;
let (cdr_compressed_size, cdr_uncompressed_size, lh_offset) = if self.force_no_zip64 {
if uncompressed_size > NON_ZIP64_MAX_SIZE as u64
|| compressed_size > NON_ZIP64_MAX_SIZE as u64
|| self.lfh_offset > NON_ZIP64_MAX_SIZE as u64
{
return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
}
(uncompressed_size as u32, compressed_size as u32, self.lfh_offset as u32)
} else {
// When streaming an entry, we are always using a zip64 field.
match get_zip64_extra_field_mut(&mut self.entry.extra_fields) {
// This case shouldn't be necessary but is included for completeness.
None => {
self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(
Zip64ExtendedInformationExtraField {
header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD,
uncompressed_size: Some(uncompressed_size),
compressed_size: Some(compressed_size),
relative_header_offset: Some(self.lfh_offset),
disk_start_number: None,
},
));
}
Some(zip64) => {
zip64.uncompressed_size = Some(uncompressed_size);
zip64.compressed_size = Some(compressed_size);
zip64.relative_header_offset = Some(self.lfh_offset);
}
}
self.lfh.extra_field_length =
self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?;
(NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE)
};
inner_writer.write_all(&crate::spec::consts::DATA_DESCRIPTOR_SIGNATURE.to_le_bytes()).await?;
inner_writer.write_all(&crc.to_le_bytes()).await?;
inner_writer.write_all(&cdr_compressed_size.to_le_bytes()).await?;
inner_writer.write_all(&cdr_uncompressed_size.to_le_bytes()).await?;
let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes());
let cdh = CentralDirectoryRecord {
compressed_size: cdr_compressed_size,
uncompressed_size: cdr_uncompressed_size,
crc,
v_made_by: crate::spec::version::as_made_by(),
v_needed: self.lfh.version,
compression: self.lfh.compression,
extra_field_length: self.lfh.extra_field_length,
file_name_length: self.lfh.file_name_length,
file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?,
mod_time: self.lfh.mod_time,
mod_date: self.lfh.mod_date,
flags: self.lfh.flags,
disk_start: 0,
inter_attr: self.entry.internal_file_attribute(),
exter_attr: self.entry.external_file_attribute(),
lh_offset,
};
self.cd_entries.push(CentralDirectoryEntry { header: cdh, entry: self.entry });
// Ensure that we can fit this many files in this archive if forcing no zip64
if self.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize {
if self.force_no_zip64 {
return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles));
}
if !*self.is_zip64 {
*self.is_zip64 = true;
}
}
Ok(())
}
}
impl<'a, W: AsyncWrite + Unpin> AsyncWrite for EntryStreamWriter<'a, W> {
fn poll_write(mut self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<std::result::Result<usize, Error>> {
let poll = Pin::new(&mut self.writer).poll_write(cx, buf);
if let Poll::Ready(Ok(written)) = poll {
self.hasher.update(&buf[0..written]);
}
poll
}
fn poll_flush(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
Pin::new(&mut self.writer).poll_flush(cx)
}
fn poll_close(mut self: Pin<&mut Self>, cx: &mut Context) -> Poll<std::result::Result<(), Error>> {
Pin::new(&mut self.writer).poll_close(cx)
}
}
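// Illustrative usage sketch, not part of the upstream source: stream several chunks into one
// entry and finish with the mandatory `close()` call described above. The chunk source is
// hypothetical.
async fn write_chunks<W: AsyncWrite + Unpin>(
    zip: &mut ZipFileWriter<W>,
    entry: ZipEntry,
    chunks: &[&[u8]],
) -> Result<()> {
    let mut entry_writer = zip.write_entry_stream(entry).await?;
    for chunk in chunks {
        entry_writer.write_all(chunk).await?;
    }
    // Skipping close() would leave the archive without a central directory entry for this data.
    entry_writer.close().await
}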

View file

@ -0,0 +1,259 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::write::get_or_put_info_zip_unicode_comment_extra_field_mut;
use crate::base::write::get_or_put_info_zip_unicode_path_extra_field_mut;
use crate::base::write::{CentralDirectoryEntry, ZipFileWriter};
use crate::entry::ZipEntry;
use crate::error::{Result, Zip64ErrorCase, ZipError};
use crate::spec::extra_field::Zip64ExtendedInformationExtraFieldBuilder;
use crate::spec::header::{InfoZipUnicodeCommentExtraField, InfoZipUnicodePathExtraField};
use crate::spec::{
extra_field::ExtraFieldAsBytes,
header::{CentralDirectoryRecord, ExtraField, GeneralPurposeFlag, LocalFileHeader},
Compression,
};
use crate::StringEncoding;
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
use futures_lite::io::Cursor;
use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE};
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
use async_compression::futures::write;
use futures_lite::io::{AsyncWrite, AsyncWriteExt};
pub struct EntryWholeWriter<'b, 'c, W: AsyncWrite + Unpin> {
writer: &'b mut ZipFileWriter<W>,
entry: ZipEntry,
data: &'c [u8],
}
impl<'b, 'c, W: AsyncWrite + Unpin> EntryWholeWriter<'b, 'c, W> {
pub fn from_raw(writer: &'b mut ZipFileWriter<W>, entry: ZipEntry, data: &'c [u8]) -> Self {
Self { writer, entry, data }
}
pub async fn write(mut self) -> Result<()> {
let mut _compressed_data: Option<Vec<u8>> = None;
let compressed_data = match self.entry.compression() {
Compression::Stored => self.data,
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
_ => {
_compressed_data =
Some(compress(self.entry.compression(), self.data, self.entry.compression_level).await);
_compressed_data.as_ref().unwrap()
}
};
let mut zip64_extra_field_builder = None;
let (lfh_uncompressed_size, lfh_compressed_size) = if self.data.len() as u64 > NON_ZIP64_MAX_SIZE as u64
|| compressed_data.len() as u64 > NON_ZIP64_MAX_SIZE as u64
{
if self.writer.force_no_zip64 {
return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
}
if !self.writer.is_zip64 {
self.writer.is_zip64 = true;
}
zip64_extra_field_builder = Some(
Zip64ExtendedInformationExtraFieldBuilder::new()
.sizes(compressed_data.len() as u64, self.data.len() as u64),
);
(NON_ZIP64_MAX_SIZE, NON_ZIP64_MAX_SIZE)
} else {
(self.data.len() as u32, compressed_data.len() as u32)
};
let lh_offset = if self.writer.writer.offset() > NON_ZIP64_MAX_SIZE as u64 {
if self.writer.force_no_zip64 {
return Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile));
}
if !self.writer.is_zip64 {
self.writer.is_zip64 = true;
}
if let Some(zip64_extra_field) = zip64_extra_field_builder {
zip64_extra_field_builder = Some(zip64_extra_field.relative_header_offset(self.writer.writer.offset()));
} else {
zip64_extra_field_builder = Some(
Zip64ExtendedInformationExtraFieldBuilder::new()
.relative_header_offset(self.writer.writer.offset()),
);
}
NON_ZIP64_MAX_SIZE
} else {
self.writer.writer.offset() as u32
};
if let Some(builder) = zip64_extra_field_builder {
if !builder.eof_only() {
self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(builder.build()?));
zip64_extra_field_builder = None;
} else {
zip64_extra_field_builder = Some(builder);
}
}
let utf8_without_alternative =
self.entry.filename().is_utf8_without_alternative() && self.entry.comment().is_utf8_without_alternative();
if !utf8_without_alternative {
if matches!(self.entry.filename().encoding(), StringEncoding::Utf8) {
let u_file_name = self.entry.filename().as_bytes().to_vec();
if !u_file_name.is_empty() {
let basic_crc32 = crc32fast::hash(
self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes()),
);
let upath_field =
get_or_put_info_zip_unicode_path_extra_field_mut(self.entry.extra_fields.as_mut());
if let InfoZipUnicodePathExtraField::V1 { crc32, unicode } = upath_field {
*crc32 = basic_crc32;
*unicode = u_file_name;
}
}
}
if matches!(self.entry.comment().encoding(), StringEncoding::Utf8) {
let u_comment = self.entry.comment().as_bytes().to_vec();
if !u_comment.is_empty() {
let basic_crc32 = crc32fast::hash(
self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes()),
);
let ucom_field =
get_or_put_info_zip_unicode_comment_extra_field_mut(self.entry.extra_fields.as_mut());
if let InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } = ucom_field {
*crc32 = basic_crc32;
*unicode = u_comment;
}
}
}
}
let filename_basic = self.entry.filename().alternative().unwrap_or_else(|| self.entry.filename().as_bytes());
let comment_basic = self.entry.comment().alternative().unwrap_or_else(|| self.entry.comment().as_bytes());
let lf_header = LocalFileHeader {
compressed_size: lfh_compressed_size,
uncompressed_size: lfh_uncompressed_size,
compression: self.entry.compression().into(),
crc: crc32fast::hash(self.data),
extra_field_length: self
.entry
.extra_fields()
.count_bytes()
.try_into()
.map_err(|_| ZipError::ExtraFieldTooLarge)?,
file_name_length: filename_basic.len().try_into().map_err(|_| ZipError::FileNameTooLarge)?,
mod_time: self.entry.last_modification_date().time,
mod_date: self.entry.last_modification_date().date,
version: crate::spec::version::as_needed_to_extract(&self.entry),
flags: GeneralPurposeFlag {
data_descriptor: false,
encrypted: false,
filename_unicode: utf8_without_alternative,
},
};
let mut header = CentralDirectoryRecord {
v_made_by: crate::spec::version::as_made_by(),
v_needed: lf_header.version,
compressed_size: lf_header.compressed_size,
uncompressed_size: lf_header.uncompressed_size,
compression: lf_header.compression,
crc: lf_header.crc,
extra_field_length: lf_header.extra_field_length,
file_name_length: lf_header.file_name_length,
file_comment_length: comment_basic.len().try_into().map_err(|_| ZipError::CommentTooLarge)?,
mod_time: lf_header.mod_time,
mod_date: lf_header.mod_date,
flags: lf_header.flags,
disk_start: 0,
inter_attr: self.entry.internal_file_attribute(),
exter_attr: self.entry.external_file_attribute(),
lh_offset,
};
self.writer.writer.write_all(&crate::spec::consts::LFH_SIGNATURE.to_le_bytes()).await?;
self.writer.writer.write_all(&lf_header.as_slice()).await?;
self.writer.writer.write_all(filename_basic).await?;
self.writer.writer.write_all(&self.entry.extra_fields().as_bytes()).await?;
self.writer.writer.write_all(compressed_data).await?;
if let Some(builder) = zip64_extra_field_builder {
self.entry.extra_fields.push(ExtraField::Zip64ExtendedInformation(builder.build()?));
header.extra_field_length =
self.entry.extra_fields().count_bytes().try_into().map_err(|_| ZipError::ExtraFieldTooLarge)?;
}
self.writer.cd_entries.push(CentralDirectoryEntry { header, entry: self.entry });
// Ensure that we can fit this many files in this archive if forcing no zip64
if self.writer.cd_entries.len() > NON_ZIP64_MAX_NUM_FILES as usize {
if self.writer.force_no_zip64 {
return Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles));
}
if !self.writer.is_zip64 {
self.writer.is_zip64 = true;
}
}
Ok(())
}
}
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
async fn compress(compression: Compression, data: &[u8], level: async_compression::Level) -> Vec<u8> {
// TODO: Reduce reallocations of Vec by making a lower-bound estimate of the length reduction and
// pre-initialising the Vec to that length. Then truncate() to the actual number of bytes written.
match compression {
#[cfg(feature = "deflate")]
Compression::Deflate => {
let mut writer = write::DeflateEncoder::with_quality(Cursor::new(Vec::new()), level);
writer.write_all(data).await.unwrap();
writer.close().await.unwrap();
writer.into_inner().into_inner()
}
#[cfg(feature = "deflate64")]
Compression::Deflate64 => panic!("compressing deflate64 is not supported"),
#[cfg(feature = "bzip2")]
Compression::Bz => {
let mut writer = write::BzEncoder::with_quality(Cursor::new(Vec::new()), level);
writer.write_all(data).await.unwrap();
writer.close().await.unwrap();
writer.into_inner().into_inner()
}
#[cfg(feature = "lzma")]
Compression::Lzma => {
let mut writer = write::LzmaEncoder::with_quality(Cursor::new(Vec::new()), level);
writer.write_all(data).await.unwrap();
writer.close().await.unwrap();
writer.into_inner().into_inner()
}
#[cfg(feature = "xz")]
Compression::Xz => {
let mut writer = write::XzEncoder::with_quality(Cursor::new(Vec::new()), level);
writer.write_all(data).await.unwrap();
writer.close().await.unwrap();
writer.into_inner().into_inner()
}
#[cfg(feature = "zstd")]
Compression::Zstd => {
let mut writer = write::ZstdEncoder::with_quality(Cursor::new(Vec::new()), level);
writer.write_all(data).await.unwrap();
writer.close().await.unwrap();
writer.into_inner().into_inner()
}
_ => unreachable!(),
}
}

View file

@ -0,0 +1,4 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod offset;

View file

@ -0,0 +1,73 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use std::io::{Error, IoSlice};
use std::pin::Pin;
use std::task::{Context, Poll};
use futures_lite::io::AsyncWrite;
use pin_project::pin_project;
/// A wrapper around an [`AsyncWrite`] implementation which tracks the current byte offset.
#[pin_project(project = OffsetWriterProj)]
pub struct AsyncOffsetWriter<W> {
#[pin]
inner: W,
offset: u64,
}
impl<W> AsyncOffsetWriter<W>
where
W: AsyncWrite + Unpin,
{
/// Constructs a new wrapper from an inner [`AsyncWrite`] writer.
pub fn new(inner: W) -> Self {
Self { inner, offset: 0 }
}
/// Returns the current byte offset.
pub fn offset(&self) -> u64 {
self.offset
}
/// Consumes this wrapper and returns the inner [`AsyncWrite`] writer.
pub fn into_inner(self) -> W {
self.inner
}
pub fn inner_mut(&mut self) -> &mut W {
&mut self.inner
}
}
impl<W> AsyncWrite for AsyncOffsetWriter<W>
where
W: AsyncWrite + Unpin,
{
fn poll_write(self: Pin<&mut Self>, cx: &mut Context, buf: &[u8]) -> Poll<Result<usize, Error>> {
let this = self.project();
let poll = this.inner.poll_write(cx, buf);
if let Poll::Ready(Ok(inner)) = &poll {
*this.offset += *inner as u64;
}
poll
}
fn poll_flush(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Error>> {
self.project().inner.poll_flush(cx)
}
fn poll_close(self: Pin<&mut Self>, cx: &mut Context) -> Poll<Result<(), Error>> {
self.project().inner.poll_close(cx)
}
fn poll_write_vectored(
self: Pin<&mut Self>,
cx: &mut Context<'_>,
bufs: &[IoSlice<'_>],
) -> Poll<Result<usize, Error>> {
self.project().inner.poll_write_vectored(cx, bufs)
}
}
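// Minimal sketch, not part of the upstream source: every successful write advances `offset()` by
// exactly the number of bytes the inner writer accepted. A `Vec<u8>` serves as the inner writer,
// mirroring the writer examples elsewhere in the crate.
async fn offset_demo() -> std::io::Result<()> {
    use futures_lite::io::AsyncWriteExt;
    let mut writer = AsyncOffsetWriter::new(Vec::<u8>::new());
    writer.write_all(b"PK").await?;
    assert_eq!(writer.offset(), 2);
    Ok(())
}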

View file

@ -0,0 +1,290 @@
// Copyright (c) 2021-2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A module which supports writing ZIP files.
//!
//! # Example
//! ### Whole data (u8 slice)
//! ```no_run
//! # #[cfg(feature = "deflate")]
//! # {
//! # use async_zip::{Compression, ZipEntryBuilder, base::write::ZipFileWriter};
//! # use async_zip::error::ZipError;
//! #
//! # async fn run() -> Result<(), ZipError> {
//! let mut writer = ZipFileWriter::new(Vec::<u8>::new());
//!
//! let data = b"This is an example file.";
//! let opts = ZipEntryBuilder::new(String::from("foo.txt").into(), Compression::Deflate);
//!
//! writer.write_entry_whole(opts, data).await?;
//! writer.close().await?;
//! # Ok(())
//! # }
//! # }
//! ```
//! ### Stream data (unknown size & data)
//! ```no_run
//! # #[cfg(feature = "deflate")]
//! # {
//! # use async_zip::{Compression, ZipEntryBuilder, base::write::ZipFileWriter};
//! # use std::io::Cursor;
//! # use async_zip::error::ZipError;
//! # use futures_lite::io::AsyncWriteExt;
//! # use tokio_util::compat::TokioAsyncWriteCompatExt;
//! #
//! # async fn run() -> Result<(), ZipError> {
//! let mut writer = ZipFileWriter::new(Vec::<u8>::new());
//!
//! let data = b"This is an example file.";
//! let opts = ZipEntryBuilder::new(String::from("bar.txt").into(), Compression::Deflate);
//!
//! let mut entry_writer = writer.write_entry_stream(opts).await?;
//! entry_writer.write_all(data).await.unwrap();
//!
//! entry_writer.close().await?;
//! writer.close().await?;
//! # Ok(())
//! # }
//! # }
//! ```
pub(crate) mod compressed_writer;
pub(crate) mod entry_stream;
pub(crate) mod entry_whole;
pub(crate) mod io;
pub use entry_stream::EntryStreamWriter;
#[cfg(feature = "tokio")]
use tokio_util::compat::{Compat, TokioAsyncWriteCompatExt};
use crate::entry::ZipEntry;
use crate::error::Result;
use crate::spec::extra_field::ExtraFieldAsBytes;
use crate::spec::header::{
CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, InfoZipUnicodeCommentExtraField,
InfoZipUnicodePathExtraField, Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord,
};
#[cfg(feature = "tokio")]
use crate::tokio::write::ZipFileWriter as TokioZipFileWriter;
use entry_whole::EntryWholeWriter;
use io::offset::AsyncOffsetWriter;
use crate::spec::consts::{NON_ZIP64_MAX_NUM_FILES, NON_ZIP64_MAX_SIZE};
use futures_lite::io::{AsyncWrite, AsyncWriteExt};
pub(crate) struct CentralDirectoryEntry {
pub header: CentralDirectoryRecord,
pub entry: ZipEntry,
}
/// A ZIP file writer which acts over AsyncWrite implementers.
///
/// # Note
/// - [`ZipFileWriter::close()`] must be called before a stream writer goes out of scope.
pub struct ZipFileWriter<W> {
pub(crate) writer: AsyncOffsetWriter<W>,
pub(crate) cd_entries: Vec<CentralDirectoryEntry>,
/// If true, will error if a Zip64 struct must be written.
force_no_zip64: bool,
/// Whether to write Zip64 end of directory structs.
pub(crate) is_zip64: bool,
comment_opt: Option<String>,
}
impl<W: AsyncWrite + Unpin> ZipFileWriter<W> {
/// Construct a new ZIP file writer from a mutable reference to a writer.
pub fn new(writer: W) -> Self {
Self {
writer: AsyncOffsetWriter::new(writer),
cd_entries: Vec::new(),
comment_opt: None,
is_zip64: false,
force_no_zip64: false,
}
}
/// Force the ZIP writer to operate in non-ZIP64 mode.
/// If any files would need ZIP64, an error will be raised.
pub fn force_no_zip64(mut self) -> Self {
self.force_no_zip64 = true;
self
}
/// Force the ZIP writer to emit Zip64 structs at the end of the archive.
/// Zip64 extended fields will only be written if needed.
pub fn force_zip64(mut self) -> Self {
self.is_zip64 = true;
self
}
/// Write a new ZIP entry of known size and data.
pub async fn write_entry_whole<E: Into<ZipEntry>>(&mut self, entry: E, data: &[u8]) -> Result<()> {
EntryWholeWriter::from_raw(self, entry.into(), data).write().await
}
    /// Write an entry of unknown size and data via streaming (i.e. using a data descriptor).
    /// The generated Local File Header will be invalid, with no compressed size, no uncompressed size,
    /// and a null CRC. This might cause problems for the destination reader.
pub async fn write_entry_stream<E: Into<ZipEntry>>(&mut self, entry: E) -> Result<EntryStreamWriter<'_, W>> {
EntryStreamWriter::from_raw(self, entry.into()).await
}
/// Set the ZIP file comment.
pub fn comment(&mut self, comment: String) {
self.comment_opt = Some(comment);
}
/// Returns a mutable reference to the inner writer.
///
/// Care should be taken when using this inner writer as doing so may invalidate internal state of this writer.
pub fn inner_mut(&mut self) -> &mut W {
self.writer.inner_mut()
}
/// Consumes this ZIP writer and completes all closing tasks.
///
/// This includes:
/// - Writing all central directory headers.
/// - Writing the end of central directory header.
/// - Writing the file comment.
///
/// Failure to call this function before going out of scope would result in a corrupted ZIP file.
pub async fn close(mut self) -> Result<W> {
let cd_offset = self.writer.offset();
for entry in &self.cd_entries {
let filename_basic =
entry.entry.filename().alternative().unwrap_or_else(|| entry.entry.filename().as_bytes());
let comment_basic = entry.entry.comment().alternative().unwrap_or_else(|| entry.entry.comment().as_bytes());
self.writer.write_all(&crate::spec::consts::CDH_SIGNATURE.to_le_bytes()).await?;
self.writer.write_all(&entry.header.as_slice()).await?;
self.writer.write_all(filename_basic).await?;
self.writer.write_all(&entry.entry.extra_fields().as_bytes()).await?;
self.writer.write_all(comment_basic).await?;
}
let central_directory_size = self.writer.offset() - cd_offset;
let central_directory_size_u32 = if central_directory_size > NON_ZIP64_MAX_SIZE as u64 {
NON_ZIP64_MAX_SIZE
} else {
central_directory_size as u32
};
let num_entries_in_directory = self.cd_entries.len() as u64;
let num_entries_in_directory_u16 = if num_entries_in_directory > NON_ZIP64_MAX_NUM_FILES as u64 {
NON_ZIP64_MAX_NUM_FILES
} else {
num_entries_in_directory as u16
};
let cd_offset_u32 = if cd_offset > NON_ZIP64_MAX_SIZE as u64 {
if self.force_no_zip64 {
return Err(crate::error::ZipError::Zip64Needed(crate::error::Zip64ErrorCase::LargeFile));
} else {
self.is_zip64 = true;
}
NON_ZIP64_MAX_SIZE
} else {
cd_offset as u32
};
// Add the zip64 EOCDR and EOCDL if we are in zip64 mode.
if self.is_zip64 {
let eocdr_offset = self.writer.offset();
let eocdr = Zip64EndOfCentralDirectoryRecord {
size_of_zip64_end_of_cd_record: 44,
version_made_by: crate::spec::version::as_made_by(),
version_needed_to_extract: 46,
disk_number: 0,
disk_number_start_of_cd: 0,
num_entries_in_directory_on_disk: num_entries_in_directory,
num_entries_in_directory,
directory_size: central_directory_size,
offset_of_start_of_directory: cd_offset,
};
self.writer.write_all(&crate::spec::consts::ZIP64_EOCDR_SIGNATURE.to_le_bytes()).await?;
self.writer.write_all(&eocdr.as_bytes()).await?;
let eocdl = Zip64EndOfCentralDirectoryLocator {
number_of_disk_with_start_of_zip64_end_of_central_directory: 0,
relative_offset: eocdr_offset,
total_number_of_disks: 1,
};
self.writer.write_all(&crate::spec::consts::ZIP64_EOCDL_SIGNATURE.to_le_bytes()).await?;
self.writer.write_all(&eocdl.as_bytes()).await?;
}
let header = EndOfCentralDirectoryHeader {
disk_num: 0,
start_cent_dir_disk: 0,
num_of_entries_disk: num_entries_in_directory_u16,
num_of_entries: num_entries_in_directory_u16,
size_cent_dir: central_directory_size_u32,
cent_dir_offset: cd_offset_u32,
file_comm_length: self.comment_opt.as_ref().map(|v| v.len() as u16).unwrap_or_default(),
};
self.writer.write_all(&crate::spec::consts::EOCDR_SIGNATURE.to_le_bytes()).await?;
self.writer.write_all(&header.as_slice()).await?;
if let Some(comment) = self.comment_opt {
self.writer.write_all(comment.as_bytes()).await?;
}
Ok(self.writer.into_inner())
}
}
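// Illustrative usage sketch, not part of the upstream source: build a small archive in memory,
// forcing zip64 end-of-directory structs and setting an archive comment before the mandatory
// `close()` call. Assumes the "deflate" feature; the entry name and contents are made up.
async fn write_demo() -> Result<Vec<u8>> {
    use crate::{Compression, ZipEntryBuilder};
    let mut writer = ZipFileWriter::new(Vec::<u8>::new()).force_zip64();
    writer.comment(String::from("generated for illustration"));
    let entry = ZipEntryBuilder::new(String::from("hello.txt").into(), Compression::Deflate);
    writer.write_entry_whole(entry, b"hello world").await?;
    writer.close().await
}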
#[cfg(feature = "tokio")]
impl<W> ZipFileWriter<Compat<W>>
where
W: tokio::io::AsyncWrite + Unpin,
{
/// Construct a new ZIP file writer from a mutable reference to a writer.
pub fn with_tokio(writer: W) -> TokioZipFileWriter<W> {
Self {
writer: AsyncOffsetWriter::new(writer.compat_write()),
cd_entries: Vec::new(),
comment_opt: None,
is_zip64: false,
force_no_zip64: false,
}
}
}
pub(crate) fn get_or_put_info_zip_unicode_path_extra_field_mut(
extra_fields: &mut Vec<ExtraField>,
) -> &mut InfoZipUnicodePathExtraField {
if !extra_fields.iter().any(|field| matches!(field, ExtraField::InfoZipUnicodePath(_))) {
extra_fields
.push(ExtraField::InfoZipUnicodePath(InfoZipUnicodePathExtraField::V1 { crc32: 0, unicode: vec![] }));
}
for field in extra_fields.iter_mut() {
if let ExtraField::InfoZipUnicodePath(extra_field) = field {
return extra_field;
}
}
panic!("InfoZipUnicodePathExtraField not found after insertion")
}
pub(crate) fn get_or_put_info_zip_unicode_comment_extra_field_mut(
extra_fields: &mut Vec<ExtraField>,
) -> &mut InfoZipUnicodeCommentExtraField {
if !extra_fields.iter().any(|field| matches!(field, ExtraField::InfoZipUnicodeComment(_))) {
extra_fields
.push(ExtraField::InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField::V1 { crc32: 0, unicode: vec![] }));
}
for field in extra_fields.iter_mut() {
if let ExtraField::InfoZipUnicodeComment(extra_field) = field {
return extra_field;
}
}
panic!("InfoZipUnicodeCommentExtraField not found after insertion")
}

View file

@ -0,0 +1,83 @@
// Copyright (c) 2024 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::ZipDateTime;
/// A builder for [`ZipDateTime`].
pub struct ZipDateTimeBuilder(pub(crate) ZipDateTime);
impl From<ZipDateTime> for ZipDateTimeBuilder {
fn from(date: ZipDateTime) -> Self {
Self(date)
}
}
impl Default for ZipDateTimeBuilder {
fn default() -> Self {
Self::new()
}
}
impl ZipDateTimeBuilder {
    /// Constructs a new builder which defines the raw underlying data of a ZIP date & time.
pub fn new() -> Self {
Self(ZipDateTime { date: 0, time: 0 })
}
/// Sets the date and time's year.
pub fn year(mut self, year: i32) -> Self {
let year: u16 = (((year - 1980) << 9) & 0xFE00).try_into().unwrap();
self.0.date |= year;
self
}
/// Sets the date and time's month.
pub fn month(mut self, month: u32) -> Self {
let month: u16 = ((month << 5) & 0x1E0).try_into().unwrap();
self.0.date |= month;
self
}
/// Sets the date and time's day.
pub fn day(mut self, day: u32) -> Self {
let day: u16 = (day & 0x1F).try_into().unwrap();
self.0.date |= day;
self
}
/// Sets the date and time's hour.
pub fn hour(mut self, hour: u32) -> Self {
let hour: u16 = ((hour << 11) & 0xF800).try_into().unwrap();
self.0.time |= hour;
self
}
/// Sets the date and time's minute.
pub fn minute(mut self, minute: u32) -> Self {
let minute: u16 = ((minute << 5) & 0x7E0).try_into().unwrap();
self.0.time |= minute;
self
}
/// Sets the date and time's second.
///
/// Note that MS-DOS has a maximum granularity of two seconds.
pub fn second(mut self, second: u32) -> Self {
let second: u16 = ((second >> 1) & 0x1F).try_into().unwrap();
self.0.time |= second;
self
}
/// Consumes this builder and returns a final [`ZipDateTime`].
///
/// This is equivalent to:
/// ```
/// # use async_zip::{ZipDateTime, ZipDateTimeBuilder, Compression};
/// #
/// # let builder = ZipDateTimeBuilder::new().year(2024).month(3).day(2);
/// let date: ZipDateTime = builder.into();
/// ```
pub fn build(self) -> ZipDateTime {
self.into()
}
}
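// Illustrative sketch, not part of the upstream source: each setter above masks its value into
// the packed MS-DOS fields, so out-of-range components are masked off rather than rejected, and
// seconds keep only two-second granularity.
fn date_demo() -> ZipDateTime {
    let built = ZipDateTimeBuilder::new().year(2024).month(5).day(31).hour(13).minute(37).second(42).build();
    debug_assert_eq!(built.second(), 42);
    built
}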

View file

@ -0,0 +1,112 @@
// Copyright (c) 2021-2024 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub mod builder;
#[cfg(feature = "chrono")]
use chrono::{DateTime, Datelike, LocalResult, TimeZone, Timelike, Utc};
use self::builder::ZipDateTimeBuilder;
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#446
// https://learn.microsoft.com/en-us/windows/win32/api/oleauto/nf-oleauto-dosdatetimetovarianttime
/// A date and time stored as per the MS-DOS representation used by ZIP files.
#[derive(Debug, Default, PartialEq, Eq, Clone, Copy, Hash)]
pub struct ZipDateTime {
pub(crate) date: u16,
pub(crate) time: u16,
}
impl ZipDateTime {
/// Returns the year of this date & time.
pub fn year(&self) -> i32 {
(((self.date & 0xFE00) >> 9) + 1980).into()
}
/// Returns the month of this date & time.
pub fn month(&self) -> u32 {
((self.date & 0x1E0) >> 5).into()
}
/// Returns the day of this date & time.
pub fn day(&self) -> u32 {
(self.date & 0x1F).into()
}
/// Returns the hour of this date & time.
pub fn hour(&self) -> u32 {
((self.time & 0xF800) >> 11).into()
}
/// Returns the minute of this date & time.
pub fn minute(&self) -> u32 {
((self.time & 0x7E0) >> 5).into()
}
/// Returns the second of this date & time.
///
/// Note that MS-DOS has a maximum granularity of two seconds.
pub fn second(&self) -> u32 {
((self.time & 0x1F) << 1).into()
}
/// Constructs chrono's [`DateTime`] representation of this date & time.
///
/// Note that this requires the `chrono` feature.
#[cfg(feature = "chrono")]
pub fn as_chrono(&self) -> LocalResult<DateTime<Utc>> {
self.into()
}
/// Constructs this date & time from chrono's [`DateTime`] representation.
///
/// Note that this requires the `chrono` feature.
#[cfg(feature = "chrono")]
pub fn from_chrono(dt: &DateTime<Utc>) -> Self {
dt.into()
}
}
impl From<ZipDateTimeBuilder> for ZipDateTime {
fn from(builder: ZipDateTimeBuilder) -> Self {
builder.0
}
}
#[cfg(feature = "chrono")]
impl From<&DateTime<Utc>> for ZipDateTime {
fn from(value: &DateTime<Utc>) -> Self {
let mut builder = ZipDateTimeBuilder::new();
builder = builder.year(value.date_naive().year());
builder = builder.month(value.date_naive().month());
builder = builder.day(value.date_naive().day());
builder = builder.hour(value.time().hour());
builder = builder.minute(value.time().minute());
builder = builder.second(value.time().second());
builder.build()
}
}
#[cfg(feature = "chrono")]
impl From<&ZipDateTime> for LocalResult<DateTime<Utc>> {
fn from(value: &ZipDateTime) -> Self {
Utc.with_ymd_and_hms(value.year(), value.month(), value.day(), value.hour(), value.minute(), value.second())
}
}
#[cfg(feature = "chrono")]
impl From<DateTime<Utc>> for ZipDateTime {
fn from(value: DateTime<Utc>) -> Self {
(&value).into()
}
}
#[cfg(feature = "chrono")]
impl From<ZipDateTime> for LocalResult<DateTime<Utc>> {
fn from(value: ZipDateTime) -> Self {
(&value).into()
}
}
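// Illustrative sketch, not part of the upstream source, of the chrono conversions above; it is
// only meaningful with the optional "chrono" feature. The round-trip is lossy because MS-DOS
// times carry no time zone and only two-second precision.
#[cfg(feature = "chrono")]
fn chrono_roundtrip(now: DateTime<Utc>) -> LocalResult<DateTime<Utc>> {
    let dos = ZipDateTime::from_chrono(&now);
    dos.as_chrono()
}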

View file

@ -0,0 +1,113 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::entry::ZipEntry;
use crate::spec::{attribute::AttributeCompatibility, header::ExtraField, Compression};
use crate::{date::ZipDateTime, string::ZipString};
/// A builder for [`ZipEntry`].
pub struct ZipEntryBuilder(pub(crate) ZipEntry);
impl From<ZipEntry> for ZipEntryBuilder {
fn from(entry: ZipEntry) -> Self {
Self(entry)
}
}
impl ZipEntryBuilder {
/// Constructs a new builder which defines the raw underlying data of a ZIP entry.
///
/// A filename and compression method are needed to construct the builder as minimal parameters.
pub fn new(filename: ZipString, compression: Compression) -> Self {
Self(ZipEntry::new(filename, compression))
}
/// Sets the entry's filename.
pub fn filename(mut self, filename: ZipString) -> Self {
self.0.filename = filename;
self
}
/// Sets the entry's compression method.
pub fn compression(mut self, compression: Compression) -> Self {
self.0.compression = compression;
self
}
/// Set a size hint for the file, to be written into the local file header.
    /// Unlikely to be useful except when streaming entries that use the Stored compression method.
/// This size hint does not affect the central directory, nor does it affect whole files.
pub fn size<N: Into<u64>, M: Into<u64>>(mut self, compressed_size: N, uncompressed_size: M) -> Self {
self.0.compressed_size = compressed_size.into();
self.0.uncompressed_size = uncompressed_size.into();
self
}
/// Set the deflate compression option.
///
/// If the compression type isn't deflate, this option has no effect.
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
pub fn deflate_option(mut self, option: crate::DeflateOption) -> Self {
self.0.compression_level = option.into_level();
self
}
/// Sets the entry's attribute host compatibility.
pub fn attribute_compatibility(mut self, compatibility: AttributeCompatibility) -> Self {
self.0.attribute_compatibility = compatibility;
self
}
/// Sets the entry's last modification date.
pub fn last_modification_date(mut self, date: ZipDateTime) -> Self {
self.0.last_modification_date = date;
self
}
/// Sets the entry's internal file attribute.
pub fn internal_file_attribute(mut self, attribute: u16) -> Self {
self.0.internal_file_attribute = attribute;
self
}
/// Sets the entry's external file attribute.
pub fn external_file_attribute(mut self, attribute: u32) -> Self {
self.0.external_file_attribute = attribute;
self
}
/// Sets the entry's extra field data.
pub fn extra_fields(mut self, field: Vec<ExtraField>) -> Self {
self.0.extra_fields = field;
self
}
/// Sets the entry's file comment.
pub fn comment(mut self, comment: ZipString) -> Self {
self.0.comment = comment;
self
}
/// Sets the entry's Unix permissions mode.
///
/// If the attribute host compatibility isn't set to Unix, this will have no effect.
pub fn unix_permissions(mut self, mode: u16) -> Self {
if matches!(self.0.attribute_compatibility, AttributeCompatibility::Unix) {
self.0.external_file_attribute = (self.0.external_file_attribute & 0xFFFF) | (mode as u32) << 16;
}
self
}
/// Consumes this builder and returns a final [`ZipEntry`].
///
/// This is equivalent to:
/// ```
/// # use async_zip::{ZipEntry, ZipEntryBuilder, Compression};
/// #
/// # let builder = ZipEntryBuilder::new(String::from("foo.bar").into(), Compression::Stored);
/// let entry: ZipEntry = builder.into();
/// ```
pub fn build(self) -> ZipEntry {
self.into()
}
}
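// Illustrative sketch, not part of the upstream source: the filename and mode are arbitrary.
// `unix_permissions` takes effect here because `AttributeCompatibility::Unix` is the default set
// by `ZipEntry::new`, as noted in the method docs above.
fn executable_entry() -> ZipEntry {
    ZipEntryBuilder::new(String::from("bin/run.sh").into(), Compression::Stored)
        .unix_permissions(0o755)
        .comment(String::from("illustrative entry").into())
        .build()
}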

View file

@ -0,0 +1,219 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub mod builder;
use std::ops::Deref;
use futures_lite::io::{AsyncRead, AsyncReadExt, AsyncSeek, AsyncSeekExt, SeekFrom};
use crate::entry::builder::ZipEntryBuilder;
use crate::error::{Result, ZipError};
use crate::spec::{
attribute::AttributeCompatibility,
consts::LFH_SIGNATURE,
header::{ExtraField, LocalFileHeader},
Compression,
};
use crate::{string::ZipString, ZipDateTime};
/// An immutable store of data about a ZIP entry.
///
/// This type cannot be directly constructed so instead, the [`ZipEntryBuilder`] must be used. Internally this builder
/// stores a [`ZipEntry`] so conversions between these two types via the [`From`] implementations will be
/// non-allocating.
#[derive(Clone, Debug)]
pub struct ZipEntry {
pub(crate) filename: ZipString,
pub(crate) compression: Compression,
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
pub(crate) compression_level: async_compression::Level,
pub(crate) crc32: u32,
pub(crate) uncompressed_size: u64,
pub(crate) compressed_size: u64,
pub(crate) attribute_compatibility: AttributeCompatibility,
pub(crate) last_modification_date: ZipDateTime,
pub(crate) internal_file_attribute: u16,
pub(crate) external_file_attribute: u32,
pub(crate) extra_fields: Vec<ExtraField>,
pub(crate) comment: ZipString,
pub(crate) data_descriptor: bool,
}
impl From<ZipEntryBuilder> for ZipEntry {
fn from(builder: ZipEntryBuilder) -> Self {
builder.0
}
}
impl ZipEntry {
pub(crate) fn new(filename: ZipString, compression: Compression) -> Self {
ZipEntry {
filename,
compression,
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
compression_level: async_compression::Level::Default,
crc32: 0,
uncompressed_size: 0,
compressed_size: 0,
attribute_compatibility: AttributeCompatibility::Unix,
last_modification_date: ZipDateTime::default(),
internal_file_attribute: 0,
external_file_attribute: 0,
extra_fields: Vec::new(),
comment: String::new().into(),
data_descriptor: false,
}
}
/// Returns the entry's filename.
///
/// ## Note
/// This will return the raw filename stored during ZIP creation. If calling this method on entries retrieved from
/// untrusted ZIP files, the filename should be sanitised before being used as a path to prevent [directory
/// traversal attacks](https://en.wikipedia.org/wiki/Directory_traversal_attack).
pub fn filename(&self) -> &ZipString {
&self.filename
}
/// Returns the entry's compression method.
pub fn compression(&self) -> Compression {
self.compression
}
/// Returns the entry's CRC32 value.
pub fn crc32(&self) -> u32 {
self.crc32
}
/// Returns the entry's uncompressed size.
pub fn uncompressed_size(&self) -> u64 {
self.uncompressed_size
}
/// Returns the entry's compressed size.
pub fn compressed_size(&self) -> u64 {
self.compressed_size
}
/// Returns the entry's attribute's host compatibility.
pub fn attribute_compatibility(&self) -> AttributeCompatibility {
self.attribute_compatibility
}
/// Returns the entry's last modification time & date.
pub fn last_modification_date(&self) -> &ZipDateTime {
&self.last_modification_date
}
/// Returns the entry's internal file attribute.
pub fn internal_file_attribute(&self) -> u16 {
self.internal_file_attribute
}
    /// Returns the entry's external file attribute.
pub fn external_file_attribute(&self) -> u32 {
self.external_file_attribute
}
/// Returns the entry's extra field data.
pub fn extra_fields(&self) -> &[ExtraField] {
&self.extra_fields
}
/// Returns the entry's file comment.
pub fn comment(&self) -> &ZipString {
&self.comment
}
/// Returns the entry's integer-based UNIX permissions.
///
/// # Note
/// This will return None if the attribute host compatibility is not listed as Unix.
pub fn unix_permissions(&self) -> Option<u16> {
if !matches!(self.attribute_compatibility, AttributeCompatibility::Unix) {
return None;
}
Some(((self.external_file_attribute) >> 16) as u16)
}
/// Returns whether or not the entry represents a directory.
pub fn dir(&self) -> Result<bool> {
Ok(self.filename.as_str()?.ends_with('/'))
}
}
/// An immutable store of data about how a ZIP entry is stored within a specific archive.
///
/// Besides storing archive independent information like the size and timestamp it can also be used to query
/// information about how the entry is stored in an archive.
#[derive(Clone)]
pub struct StoredZipEntry {
pub(crate) entry: ZipEntry,
// pub(crate) general_purpose_flag: GeneralPurposeFlag,
pub(crate) file_offset: u64,
pub(crate) header_size: u64,
}
impl StoredZipEntry {
/// Returns the offset in bytes to where the header of the entry starts.
pub fn header_offset(&self) -> u64 {
self.file_offset
}
/// Returns the combined size in bytes of the header, the filename, and any extra fields.
///
/// Note: This uses the extra field length stored in the central directory, which may differ from that stored in
/// the local file header. See specification: <https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#732>
pub fn header_size(&self) -> u64 {
self.header_size
}
/// Seek to the offset in bytes where the data of the entry starts.
pub(crate) async fn seek_to_data_offset<R: AsyncRead + AsyncSeek + Unpin>(&self, mut reader: &mut R) -> Result<()> {
// Seek to the header
reader.seek(SeekFrom::Start(self.file_offset)).await?;
// Check the signature
let signature = {
let mut buffer = [0; 4];
reader.read_exact(&mut buffer).await?;
u32::from_le_bytes(buffer)
};
match signature {
LFH_SIGNATURE => (),
actual => return Err(ZipError::UnexpectedHeaderError(actual, LFH_SIGNATURE)),
};
// Skip the local file header and trailing data
let header = LocalFileHeader::from_reader(&mut reader).await?;
let trailing_size = (header.file_name_length as i64) + (header.extra_field_length as i64);
reader.seek(SeekFrom::Current(trailing_size)).await?;
Ok(())
}
}
impl Deref for StoredZipEntry {
type Target = ZipEntry;
fn deref(&self) -> &Self::Target {
&self.entry
}
}
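// Illustrative sketch, not part of the upstream source: a `StoredZipEntry` obtained from a parsed
// archive derefs to `ZipEntry`, so the archive-independent accessors and the stored offsets can
// be combined when describing an entry. The output format is arbitrary.
fn describe(entry: &StoredZipEntry) -> Result<String> {
    let kind = if entry.dir()? { "dir" } else { "file" };
    Ok(format!(
        "{kind} {} ({} -> {} bytes, header at {:#x})",
        entry.filename().as_str()?,
        entry.compressed_size(),
        entry.uncompressed_size(),
        entry.header_offset(),
    ))
}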

View file

@ -0,0 +1,72 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A module which holds relevant error reporting structures/types.
use std::fmt::{Display, Formatter};
use thiserror::Error;
/// A Result type alias over ZipError to minimise repetition.
pub type Result<V> = std::result::Result<V, ZipError>;
#[derive(Debug, PartialEq, Eq)]
pub enum Zip64ErrorCase {
TooManyFiles,
LargeFile,
}
impl Display for Zip64ErrorCase {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self {
Self::TooManyFiles => write!(f, "More than 65536 files in archive"),
Self::LargeFile => write!(f, "File is larger than 4 GiB"),
}
}
}
/// An enum of possible errors and their descriptions.
#[non_exhaustive]
#[derive(Debug, Error)]
pub enum ZipError {
#[error("feature not supported: '{0}'")]
FeatureNotSupported(&'static str),
#[error("compression not supported: {0}")]
CompressionNotSupported(u16),
#[error("host attribute compatibility not supported: {0}")]
AttributeCompatibilityNotSupported(u16),
#[error("attempted to read a ZIP64 file whilst on a 32-bit target")]
TargetZip64NotSupported,
#[error("attempted to write a ZIP file with force_no_zip64 when ZIP64 is needed: {0}")]
Zip64Needed(Zip64ErrorCase),
#[error("end of file has not been reached")]
EOFNotReached,
#[error("extra fields exceeded maximum size")]
ExtraFieldTooLarge,
#[error("comment exceeded maximum size")]
CommentTooLarge,
#[error("filename exceeded maximum size")]
FileNameTooLarge,
#[error("attempted to convert non-UTF8 bytes to a string/str")]
StringNotUtf8,
#[error("unable to locate the end of central directory record")]
UnableToLocateEOCDR,
#[error("extra field size was indicated to be {0} but only {1} bytes remain")]
InvalidExtraFieldHeader(u16, usize),
#[error("zip64 extended information field was incomplete")]
Zip64ExtendedFieldIncomplete,
#[error("an upstream reader returned an error: {0}")]
UpstreamReadError(#[from] std::io::Error),
#[error("a computed CRC32 value did not match the expected value")]
CRC32CheckError,
#[error("entry index was out of bounds")]
EntryIndexOutOfBounds,
#[error("Encountered an unexpected header (actual: {0:#x}, expected: {1:#x}).")]
UnexpectedHeaderError(u32, u32),
#[error("Info-ZIP Unicode Comment Extra Field was incomplete")]
InfoZipUnicodeCommentFieldIncomplete,
#[error("Info-ZIP Unicode Path Extra Field was incomplete")]
InfoZipUnicodePathFieldIncomplete,
}
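
Since `ZipError` is marked `#[non_exhaustive]`, downstream matches need a catch-all arm. A minimal sketch (the `describe` helper is hypothetical, not part of the crate):

```rust
use async_zip::error::ZipError;

// Hypothetical helper: turn a ZipError into a short log label.
fn describe(err: &ZipError) -> String {
    match err {
        // The messages come from the #[error] attributes above.
        ZipError::CompressionNotSupported(method) => format!("unsupported method {method}"),
        ZipError::UpstreamReadError(io) => format!("io error: {io}"),
        other => other.to_string(),
    }
}
```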

View file

@ -0,0 +1,44 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::{file::ZipFile, string::ZipString};
/// A builder for [`ZipFile`].
pub struct ZipFileBuilder(pub(crate) ZipFile);
impl From<ZipFile> for ZipFileBuilder {
fn from(file: ZipFile) -> Self {
Self(file)
}
}
impl Default for ZipFileBuilder {
fn default() -> Self {
ZipFileBuilder(ZipFile { entries: Vec::new(), zip64: false, comment: String::new().into() })
}
}
impl ZipFileBuilder {
pub fn new() -> Self {
Self::default()
}
/// Sets the file's comment.
pub fn comment(mut self, comment: ZipString) -> Self {
self.0.comment = comment;
self
}
/// Consumes this builder and returns a final [`ZipFile`].
///
/// This is equivalent to:
/// ```
/// # use async_zip::{ZipFile, ZipFileBuilder};
/// #
/// # let builder = ZipFileBuilder::new();
/// let file: ZipFile = builder.into();
/// ```
pub fn build(self) -> ZipFile {
self.into()
}
}
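
A small usage sketch for the builder above, assuming the crate is consumed under its `async_zip` name; `build()` is just the documented `.into()` conversion:

```rust
use async_zip::{ZipFile, ZipFileBuilder};

// Hypothetical helper: describe an archive with a comment but no entries yet.
fn empty_archive() -> ZipFile {
    // `comment` takes a ZipString; the From<&str> impl in string.rs covers plain literals.
    let file: ZipFile = ZipFileBuilder::new()
        .comment("generated by artifactview".into())
        .build();
    assert!(file.entries().is_empty());
    assert!(!file.zip64());
    file
}
```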

View file

@ -0,0 +1,38 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod builder;
use crate::{entry::StoredZipEntry, string::ZipString};
use builder::ZipFileBuilder;
/// An immutable store of data about a ZIP file.
#[derive(Clone)]
pub struct ZipFile {
pub(crate) entries: Vec<StoredZipEntry>,
pub(crate) zip64: bool,
pub(crate) comment: ZipString,
}
impl From<ZipFileBuilder> for ZipFile {
fn from(builder: ZipFileBuilder) -> Self {
builder.0
}
}
impl ZipFile {
/// Returns a list of this ZIP file's entries.
pub fn entries(&self) -> &[StoredZipEntry] {
&self.entries
}
/// Returns this ZIP file's trailing comment.
pub fn comment(&self) -> &ZipString {
&self.comment
}
/// Returns whether or not this ZIP file is zip64.
pub fn zip64(&self) -> bool {
self.zip64
}
}

View file

@ -0,0 +1,62 @@
// Copyright (c) 2021-2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
// Document all features on docs.rs
#![cfg_attr(docsrs, feature(doc_cfg))]
//! An asynchronous ZIP archive reading/writing crate.
//!
//! ## Features
//! - A base implementation atop `futures`'s IO traits.
//! - An extended implementation atop `tokio`'s IO traits.
//! - Support for Stored, Deflate, bzip2, LZMA, zstd, and xz compression methods.
//! - Various different reading approaches (seek, stream, filesystem, in-memory buffer).
//! - Support for writing complete data (u8 slices) or stream writing using data descriptors.
//! - Initial support for ZIP64 reading and writing.
//! - Aims for reasonable [specification](https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md) compliance.
//!
//! ## Installation
//!
//! ```toml
//! [dependencies]
//! async_zip = { version = "0.0.17", features = ["full"] }
//! ```
//!
//! ### Feature Flags
//! - `full` - Enables all below features.
//! - `full-wasm` - Enables all below features that are compatible with WASM.
//! - `chrono` - Enables support for parsing dates via `chrono`.
//! - `tokio` - Enables support for the `tokio` implementation module.
//! - `tokio-fs` - Enables support for the `tokio::fs` reading module.
//! - `deflate` - Enables support for the Deflate compression method.
//! - `bzip2` - Enables support for the bzip2 compression method.
//! - `lzma` - Enables support for the LZMA compression method.
//! - `zstd` - Enables support for the zstd compression method.
//! - `xz` - Enables support for the xz compression method.
//!
//! [Read more.](https://github.com/Majored/rs-async-zip)
pub mod base;
pub mod error;
#[cfg(feature = "tokio")]
pub mod tokio;
pub(crate) mod date;
pub(crate) mod entry;
pub(crate) mod file;
pub(crate) mod spec;
pub(crate) mod string;
pub(crate) mod utils;
#[cfg(test)]
pub(crate) mod tests;
pub use crate::spec::attribute::AttributeCompatibility;
pub use crate::spec::compression::{Compression, DeflateOption};
pub use crate::date::{builder::ZipDateTimeBuilder, ZipDateTime};
pub use crate::entry::{builder::ZipEntryBuilder, StoredZipEntry, ZipEntry};
pub use crate::file::{builder::ZipFileBuilder, ZipFile};
pub use crate::string::{StringEncoding, ZipString};
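
A hedged end-to-end sketch of the reading API exported here, mirroring what the zip64 tests later in this diff do with the in-memory reader (the `read_first_entry` helper is illustrative only):

```rust
use async_zip::base::read::mem::ZipFileReader;
use async_zip::error::Result;
use futures_lite::io::AsyncReadExt;

// Hypothetical helper: read the first entry of an in-memory archive.
async fn read_first_entry(data: Vec<u8>) -> Result<String> {
    let reader = ZipFileReader::new(data).await?;
    // Entry metadata comes from the central directory parsed up front.
    let name = reader.file().entries()[0].filename().as_str()?.to_owned();
    let mut entry_reader = reader.reader_without_entry(0).await?;
    let mut contents = String::new();
    entry_reader.read_to_string(&mut contents).await?;
    Ok(format!("{name}: {contents}"))
}
```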

View file

@ -0,0 +1,41 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::error::{Result, ZipError};
/// An attribute host compatibility supported by this crate.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum AttributeCompatibility {
Unix,
}
impl TryFrom<u16> for AttributeCompatibility {
type Error = ZipError;
// Convert a u16 stored with little endianness into a supported attribute host compatibility.
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4422
fn try_from(value: u16) -> Result<Self> {
match value {
3 => Ok(AttributeCompatibility::Unix),
_ => Err(ZipError::AttributeCompatibilityNotSupported(value)),
}
}
}
impl From<&AttributeCompatibility> for u16 {
// Convert a supported attribute host compatibility into its relevant u16 stored with little endianness.
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4422
fn from(compatibility: &AttributeCompatibility) -> Self {
match compatibility {
AttributeCompatibility::Unix => 3,
}
}
}
impl From<AttributeCompatibility> for u16 {
// Convert a supported attribute host compatibility into its relevant u16 stored with little endianness.
fn from(compatibility: AttributeCompatibility) -> Self {
(&compatibility).into()
}
}
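
A short round-trip sketch of the conversions above (host id 3 is the Unix mapping):

```rust
use async_zip::AttributeCompatibility;

fn attribute_roundtrip() {
    // Host id 3 maps to Unix; anything else is AttributeCompatibilityNotSupported.
    let compat = AttributeCompatibility::try_from(3u16).expect("Unix is supported");
    assert_eq!(compat, AttributeCompatibility::Unix);
    assert_eq!(u16::from(compat), 3);
    assert!(AttributeCompatibility::try_from(0u16).is_err());
}
```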

View file

@ -0,0 +1,111 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::error::{Result, ZipError};
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
use async_compression::Level;
/// A compression method supported by this crate.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Compression {
Stored,
#[cfg(feature = "deflate")]
Deflate,
#[cfg(feature = "deflate64")]
Deflate64,
#[cfg(feature = "bzip2")]
Bz,
#[cfg(feature = "lzma")]
Lzma,
#[cfg(feature = "zstd")]
Zstd,
#[cfg(feature = "xz")]
Xz,
}
impl TryFrom<u16> for Compression {
type Error = ZipError;
// Convert a u16 stored with little endianness into a supported compression method.
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#445
fn try_from(value: u16) -> Result<Self> {
match value {
0 => Ok(Compression::Stored),
#[cfg(feature = "deflate")]
8 => Ok(Compression::Deflate),
#[cfg(feature = "deflate64")]
9 => Ok(Compression::Deflate64),
#[cfg(feature = "bzip2")]
12 => Ok(Compression::Bz),
#[cfg(feature = "lzma")]
14 => Ok(Compression::Lzma),
#[cfg(feature = "zstd")]
93 => Ok(Compression::Zstd),
#[cfg(feature = "xz")]
95 => Ok(Compression::Xz),
_ => Err(ZipError::CompressionNotSupported(value)),
}
}
}
impl From<&Compression> for u16 {
// Convert a supported compression method into its relevant u16 stored with little endianness.
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#445
fn from(compression: &Compression) -> u16 {
match compression {
Compression::Stored => 0,
#[cfg(feature = "deflate")]
Compression::Deflate => 8,
#[cfg(feature = "deflate64")]
Compression::Deflate64 => 9,
#[cfg(feature = "bzip2")]
Compression::Bz => 12,
#[cfg(feature = "lzma")]
Compression::Lzma => 14,
#[cfg(feature = "zstd")]
Compression::Zstd => 93,
#[cfg(feature = "xz")]
Compression::Xz => 95,
}
}
}
impl From<Compression> for u16 {
fn from(compression: Compression) -> u16 {
(&compression).into()
}
}
/// Level of compression data should be compressed with for deflate.
#[derive(Debug, Clone, Copy)]
pub enum DeflateOption {
// Normal (-en) compression option was used.
Normal,
// Maximum (-exx/-ex) compression option was used.
Maximum,
// Fast (-ef) compression option was used.
Fast,
// Super Fast (-es) compression option was used.
Super,
/// Other implementation defined level.
Other(i32),
}
#[cfg(any(feature = "deflate", feature = "bzip2", feature = "zstd", feature = "lzma", feature = "xz"))]
impl DeflateOption {
pub(crate) fn into_level(self) -> Level {
// FIXME: There's no clear documentation on what these specific levels defined in the ZIP specification relate
// to. We want to be compatible with any other library, and not specific to `async_compression`'s levels.
if let Self::Other(l) = self {
Level::Precise(l)
} else {
Level::Default
}
}
}
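
A round-trip sketch of the method-id conversions above; it assumes the `deflate` feature is enabled, since that variant is feature-gated:

```rust
use async_zip::Compression;

fn compression_roundtrip() {
    // 8 is the Deflate method id in the local/central headers.
    let method = Compression::try_from(8u16).expect("deflate feature enabled");
    assert_eq!(u16::from(method), 8);
    // Unknown ids surface as ZipError::CompressionNotSupported.
    assert!(Compression::try_from(0xBEEFu16).is_err());
}
```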

View file

@ -0,0 +1,44 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub const SIGNATURE_LENGTH: usize = 4;
// Local file header constants
//
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#437
pub const LFH_SIGNATURE: u32 = 0x4034b50;
#[allow(dead_code)]
pub const LFH_LENGTH: usize = 26;
// Central directory header constants
//
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4312
pub const CDH_SIGNATURE: u32 = 0x2014b50;
#[allow(dead_code)]
pub const CDH_LENGTH: usize = 42;
// End of central directory record constants
//
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316
pub const EOCDR_SIGNATURE: u32 = 0x6054b50;
/// The minimum length of the EOCDR, excluding the signature.
pub const EOCDR_LENGTH: usize = 18;
/// The signature for the zip64 end of central directory record.
/// Ref: https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4314
pub const ZIP64_EOCDR_SIGNATURE: u32 = 0x06064b50;
/// The signature for the zip64 end of central directory locator.
/// Ref: https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4315
pub const ZIP64_EOCDL_SIGNATURE: u32 = 0x07064b50;
/// The length of the ZIP64 EOCDL, including the signature.
/// The EOCDL has a fixed size, thankfully.
pub const ZIP64_EOCDL_LENGTH: u64 = 20;
/// The contents of a header field when one must reference the zip64 version instead.
pub const NON_ZIP64_MAX_SIZE: u32 = 0xFFFFFFFF;
/// The maximum number of files or disks in a ZIP file before it requires ZIP64.
pub const NON_ZIP64_MAX_NUM_FILES: u16 = 0xFFFF;
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#439
pub const DATA_DESCRIPTOR_SIGNATURE: u32 = 0x8074b50;
pub const DATA_DESCRIPTOR_LENGTH: usize = 12;
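
A crate-internal sanity-check sketch: because the signatures are stored little-endian, the constants above decode to the familiar `PK` byte sequences:

```rust
// Crate-internal sanity check: signatures are stored little-endian on disk.
use crate::spec::consts::{CDH_SIGNATURE, LFH_SIGNATURE};

fn signature_bytes() {
    assert_eq!(LFH_SIGNATURE.to_le_bytes(), [0x50, 0x4B, 0x03, 0x04]); // "PK\x03\x04"
    assert_eq!(CDH_SIGNATURE.to_le_bytes(), [0x50, 0x4B, 0x01, 0x02]); // "PK\x01\x02"
}
```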

View file

@ -0,0 +1,320 @@
// Copyright Cognite AS, 2023
use crate::error::{Result as ZipResult, ZipError};
use crate::spec::header::{
ExtraField, HeaderId, InfoZipUnicodeCommentExtraField, InfoZipUnicodePathExtraField, UnknownExtraField,
Zip64ExtendedInformationExtraField,
};
use super::consts::NON_ZIP64_MAX_SIZE;
pub(crate) trait ExtraFieldAsBytes {
fn as_bytes(&self) -> Vec<u8>;
fn count_bytes(&self) -> usize;
}
impl ExtraFieldAsBytes for &[ExtraField] {
fn as_bytes(&self) -> Vec<u8> {
let mut buffer = Vec::new();
for field in self.iter() {
buffer.append(&mut field.as_bytes());
}
buffer
}
fn count_bytes(&self) -> usize {
self.iter().map(|field| field.count_bytes()).sum()
}
}
impl ExtraFieldAsBytes for ExtraField {
fn as_bytes(&self) -> Vec<u8> {
match self {
ExtraField::Zip64ExtendedInformation(field) => field.as_bytes(),
ExtraField::InfoZipUnicodeComment(field) => field.as_bytes(),
ExtraField::InfoZipUnicodePath(field) => field.as_bytes(),
ExtraField::Unknown(field) => field.as_bytes(),
}
}
fn count_bytes(&self) -> usize {
match self {
ExtraField::Zip64ExtendedInformation(field) => field.count_bytes(),
ExtraField::InfoZipUnicodeComment(field) => field.count_bytes(),
ExtraField::InfoZipUnicodePath(field) => field.count_bytes(),
ExtraField::Unknown(field) => field.count_bytes(),
}
}
}
impl ExtraFieldAsBytes for UnknownExtraField {
fn as_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
let header_id: u16 = self.header_id.into();
bytes.append(&mut header_id.to_le_bytes().to_vec());
bytes.append(&mut self.data_size.to_le_bytes().to_vec());
bytes.append(&mut self.content.clone());
bytes
}
fn count_bytes(&self) -> usize {
4 + self.content.len()
}
}
impl ExtraFieldAsBytes for Zip64ExtendedInformationExtraField {
fn as_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
let header_id: u16 = self.header_id.into();
bytes.append(&mut header_id.to_le_bytes().to_vec());
bytes.append(&mut (self.content_size() as u16).to_le_bytes().to_vec());
if let Some(uncompressed_size) = &self.uncompressed_size {
bytes.append(&mut uncompressed_size.to_le_bytes().to_vec());
}
if let Some(compressed_size) = &self.compressed_size {
bytes.append(&mut compressed_size.to_le_bytes().to_vec());
}
if let Some(relative_header_offset) = &self.relative_header_offset {
bytes.append(&mut relative_header_offset.to_le_bytes().to_vec());
}
if let Some(disk_start_number) = &self.disk_start_number {
bytes.append(&mut disk_start_number.to_le_bytes().to_vec());
}
bytes
}
fn count_bytes(&self) -> usize {
4 + self.content_size()
}
}
impl ExtraFieldAsBytes for InfoZipUnicodeCommentExtraField {
fn as_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
let header_id: u16 = HeaderId::INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD.into();
bytes.append(&mut header_id.to_le_bytes().to_vec());
match self {
InfoZipUnicodeCommentExtraField::V1 { crc32, unicode } => {
let data_size: u16 = (5 + unicode.len()).try_into().unwrap();
bytes.append(&mut data_size.to_le_bytes().to_vec());
bytes.push(1);
bytes.append(&mut crc32.to_le_bytes().to_vec());
bytes.append(&mut unicode.clone());
}
InfoZipUnicodeCommentExtraField::Unknown { version, data } => {
let data_size: u16 = (1 + data.len()).try_into().unwrap();
bytes.append(&mut data_size.to_le_bytes().to_vec());
bytes.push(*version);
bytes.append(&mut data.clone());
}
}
bytes
}
fn count_bytes(&self) -> usize {
match self {
InfoZipUnicodeCommentExtraField::V1 { unicode, .. } => 9 + unicode.len(),
InfoZipUnicodeCommentExtraField::Unknown { data, .. } => 5 + data.len(),
}
}
}
impl ExtraFieldAsBytes for InfoZipUnicodePathExtraField {
fn as_bytes(&self) -> Vec<u8> {
let mut bytes = Vec::new();
let header_id: u16 = HeaderId::INFO_ZIP_UNICODE_PATH_EXTRA_FIELD.into();
bytes.append(&mut header_id.to_le_bytes().to_vec());
match self {
InfoZipUnicodePathExtraField::V1 { crc32, unicode } => {
let data_size: u16 = (5 + unicode.len()).try_into().unwrap();
bytes.append(&mut data_size.to_le_bytes().to_vec());
bytes.push(1);
bytes.append(&mut crc32.to_le_bytes().to_vec());
bytes.append(&mut unicode.clone());
}
InfoZipUnicodePathExtraField::Unknown { version, data } => {
let data_size: u16 = (1 + data.len()).try_into().unwrap();
bytes.append(&mut data_size.to_le_bytes().to_vec());
bytes.push(*version);
bytes.append(&mut data.clone());
}
}
bytes
}
fn count_bytes(&self) -> usize {
match self {
InfoZipUnicodePathExtraField::V1 { unicode, .. } => 9 + unicode.len(),
InfoZipUnicodePathExtraField::Unknown { data, .. } => 5 + data.len(),
}
}
}
/// Parse a zip64 extra field from bytes.
/// The content of "data" should exclude the header.
fn zip64_extended_information_field_from_bytes(
header_id: HeaderId,
data: &[u8],
uncompressed_size: u32,
compressed_size: u32,
) -> ZipResult<Zip64ExtendedInformationExtraField> {
// slice.take is nightly-only so we'll just use an index to track the current position
let mut current_idx = 0;
let uncompressed_size = if uncompressed_size == NON_ZIP64_MAX_SIZE && data.len() >= current_idx + 8 {
let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap()));
current_idx += 8;
val
} else {
None
};
let compressed_size = if compressed_size == NON_ZIP64_MAX_SIZE && data.len() >= current_idx + 8 {
let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap()));
current_idx += 8;
val
} else {
None
};
let relative_header_offset = if data.len() >= current_idx + 8 {
let val = Some(u64::from_le_bytes(data[current_idx..current_idx + 8].try_into().unwrap()));
current_idx += 8;
val
} else {
None
};
#[allow(unused_assignments)]
let disk_start_number = if data.len() >= current_idx + 4 {
let val = Some(u32::from_le_bytes(data[current_idx..current_idx + 4].try_into().unwrap()));
current_idx += 4;
val
} else {
None
};
Ok(Zip64ExtendedInformationExtraField {
header_id,
uncompressed_size,
compressed_size,
relative_header_offset,
disk_start_number,
})
}
fn info_zip_unicode_comment_extra_field_from_bytes(
_header_id: HeaderId,
data_size: u16,
data: &[u8],
) -> ZipResult<InfoZipUnicodeCommentExtraField> {
if data.is_empty() {
return Err(ZipError::InfoZipUnicodeCommentFieldIncomplete);
}
let version = data[0];
match version {
1 => {
if data.len() < 5 {
return Err(ZipError::InfoZipUnicodeCommentFieldIncomplete);
}
let crc32 = u32::from_le_bytes(data[1..5].try_into().unwrap());
let unicode = data[5..(data_size as usize)].to_vec();
Ok(InfoZipUnicodeCommentExtraField::V1 { crc32, unicode })
}
_ => Ok(InfoZipUnicodeCommentExtraField::Unknown { version, data: data[1..(data_size as usize)].to_vec() }),
}
}
fn info_zip_unicode_path_extra_field_from_bytes(
_header_id: HeaderId,
data_size: u16,
data: &[u8],
) -> ZipResult<InfoZipUnicodePathExtraField> {
if data.is_empty() {
return Err(ZipError::InfoZipUnicodePathFieldIncomplete);
}
let version = data[0];
match version {
1 => {
if data.len() < 5 {
return Err(ZipError::InfoZipUnicodePathFieldIncomplete);
}
let crc32 = u32::from_le_bytes(data[1..5].try_into().unwrap());
let unicode = data[5..(data_size as usize)].to_vec();
Ok(InfoZipUnicodePathExtraField::V1 { crc32, unicode })
}
_ => Ok(InfoZipUnicodePathExtraField::Unknown { version, data: data[1..(data_size as usize)].to_vec() }),
}
}
pub(crate) fn extra_field_from_bytes(
header_id: HeaderId,
data_size: u16,
data: &[u8],
uncompressed_size: u32,
compressed_size: u32,
) -> ZipResult<ExtraField> {
match header_id {
HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD => Ok(ExtraField::Zip64ExtendedInformation(
zip64_extended_information_field_from_bytes(header_id, data, uncompressed_size, compressed_size)?,
)),
HeaderId::INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD => Ok(ExtraField::InfoZipUnicodeComment(
info_zip_unicode_comment_extra_field_from_bytes(header_id, data_size, data)?,
)),
HeaderId::INFO_ZIP_UNICODE_PATH_EXTRA_FIELD => Ok(ExtraField::InfoZipUnicodePath(
info_zip_unicode_path_extra_field_from_bytes(header_id, data_size, data)?,
)),
_ => Ok(ExtraField::Unknown(UnknownExtraField { header_id, data_size, content: data.to_vec() })),
}
}
pub struct Zip64ExtendedInformationExtraFieldBuilder {
field: Zip64ExtendedInformationExtraField,
}
impl Zip64ExtendedInformationExtraFieldBuilder {
pub fn new() -> Self {
Self {
field: Zip64ExtendedInformationExtraField {
header_id: HeaderId::ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD,
uncompressed_size: None,
compressed_size: None,
relative_header_offset: None,
disk_start_number: None,
},
}
}
pub fn sizes(mut self, compressed_size: u64, uncompressed_size: u64) -> Self {
self.field.compressed_size = Some(compressed_size);
self.field.uncompressed_size = Some(uncompressed_size);
self
}
pub fn relative_header_offset(mut self, relative_header_offset: u64) -> Self {
self.field.relative_header_offset = Some(relative_header_offset);
self
}
#[allow(dead_code)]
pub fn disk_start_number(mut self, disk_start_number: u32) -> Self {
self.field.disk_start_number = Some(disk_start_number);
self
}
pub fn eof_only(&self) -> bool {
(self.field.uncompressed_size.is_none() && self.field.compressed_size.is_none())
&& (self.field.relative_header_offset.is_some() || self.field.disk_start_number.is_some())
}
pub fn build(self) -> ZipResult<Zip64ExtendedInformationExtraField> {
let field = self.field;
if field.content_size() == 0 {
return Err(ZipError::Zip64ExtendedFieldIncomplete);
}
Ok(field)
}
}
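
A crate-internal sketch of the builder defined above, producing the 4-byte field header followed by the two 8-byte sizes (the `zip64_sizes_field` helper is illustrative):

```rust
// Crate-internal sketch (the spec module is pub(crate)): a zip64 field carrying both sizes.
use crate::error::Result;
use crate::spec::extra_field::{ExtraFieldAsBytes, Zip64ExtendedInformationExtraFieldBuilder};

fn zip64_sizes_field(compressed: u64, uncompressed: u64) -> Result<Vec<u8>> {
    let field = Zip64ExtendedInformationExtraFieldBuilder::new()
        .sizes(compressed, uncompressed)
        .build()?;
    // 4 header bytes (id + data size) followed by the two 8-byte sizes.
    assert_eq!(field.count_bytes(), 4 + 16);
    Ok(field.as_bytes())
}
```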

View file

@ -0,0 +1,161 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#437
pub struct LocalFileHeader {
pub version: u16,
pub flags: GeneralPurposeFlag,
pub compression: u16,
pub mod_time: u16,
pub mod_date: u16,
pub crc: u32,
pub compressed_size: u32,
pub uncompressed_size: u32,
pub file_name_length: u16,
pub extra_field_length: u16,
}
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#444
#[derive(Copy, Clone)]
pub struct GeneralPurposeFlag {
pub encrypted: bool,
pub data_descriptor: bool,
pub filename_unicode: bool,
}
/// 2-byte header IDs.
/// Ref https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#452
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub struct HeaderId(pub u16);
impl HeaderId {
pub const ZIP64_EXTENDED_INFORMATION_EXTRA_FIELD: HeaderId = HeaderId(0x0001);
pub const INFO_ZIP_UNICODE_COMMENT_EXTRA_FIELD: HeaderId = HeaderId(0x6375);
pub const INFO_ZIP_UNICODE_PATH_EXTRA_FIELD: HeaderId = HeaderId(0x7075);
}
impl From<u16> for HeaderId {
fn from(value: u16) -> Self {
HeaderId(value)
}
}
impl From<HeaderId> for u16 {
fn from(value: HeaderId) -> Self {
value.0
}
}
/// Represents each extra field.
/// Not strictly part of the spec, but is the most useful way to represent the data.
#[derive(Clone, Debug)]
#[non_exhaustive]
pub enum ExtraField {
Zip64ExtendedInformation(Zip64ExtendedInformationExtraField),
InfoZipUnicodeComment(InfoZipUnicodeCommentExtraField),
InfoZipUnicodePath(InfoZipUnicodePathExtraField),
Unknown(UnknownExtraField),
}
/// An extended information header for Zip64.
/// This field is used both for local file headers and central directory records.
/// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#453
#[derive(Clone, Debug)]
pub struct Zip64ExtendedInformationExtraField {
pub header_id: HeaderId,
pub uncompressed_size: Option<u64>,
pub compressed_size: Option<u64>,
// While not specified in the spec, these two fields are often left out in practice.
pub relative_header_offset: Option<u64>,
pub disk_start_number: Option<u32>,
}
impl Zip64ExtendedInformationExtraField {
pub(crate) fn content_size(&self) -> usize {
self.uncompressed_size.map(|_| 8).unwrap_or_default()
+ self.compressed_size.map(|_| 8).unwrap_or_default()
+ self.relative_header_offset.map(|_| 8).unwrap_or_default()
+ self.disk_start_number.map(|_| 8).unwrap_or_default()
}
}
/// Stores the UTF-8 version of the file comment as stored in the central directory header.
/// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#468
#[derive(Clone, Debug)]
pub enum InfoZipUnicodeCommentExtraField {
V1 { crc32: u32, unicode: Vec<u8> },
Unknown { version: u8, data: Vec<u8> },
}
/// Stores the UTF-8 version of the file name field as stored in the local header and central directory header.
/// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#469
#[derive(Clone, Debug)]
pub enum InfoZipUnicodePathExtraField {
V1 { crc32: u32, unicode: Vec<u8> },
Unknown { version: u8, data: Vec<u8> },
}
/// Represents any unparsed extra field.
#[derive(Clone, Debug)]
pub struct UnknownExtraField {
pub header_id: HeaderId,
pub data_size: u16,
pub content: Vec<u8>,
}
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4312
pub struct CentralDirectoryRecord {
pub v_made_by: u16,
pub v_needed: u16,
pub flags: GeneralPurposeFlag,
pub compression: u16,
pub mod_time: u16,
pub mod_date: u16,
pub crc: u32,
pub compressed_size: u32,
pub uncompressed_size: u32,
pub file_name_length: u16,
pub extra_field_length: u16,
pub file_comment_length: u16,
pub disk_start: u16,
pub inter_attr: u16,
pub exter_attr: u32,
pub lh_offset: u32,
}
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4316
#[derive(Debug)]
pub struct EndOfCentralDirectoryHeader {
pub(crate) disk_num: u16,
pub(crate) start_cent_dir_disk: u16,
pub(crate) num_of_entries_disk: u16,
pub(crate) num_of_entries: u16,
pub(crate) size_cent_dir: u32,
pub(crate) cent_dir_offset: u32,
pub(crate) file_comm_length: u16,
}
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4314
#[derive(Debug, PartialEq)]
pub struct Zip64EndOfCentralDirectoryRecord {
/// The size of this Zip64EndOfCentralDirectoryRecord.
/// This is specified because there is a variable-length extra zip64 information sector.
/// However, we will gleefully ignore this sector because it is reserved for use by PKWare.
pub size_of_zip64_end_of_cd_record: u64,
pub version_made_by: u16,
pub version_needed_to_extract: u16,
pub disk_number: u32,
pub disk_number_start_of_cd: u32,
pub num_entries_in_directory_on_disk: u64,
pub num_entries_in_directory: u64,
pub directory_size: u64,
pub offset_of_start_of_directory: u64,
}
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#4315
#[derive(Debug, PartialEq)]
pub struct Zip64EndOfCentralDirectoryLocator {
pub number_of_disk_with_start_of_zip64_end_of_central_directory: u32,
pub relative_offset: u64,
pub total_number_of_disks: u32,
}

View file

@ -0,0 +1,12 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod attribute;
pub(crate) mod compression;
pub(crate) mod consts;
pub(crate) mod extra_field;
pub(crate) mod header;
pub(crate) mod parse;
pub(crate) mod version;
pub use compression::Compression;

View file

@ -0,0 +1,345 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::error::{Result, ZipError};
use crate::spec::header::{
CentralDirectoryRecord, EndOfCentralDirectoryHeader, ExtraField, GeneralPurposeFlag, HeaderId, LocalFileHeader,
Zip64EndOfCentralDirectoryLocator, Zip64EndOfCentralDirectoryRecord,
};
use futures_lite::io::{AsyncRead, AsyncReadExt};
impl LocalFileHeader {
pub fn as_slice(&self) -> [u8; 26] {
let mut array = [0; 26];
let mut cursor = 0;
array_push!(array, cursor, self.version.to_le_bytes());
array_push!(array, cursor, self.flags.as_slice());
array_push!(array, cursor, self.compression.to_le_bytes());
array_push!(array, cursor, self.mod_time.to_le_bytes());
array_push!(array, cursor, self.mod_date.to_le_bytes());
array_push!(array, cursor, self.crc.to_le_bytes());
array_push!(array, cursor, self.compressed_size.to_le_bytes());
array_push!(array, cursor, self.uncompressed_size.to_le_bytes());
array_push!(array, cursor, self.file_name_length.to_le_bytes());
array_push!(array, cursor, self.extra_field_length.to_le_bytes());
array
}
}
impl GeneralPurposeFlag {
pub fn as_slice(&self) -> [u8; 2] {
let encrypted: u16 = match self.encrypted {
false => 0x0,
true => 0b1,
};
let data_descriptor: u16 = match self.data_descriptor {
false => 0x0,
true => 0x8,
};
let filename_unicode: u16 = match self.filename_unicode {
false => 0x0,
true => 0x800,
};
(encrypted | data_descriptor | filename_unicode).to_le_bytes()
}
}
impl CentralDirectoryRecord {
pub fn as_slice(&self) -> [u8; 42] {
let mut array = [0; 42];
let mut cursor = 0;
array_push!(array, cursor, self.v_made_by.to_le_bytes());
array_push!(array, cursor, self.v_needed.to_le_bytes());
array_push!(array, cursor, self.flags.as_slice());
array_push!(array, cursor, self.compression.to_le_bytes());
array_push!(array, cursor, self.mod_time.to_le_bytes());
array_push!(array, cursor, self.mod_date.to_le_bytes());
array_push!(array, cursor, self.crc.to_le_bytes());
array_push!(array, cursor, self.compressed_size.to_le_bytes());
array_push!(array, cursor, self.uncompressed_size.to_le_bytes());
array_push!(array, cursor, self.file_name_length.to_le_bytes());
array_push!(array, cursor, self.extra_field_length.to_le_bytes());
array_push!(array, cursor, self.file_comment_length.to_le_bytes());
array_push!(array, cursor, self.disk_start.to_le_bytes());
array_push!(array, cursor, self.inter_attr.to_le_bytes());
array_push!(array, cursor, self.exter_attr.to_le_bytes());
array_push!(array, cursor, self.lh_offset.to_le_bytes());
array
}
}
impl EndOfCentralDirectoryHeader {
pub fn as_slice(&self) -> [u8; 18] {
let mut array = [0; 18];
let mut cursor = 0;
array_push!(array, cursor, self.disk_num.to_le_bytes());
array_push!(array, cursor, self.start_cent_dir_disk.to_le_bytes());
array_push!(array, cursor, self.num_of_entries_disk.to_le_bytes());
array_push!(array, cursor, self.num_of_entries.to_le_bytes());
array_push!(array, cursor, self.size_cent_dir.to_le_bytes());
array_push!(array, cursor, self.cent_dir_offset.to_le_bytes());
array_push!(array, cursor, self.file_comm_length.to_le_bytes());
array
}
}
impl From<[u8; 26]> for LocalFileHeader {
fn from(value: [u8; 26]) -> LocalFileHeader {
LocalFileHeader {
version: u16::from_le_bytes(value[0..2].try_into().unwrap()),
flags: GeneralPurposeFlag::from(u16::from_le_bytes(value[2..4].try_into().unwrap())),
compression: u16::from_le_bytes(value[4..6].try_into().unwrap()),
mod_time: u16::from_le_bytes(value[6..8].try_into().unwrap()),
mod_date: u16::from_le_bytes(value[8..10].try_into().unwrap()),
crc: u32::from_le_bytes(value[10..14].try_into().unwrap()),
compressed_size: u32::from_le_bytes(value[14..18].try_into().unwrap()),
uncompressed_size: u32::from_le_bytes(value[18..22].try_into().unwrap()),
file_name_length: u16::from_le_bytes(value[22..24].try_into().unwrap()),
extra_field_length: u16::from_le_bytes(value[24..26].try_into().unwrap()),
}
}
}
impl From<u16> for GeneralPurposeFlag {
fn from(value: u16) -> GeneralPurposeFlag {
let encrypted = !matches!(value & 0x1, 0);
let data_descriptor = !matches!((value & 0x8) >> 3, 0);
let filename_unicode = !matches!((value & 0x800) >> 11, 0);
GeneralPurposeFlag { encrypted, data_descriptor, filename_unicode }
}
}
impl From<[u8; 42]> for CentralDirectoryRecord {
fn from(value: [u8; 42]) -> CentralDirectoryRecord {
CentralDirectoryRecord {
v_made_by: u16::from_le_bytes(value[0..2].try_into().unwrap()),
v_needed: u16::from_le_bytes(value[2..4].try_into().unwrap()),
flags: GeneralPurposeFlag::from(u16::from_le_bytes(value[4..6].try_into().unwrap())),
compression: u16::from_le_bytes(value[6..8].try_into().unwrap()),
mod_time: u16::from_le_bytes(value[8..10].try_into().unwrap()),
mod_date: u16::from_le_bytes(value[10..12].try_into().unwrap()),
crc: u32::from_le_bytes(value[12..16].try_into().unwrap()),
compressed_size: u32::from_le_bytes(value[16..20].try_into().unwrap()),
uncompressed_size: u32::from_le_bytes(value[20..24].try_into().unwrap()),
file_name_length: u16::from_le_bytes(value[24..26].try_into().unwrap()),
extra_field_length: u16::from_le_bytes(value[26..28].try_into().unwrap()),
file_comment_length: u16::from_le_bytes(value[28..30].try_into().unwrap()),
disk_start: u16::from_le_bytes(value[30..32].try_into().unwrap()),
inter_attr: u16::from_le_bytes(value[32..34].try_into().unwrap()),
exter_attr: u32::from_le_bytes(value[34..38].try_into().unwrap()),
lh_offset: u32::from_le_bytes(value[38..42].try_into().unwrap()),
}
}
}
impl From<[u8; 18]> for EndOfCentralDirectoryHeader {
fn from(value: [u8; 18]) -> EndOfCentralDirectoryHeader {
EndOfCentralDirectoryHeader {
disk_num: u16::from_le_bytes(value[0..2].try_into().unwrap()),
start_cent_dir_disk: u16::from_le_bytes(value[2..4].try_into().unwrap()),
num_of_entries_disk: u16::from_le_bytes(value[4..6].try_into().unwrap()),
num_of_entries: u16::from_le_bytes(value[6..8].try_into().unwrap()),
size_cent_dir: u32::from_le_bytes(value[8..12].try_into().unwrap()),
cent_dir_offset: u32::from_le_bytes(value[12..16].try_into().unwrap()),
file_comm_length: u16::from_le_bytes(value[16..18].try_into().unwrap()),
}
}
}
impl From<[u8; 52]> for Zip64EndOfCentralDirectoryRecord {
fn from(value: [u8; 52]) -> Self {
Self {
size_of_zip64_end_of_cd_record: u64::from_le_bytes(value[0..8].try_into().unwrap()),
version_made_by: u16::from_le_bytes(value[8..10].try_into().unwrap()),
version_needed_to_extract: u16::from_le_bytes(value[10..12].try_into().unwrap()),
disk_number: u32::from_le_bytes(value[12..16].try_into().unwrap()),
disk_number_start_of_cd: u32::from_le_bytes(value[16..20].try_into().unwrap()),
num_entries_in_directory_on_disk: u64::from_le_bytes(value[20..28].try_into().unwrap()),
num_entries_in_directory: u64::from_le_bytes(value[28..36].try_into().unwrap()),
directory_size: u64::from_le_bytes(value[36..44].try_into().unwrap()),
offset_of_start_of_directory: u64::from_le_bytes(value[44..52].try_into().unwrap()),
}
}
}
impl From<[u8; 16]> for Zip64EndOfCentralDirectoryLocator {
fn from(value: [u8; 16]) -> Self {
Self {
number_of_disk_with_start_of_zip64_end_of_central_directory: u32::from_le_bytes(
value[0..4].try_into().unwrap(),
),
relative_offset: u64::from_le_bytes(value[4..12].try_into().unwrap()),
total_number_of_disks: u32::from_le_bytes(value[12..16].try_into().unwrap()),
}
}
}
impl LocalFileHeader {
pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<LocalFileHeader> {
let mut buffer: [u8; 26] = [0; 26];
reader.read_exact(&mut buffer).await?;
Ok(LocalFileHeader::from(buffer))
}
}
impl EndOfCentralDirectoryHeader {
pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<EndOfCentralDirectoryHeader> {
let mut buffer: [u8; 18] = [0; 18];
reader.read_exact(&mut buffer).await?;
Ok(EndOfCentralDirectoryHeader::from(buffer))
}
}
impl CentralDirectoryRecord {
pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<CentralDirectoryRecord> {
let mut buffer: [u8; 42] = [0; 42];
reader.read_exact(&mut buffer).await?;
Ok(CentralDirectoryRecord::from(buffer))
}
}
impl Zip64EndOfCentralDirectoryRecord {
pub async fn from_reader<R: AsyncRead + Unpin>(reader: &mut R) -> Result<Zip64EndOfCentralDirectoryRecord> {
let mut buffer: [u8; 52] = [0; 52];
reader.read_exact(&mut buffer).await?;
Ok(Self::from(buffer))
}
pub fn as_bytes(&self) -> [u8; 52] {
let mut array = [0; 52];
let mut cursor = 0;
array_push!(array, cursor, self.size_of_zip64_end_of_cd_record.to_le_bytes());
array_push!(array, cursor, self.version_made_by.to_le_bytes());
array_push!(array, cursor, self.version_needed_to_extract.to_le_bytes());
array_push!(array, cursor, self.disk_number.to_le_bytes());
array_push!(array, cursor, self.disk_number_start_of_cd.to_le_bytes());
array_push!(array, cursor, self.num_entries_in_directory_on_disk.to_le_bytes());
array_push!(array, cursor, self.num_entries_in_directory.to_le_bytes());
array_push!(array, cursor, self.directory_size.to_le_bytes());
array_push!(array, cursor, self.offset_of_start_of_directory.to_le_bytes());
array
}
}
impl Zip64EndOfCentralDirectoryLocator {
/// Read 4 bytes from the reader and check whether its signature matches that of the EOCDL.
/// If it does, return Some(EOCDL), otherwise return None.
pub async fn try_from_reader<R: AsyncRead + Unpin>(
reader: &mut R,
) -> Result<Option<Zip64EndOfCentralDirectoryLocator>> {
let signature = {
let mut buffer = [0; 4];
reader.read_exact(&mut buffer).await?;
u32::from_le_bytes(buffer)
};
if signature != ZIP64_EOCDL_SIGNATURE {
return Ok(None);
}
let mut buffer: [u8; 16] = [0; 16];
reader.read_exact(&mut buffer).await?;
Ok(Some(Self::from(buffer)))
}
pub fn as_bytes(&self) -> [u8; 16] {
let mut array = [0; 16];
let mut cursor = 0;
array_push!(array, cursor, self.number_of_disk_with_start_of_zip64_end_of_central_directory.to_le_bytes());
array_push!(array, cursor, self.relative_offset.to_le_bytes());
array_push!(array, cursor, self.total_number_of_disks.to_le_bytes());
array
}
}
/// Parse the extra fields.
pub fn parse_extra_fields(data: Vec<u8>, uncompressed_size: u32, compressed_size: u32) -> Result<Vec<ExtraField>> {
let mut cursor = 0;
let mut extra_fields = Vec::new();
while cursor + 4 < data.len() {
let header_id: HeaderId = u16::from_le_bytes(data[cursor..cursor + 2].try_into().unwrap()).into();
let field_size = u16::from_le_bytes(data[cursor + 2..cursor + 4].try_into().unwrap());
if cursor + 4 + field_size as usize > data.len() {
return Err(ZipError::InvalidExtraFieldHeader(field_size, data.len() - cursor - 8 - field_size as usize));
}
let data = &data[cursor + 4..cursor + 4 + field_size as usize];
extra_fields.push(extra_field_from_bytes(header_id, field_size, data, uncompressed_size, compressed_size)?);
cursor += 4 + field_size as usize;
}
Ok(extra_fields)
}
/// Replace elements of an array at a given cursor index for use with a zero-initialised array.
macro_rules! array_push {
($arr:ident, $cursor:ident, $value:expr) => {{
for entry in $value {
$arr[$cursor] = entry;
$cursor += 1;
}
}};
}
use crate::spec::consts::ZIP64_EOCDL_SIGNATURE;
use crate::spec::extra_field::extra_field_from_bytes;
pub(crate) use array_push;
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_parse_zip64_eocdr() {
let eocdr: [u8; 56] = [
0x50, 0x4B, 0x06, 0x06, 0x2C, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x1E, 0x03, 0x2D, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x00, 0x2F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
0x00, 0x00,
];
let without_signature: [u8; 52] = eocdr[4..56].try_into().unwrap();
let zip64eocdr = Zip64EndOfCentralDirectoryRecord::from(without_signature);
assert_eq!(
zip64eocdr,
Zip64EndOfCentralDirectoryRecord {
size_of_zip64_end_of_cd_record: 44,
version_made_by: 798,
version_needed_to_extract: 45,
disk_number: 0,
disk_number_start_of_cd: 0,
num_entries_in_directory_on_disk: 1,
num_entries_in_directory: 1,
directory_size: 47,
offset_of_start_of_directory: 64,
}
)
}
#[tokio::test]
async fn test_parse_zip64_eocdl() {
let eocdl: [u8; 20] = [
0x50, 0x4B, 0x06, 0x07, 0x00, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00,
0x00, 0x00,
];
let mut cursor = futures_lite::io::Cursor::new(eocdl);
let zip64eocdl = Zip64EndOfCentralDirectoryLocator::try_from_reader(&mut cursor).await.unwrap().unwrap();
assert_eq!(
zip64eocdl,
Zip64EndOfCentralDirectoryLocator {
number_of_disk_with_start_of_zip64_end_of_central_directory: 0,
relative_offset: 111,
total_number_of_disks: 1,
}
)
}
}
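
A crate-internal sketch of the `GeneralPurposeFlag` round trip implemented above (bit 3 is the data-descriptor flag, bit 11 the UTF-8 filename flag):

```rust
// Crate-internal sketch of the GeneralPurposeFlag round trip implemented above.
use crate::spec::header::GeneralPurposeFlag;

fn flag_roundtrip() {
    let flags = GeneralPurposeFlag::from(0x0808u16);
    assert!(flags.data_descriptor); // bit 3
    assert!(flags.filename_unicode); // bit 11
    assert!(!flags.encrypted); // bit 0
    assert_eq!(flags.as_slice(), 0x0808u16.to_le_bytes());
}
```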

View file

@ -0,0 +1,42 @@
// Copyright (c) 2021 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::entry::ZipEntry;
#[cfg(any(
feature = "deflate",
feature = "bzip2",
feature = "zstd",
feature = "lzma",
feature = "xz",
feature = "deflate64"
))]
use crate::spec::Compression;
pub(crate) const SPEC_VERSION_MADE_BY: u16 = 63;
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#443
pub fn as_needed_to_extract(entry: &ZipEntry) -> u16 {
let mut version = match entry.compression() {
#[cfg(feature = "deflate")]
Compression::Deflate => 20,
#[cfg(feature = "deflate64")]
Compression::Deflate64 => 21,
#[cfg(feature = "bzip2")]
Compression::Bz => 46,
#[cfg(feature = "lzma")]
Compression::Lzma => 63,
_ => 10,
};
if let Ok(true) = entry.dir() {
version = std::cmp::max(version, 20);
}
version
}
// https://github.com/Majored/rs-async-zip/blob/main/SPECIFICATION.md#442
pub fn as_made_by() -> u16 {
// Default to UNIX mapping for the moment.
3 << 8 | SPEC_VERSION_MADE_BY
}
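
A crate-internal sketch of how the packed value above splits apart (upper byte: host system, lower byte: supported spec version):

```rust
// Crate-internal sketch: upper byte = host system (3 = Unix), lower byte = spec version (6.3 -> 63).
use crate::spec::version::as_made_by;

fn version_made_by_layout() {
    let v = as_made_by();
    assert_eq!(v >> 8, 3);
    assert_eq!(v & 0xFF, 63);
}
```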

View file

@ -0,0 +1,112 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::error::{Result, ZipError};
/// A string encoding supported by this crate.
#[derive(Debug, Clone, Copy)]
pub enum StringEncoding {
Utf8,
Raw,
}
/// A string wrapper for handling different encodings.
#[derive(Debug, Clone)]
pub struct ZipString {
encoding: StringEncoding,
raw: Vec<u8>,
alternative: Option<Vec<u8>>,
}
impl ZipString {
/// Constructs a new encoded string from its raw bytes and its encoding type.
///
/// # Note
/// If the provided encoding is [`StringEncoding::Utf8`] but the raw bytes are not valid UTF-8 (ie. a call to
/// `std::str::from_utf8()` fails), the encoding is defaulted back to [`StringEncoding::Raw`].
pub fn new(raw: Vec<u8>, mut encoding: StringEncoding) -> Self {
if let StringEncoding::Utf8 = encoding {
if std::str::from_utf8(&raw).is_err() {
encoding = StringEncoding::Raw;
}
}
Self { encoding, raw, alternative: None }
}
/// Constructs a new encoded string from utf-8 data, with an alternative in native MBCS encoding.
pub fn new_with_alternative(utf8: String, alternative: Vec<u8>) -> Self {
Self { encoding: StringEncoding::Utf8, raw: utf8.into_bytes(), alternative: Some(alternative) }
}
/// Returns the raw bytes for this string.
pub fn as_bytes(&self) -> &[u8] {
&self.raw
}
/// Returns the encoding type for this string.
pub fn encoding(&self) -> StringEncoding {
self.encoding
}
/// Returns the alternative bytes (in native MBCS encoding) for this string.
pub fn alternative(&self) -> Option<&[u8]> {
self.alternative.as_deref()
}
/// Returns the raw bytes converted into a string slice.
///
/// # Note
/// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`].
pub fn as_str(&self) -> Result<&str> {
if !matches!(self.encoding, StringEncoding::Utf8) {
return Err(ZipError::StringNotUtf8);
}
// SAFETY:
// "The bytes passed in must be valid UTF-8.'
//
// This function will error if self.encoding is not StringEncoding::Utf8.
//
// self.encoding is only ever StringEncoding::Utf8 if this variant was provided to the constructor AND the
// call to `std::str::from_utf8()` within the constructor succeeded. Mutable access to the inner vector is
// never given and no method implemented on this type mutates the inner vector.
Ok(unsafe { std::str::from_utf8_unchecked(&self.raw) })
}
/// Returns the raw bytes converted to an owned string.
///
/// # Note
/// A call to this method will only succeed if the encoding type is [`StringEncoding::Utf8`].
pub fn into_string(self) -> Result<String> {
if !matches!(self.encoding, StringEncoding::Utf8) {
return Err(ZipError::StringNotUtf8);
}
// SAFETY: See above.
Ok(unsafe { String::from_utf8_unchecked(self.raw) })
}
/// Returns the alternative bytes (in native MBCS encoding) converted into an owned vector.
pub fn into_alternative(self) -> Option<Vec<u8>> {
self.alternative
}
/// Returns whether this string is encoded as utf-8 without an alternative.
pub fn is_utf8_without_alternative(&self) -> bool {
matches!(self.encoding, StringEncoding::Utf8) && self.alternative.is_none()
}
}
impl From<String> for ZipString {
fn from(value: String) -> Self {
Self { encoding: StringEncoding::Utf8, raw: value.into_bytes(), alternative: None }
}
}
impl From<&str> for ZipString {
fn from(value: &str) -> Self {
Self { encoding: StringEncoding::Utf8, raw: value.as_bytes().to_vec(), alternative: None }
}
}
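
A usage sketch of the UTF-8 fallback behaviour documented on `ZipString::new`:

```rust
use async_zip::{StringEncoding, ZipString};

fn zip_string_fallback() {
    // Valid UTF-8 keeps the requested encoding and can be borrowed as &str.
    let name = ZipString::new(b"readme.md".to_vec(), StringEncoding::Utf8);
    assert_eq!(name.as_str().unwrap(), "readme.md");
    // Invalid UTF-8 silently falls back to StringEncoding::Raw, so as_str() errors.
    let raw = ZipString::new(vec![0xFF, 0xFE], StringEncoding::Utf8);
    assert!(raw.as_str().is_err());
    assert_eq!(raw.as_bytes(), &[0xFF_u8, 0xFE][..]);
}
```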

View file

@ -0,0 +1,2 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)

View file

@ -0,0 +1,16 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod combined;
pub(crate) mod read;
pub(crate) mod spec;
pub(crate) mod write;
use std::sync::Once;
static ENV_LOGGER: Once = Once::new();
/// Initialize the env logger for any tests that require it.
/// Safe to call multiple times.
fn init_logger() {
ENV_LOGGER.call_once(|| env_logger::Builder::from_default_env().format_module_path(true).init());
}

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,46 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::read::io::compressed::CompressedReader;
use crate::spec::Compression;
compressed_test_helper!(stored_test, Compression::Stored, "foo bar", "foo bar");
#[cfg(feature = "deflate")]
compressed_test_helper!(deflate_test, Compression::Deflate, "foo bar", include_bytes!("deflate.data"));
#[cfg(feature = "bzip2")]
compressed_test_helper!(bz_test, Compression::Bz, "foo bar", include_bytes!("bzip2.data"));
#[cfg(feature = "lzma")]
compressed_test_helper!(lzma_test, Compression::Lzma, "foo bar", include_bytes!("lzma.data"));
#[cfg(feature = "zstd")]
compressed_test_helper!(zstd_test, Compression::Zstd, "foo bar", include_bytes!("zstd.data"));
#[cfg(feature = "xz")]
compressed_test_helper!(xz_test, Compression::Xz, "foo bar", include_bytes!("xz.data"));
/// A helper macro for generating a CompressedReader test using a specific compression method.
macro_rules! compressed_test_helper {
($name:ident, $typ:expr, $data_raw:expr, $data:expr) => {
#[cfg(test)]
#[tokio::test]
async fn $name() {
use futures_lite::io::{AsyncReadExt, Cursor};
let data = $data;
let data_raw = $data_raw;
let cursor = Cursor::new(data);
let mut reader = CompressedReader::new(cursor, $typ);
let mut read_data = String::new();
reader.read_to_string(&mut read_data).await.expect("read into CompressedReader failed");
assert_eq!(read_data, data_raw);
}
};
}
use compressed_test_helper;

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -0,0 +1,64 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
#[test]
fn search_one_byte_test() {
let buffer: &[u8] = &[0x0, 0x0, 0x0, 0x0, 0x0, 0x0];
let signature: &[u8] = &[0x1];
let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature);
assert!(matched.is_none());
let buffer: &[u8] = &[0x2, 0x1, 0x0, 0x0, 0x0, 0x0];
let signature: &[u8] = &[0x1];
let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature);
assert!(matched.is_some());
assert_eq!(1, matched.unwrap());
}
#[test]
fn search_two_byte_test() {
let buffer: &[u8] = &[0x2, 0x1, 0x0, 0x0, 0x0, 0x0];
let signature: &[u8] = &[0x2, 0x1];
let matched = crate::base::read::io::locator::reverse_search_buffer(buffer, signature);
assert!(matched.is_some());
assert_eq!(1, matched.unwrap());
}
#[tokio::test]
async fn locator_empty_test() {
use futures_lite::io::Cursor;
let data = &include_bytes!("empty.zip");
let mut cursor = Cursor::new(data);
let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await;
assert!(eocdr.is_ok());
assert_eq!(eocdr.unwrap(), 4);
}
#[tokio::test]
async fn locator_empty_max_comment_test() {
use futures_lite::io::Cursor;
let data = &include_bytes!("empty-with-max-comment.zip");
let mut cursor = Cursor::new(data);
let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await;
assert!(eocdr.is_ok());
assert_eq!(eocdr.unwrap(), 4);
}
#[tokio::test]
async fn locator_buffer_boundary_test() {
use futures_lite::io::Cursor;
let data = &include_bytes!("empty-buffer-boundary.zip");
let mut cursor = Cursor::new(data);
let eocdr = crate::base::read::io::locator::eocdr(&mut cursor).await;
assert!(eocdr.is_ok());
assert_eq!(eocdr.unwrap(), 4);
}

View file

@ -0,0 +1,6 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod compression;
pub(crate) mod locator;
pub(crate) mod zip64;

View file

@ -0,0 +1,107 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// Copyright (c) 2023 Cognite AS
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use futures_lite::io::AsyncReadExt;
use crate::tests::init_logger;
const ZIP64_ZIP_CONTENTS: &str = "Hello World!\n";
/// Tests opening and reading a zip64 archive.
/// It contains one file named "-" with a zip64 extended field header.
#[tokio::test]
async fn test_read_zip64_archive_mem() {
use crate::base::read::mem::ZipFileReader;
init_logger();
let data = include_bytes!("zip64.zip").to_vec();
let reader = ZipFileReader::new(data).await.unwrap();
let mut entry_reader = reader.reader_without_entry(0).await.unwrap();
let mut read_data = String::new();
entry_reader.read_to_string(&mut read_data).await.expect("read failed");
assert_eq!(
read_data.chars().count(),
ZIP64_ZIP_CONTENTS.chars().count(),
"{read_data:?} != {ZIP64_ZIP_CONTENTS:?}"
);
assert_eq!(read_data, ZIP64_ZIP_CONTENTS);
}
/// Like test_read_zip64_archive_mem() but for the streaming version
#[tokio::test]
async fn test_read_zip64_archive_stream() {
use crate::base::read::stream::ZipFileReader;
init_logger();
let data = include_bytes!("zip64.zip").to_vec();
let reader = ZipFileReader::new(data.as_slice());
let mut entry_reader = reader.next_without_entry().await.unwrap().unwrap();
let mut read_data = String::new();
entry_reader.reader_mut().read_to_string(&mut read_data).await.expect("read failed");
assert_eq!(
read_data.chars().count(),
ZIP64_ZIP_CONTENTS.chars().count(),
"{read_data:?} != {ZIP64_ZIP_CONTENTS:?}"
);
assert_eq!(read_data, ZIP64_ZIP_CONTENTS);
}
/// Generate an example file only if it doesn't exist already.
/// The file is placed adjacent to this rs file.
#[cfg(feature = "tokio")]
fn generate_zip64many_zip() -> std::path::PathBuf {
use std::io::Write;
use zip::write::FileOptions;
let mut path = std::path::PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("src/tests/read/zip64/zip64many.zip");
// Only recreate the zip if it doesn't already exist.
if path.exists() {
return path;
}
let zip_file = std::fs::File::create(&path).unwrap();
let mut zip = zip::ZipWriter::new(zip_file);
let options = FileOptions::default().compression_method(zip::CompressionMethod::Stored);
for i in 0..2_u32.pow(16) + 1 {
zip.start_file(format!("{i}.txt"), options).unwrap();
zip.write_all(b"\n").unwrap();
}
zip.finish().unwrap();
path
}
/// Test reading a generated zip64 archive that contains more than 2^16 entries.
#[cfg(feature = "tokio-fs")]
#[tokio::test]
async fn test_read_zip64_archive_many_entries() {
use crate::tokio::read::fs::ZipFileReader;
init_logger();
let path = generate_zip64many_zip();
let reader = ZipFileReader::new(path).await.unwrap();
// Verify that each entry exists and has the contents "\n"
for i in 0..2_u32.pow(16) + 1 {
let entry = reader.file().entries().get(i as usize).unwrap();
eprintln!("{:?}", entry.filename().as_bytes());
assert_eq!(entry.filename().as_str().unwrap(), format!("{i}.txt"));
let mut entry = reader.reader_without_entry(i as usize).await.unwrap();
let mut contents = String::new();
entry.read_to_string(&mut contents).await.unwrap();
assert_eq!(contents, "\n");
}
}

Binary file not shown.

View file

@ -0,0 +1,44 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
#[cfg(feature = "chrono")]
use chrono::{TimeZone, Utc};
use crate::ZipDateTimeBuilder;
#[test]
#[cfg(feature = "chrono")]
fn date_conversion_test_chrono() {
let original_dt = Utc.timestamp_opt(1666544102, 0).unwrap();
let zip_dt = crate::ZipDateTime::from_chrono(&original_dt);
let result_dt = zip_dt.as_chrono().single().expect("expected single unique result");
assert_eq!(result_dt, original_dt);
}
#[test]
fn date_conversion_test() {
let year = 2000;
let month = 9;
let day = 8;
let hour = 7;
let minute = 5;
let second = 4;
let mut builder = ZipDateTimeBuilder::new();
builder = builder.year(year);
builder = builder.month(month);
builder = builder.day(day);
builder = builder.hour(hour);
builder = builder.minute(minute);
builder = builder.second(second);
let built = builder.build();
assert_eq!(year, built.year());
assert_eq!(month, built.month());
assert_eq!(day, built.day());
assert_eq!(hour, built.hour());
assert_eq!(minute, built.minute());
assert_eq!(second, built.second());
}

View file

@ -0,0 +1,4 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
pub(crate) mod date;

View file

@ -0,0 +1,29 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use futures_lite::io::AsyncWrite;
use std::io::Error;
use std::pin::Pin;
use std::task::{Context, Poll};
pub(crate) mod offset;
mod zip64;
/// /dev/null for AsyncWrite.
/// Useful for tests that involve writing, but not reading, large amounts of data.
pub(crate) struct AsyncSink;
// AsyncSink is always ready to receive bytes and throw them away.
impl AsyncWrite for AsyncSink {
fn poll_write(self: Pin<&mut Self>, _: &mut Context<'_>, buf: &[u8]) -> Poll<Result<usize, Error>> {
Poll::Ready(Ok(buf.len()))
}
fn poll_flush(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), Error>> {
Poll::Ready(Ok(()))
}
fn poll_close(self: Pin<&mut Self>, _: &mut Context<'_>) -> Poll<Result<(), Error>> {
Poll::Ready(Ok(()))
}
}

View file

@ -0,0 +1,22 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::base::write::io::offset::AsyncOffsetWriter;
#[tokio::test]
async fn basic() {
use futures_lite::io::AsyncWriteExt;
use futures_lite::io::Cursor;
let mut writer = AsyncOffsetWriter::new(Cursor::new(Vec::new()));
assert_eq!(writer.offset(), 0);
writer.write_all(b"Foo. Bar. Foo. Bar.").await.expect("failed to write data");
assert_eq!(writer.offset(), 19);
writer.write_all(b"Foo. Foo.").await.expect("failed to write data");
assert_eq!(writer.offset(), 28);
writer.write_all(b"Bar. Bar.").await.expect("failed to write data");
assert_eq!(writer.offset(), 37);
}

View file

@ -0,0 +1,243 @@
// Copyright Cognite AS, 2023
use crate::base::write::ZipFileWriter;
use crate::error::{Zip64ErrorCase, ZipError};
use crate::spec::consts::NON_ZIP64_MAX_SIZE;
use crate::tests::init_logger;
use crate::tests::write::AsyncSink;
use crate::{Compression, ZipEntryBuilder};
use std::io::Read;
use crate::spec::header::ExtraField;
use futures_lite::io::AsyncWriteExt;
// Useful constants for writing a large file.
const BATCH_SIZE: usize = 100_000;
const NUM_BATCHES: usize = NON_ZIP64_MAX_SIZE as usize / BATCH_SIZE + 1;
const BATCHED_FILE_SIZE: usize = NUM_BATCHES * BATCH_SIZE;
/// Test writing a small zip64 file.
/// No zip64 extra fields will be emitted for EntryWhole.
/// Z64 end of directory record & locator should be emitted
#[tokio::test]
async fn test_write_zip64_file() {
init_logger();
let mut buffer = Vec::new();
let mut writer = ZipFileWriter::new(&mut buffer).force_zip64();
let entry = ZipEntryBuilder::new("file1".to_string().into(), Compression::Stored);
writer.write_entry_whole(entry, &[0, 0, 0, 0]).await.unwrap();
let entry = ZipEntryBuilder::new("file2".to_string().into(), Compression::Stored);
let mut entry_writer = writer.write_entry_stream(entry).await.unwrap();
entry_writer.write_all(&[0, 0, 0, 0]).await.unwrap();
entry_writer.close().await.unwrap();
writer.close().await.unwrap();
let cursor = std::io::Cursor::new(buffer);
let mut zip = zip::read::ZipArchive::new(cursor).unwrap();
let mut file1 = zip.by_name("file1").unwrap();
assert_eq!(file1.extra_data(), &[] as &[u8]);
let mut buffer = Vec::new();
file1.read_to_end(&mut buffer).unwrap();
assert_eq!(buffer.as_slice(), &[0, 0, 0, 0]);
drop(file1);
let mut file2 = zip.by_name("file2").unwrap();
let mut buffer = Vec::new();
file2.read_to_end(&mut buffer).unwrap();
assert_eq!(buffer.as_slice(), &[0, 0, 0, 0]);
}
/// Test writing a large zip64 file. This test will use upwards of 4GB of memory.
#[tokio::test]
async fn test_write_large_zip64_file() {
init_logger();
// Allocate space with some extra for metadata records
let mut buffer = Vec::with_capacity(BATCHED_FILE_SIZE + 100_000);
let mut writer = ZipFileWriter::new(&mut buffer);
// Stream-written zip files are dubiously spec-conformant. We need to specify a valid file size
// in order for rs-zip (and unzip) to correctly read these files.
let entry = ZipEntryBuilder::new("file".to_string().into(), Compression::Stored)
.size(BATCHED_FILE_SIZE as u64, BATCHED_FILE_SIZE as u64);
let mut entry_writer = writer.write_entry_stream(entry).await.unwrap();
for _ in 0..NUM_BATCHES {
entry_writer.write_all(&[0; BATCH_SIZE]).await.unwrap();
}
entry_writer.close().await.unwrap();
assert!(writer.is_zip64);
let cd_entry = writer.cd_entries.last().unwrap();
match &cd_entry.entry.extra_fields.last().unwrap() {
ExtraField::Zip64ExtendedInformation(zip64) => {
assert_eq!(zip64.compressed_size.unwrap(), BATCHED_FILE_SIZE as u64);
assert_eq!(zip64.uncompressed_size.unwrap(), BATCHED_FILE_SIZE as u64);
}
e => panic!("Expected a Zip64 extended field, got {:?}", e),
}
assert_eq!(cd_entry.header.uncompressed_size, NON_ZIP64_MAX_SIZE);
assert_eq!(cd_entry.header.compressed_size, NON_ZIP64_MAX_SIZE);
writer.close().await.unwrap();
let cursor = std::io::Cursor::new(buffer);
let mut archive = zip::read::ZipArchive::new(cursor).unwrap();
let mut file = archive.by_name("file").unwrap();
assert_eq!(file.compression(), zip::CompressionMethod::Stored);
assert_eq!(file.size(), BATCHED_FILE_SIZE as u64);
let mut buffer = [0; 100_000];
let mut bytes_total = 0;
loop {
let read_bytes = file.read(&mut buffer).unwrap();
if read_bytes == 0 {
break;
}
bytes_total += read_bytes;
}
assert_eq!(bytes_total, BATCHED_FILE_SIZE);
}
/// Test writing a file, and reading it with async-zip
#[tokio::test]
async fn test_write_large_zip64_file_self_read() {
use futures_lite::io::AsyncReadExt;
init_logger();
// Allocate space with some extra for metadata records
let mut buffer = Vec::with_capacity(BATCHED_FILE_SIZE + 100_000);
let mut writer = ZipFileWriter::new(&mut buffer);
let entry = ZipEntryBuilder::new("file".into(), Compression::Stored);
let mut entry_writer = writer.write_entry_stream(entry).await.unwrap();
for _ in 0..NUM_BATCHES {
entry_writer.write_all(&[0; BATCH_SIZE]).await.unwrap();
}
entry_writer.close().await.unwrap();
writer.close().await.unwrap();
let reader = crate::base::read::mem::ZipFileReader::new(buffer).await.unwrap();
assert!(reader.file().zip64);
assert_eq!(reader.file().entries[0].entry.filename().as_str().unwrap(), "file");
assert_eq!(reader.file().entries[0].entry.compressed_size, BATCHED_FILE_SIZE as u64);
let mut entry = reader.reader_without_entry(0).await.unwrap();
let mut buffer = [0; 100_000];
let mut bytes_total = 0;
loop {
let read_bytes = entry.read(&mut buffer).await.unwrap();
if read_bytes == 0 {
break;
}
bytes_total += read_bytes;
}
assert_eq!(bytes_total, BATCHED_FILE_SIZE);
}
/// Test writing a zip64 file with more than u16::MAX files.
#[tokio::test]
async fn test_write_zip64_file_many_entries() {
init_logger();
// The generated file will likely be ~3MB in size.
let mut buffer = Vec::with_capacity(3_500_000);
let mut writer = ZipFileWriter::new(&mut buffer);
for i in 0..=u16::MAX as u32 + 1 {
let entry = ZipEntryBuilder::new(i.to_string().into(), Compression::Stored);
writer.write_entry_whole(entry, &[]).await.unwrap();
}
assert!(writer.is_zip64);
writer.close().await.unwrap();
let cursor = std::io::Cursor::new(buffer);
let mut zip = zip::read::ZipArchive::new(cursor).unwrap();
assert_eq!(zip.len(), u16::MAX as usize + 2);
for i in 0..=u16::MAX as u32 + 1 {
let mut file = zip.by_name(&i.to_string()).unwrap();
let mut buf = Vec::new();
file.read_to_end(&mut buf).unwrap();
}
}
/// Tests that EntryWholeWriter switches to Zip64 mode when writing too many files for a non-Zip64 archive.
#[tokio::test]
async fn test_zip64_when_many_files_whole() {
let mut sink = AsyncSink;
let mut writer = ZipFileWriter::new(&mut sink);
for i in 0..=u16::MAX as u32 + 1 {
let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored);
writer.write_entry_whole(entry, &[]).await.unwrap()
}
assert!(writer.is_zip64);
writer.close().await.unwrap();
}
/// Tests that EntryStreamWriter switches to Zip64 mode when writing too many files for a non-Zip64 archive.
#[tokio::test]
async fn test_zip64_when_many_files_stream() {
let mut sink = AsyncSink;
let mut writer = ZipFileWriter::new(&mut sink);
for i in 0..=u16::MAX as u32 + 1 {
let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored);
let entrywriter = writer.write_entry_stream(entry).await.unwrap();
entrywriter.close().await.unwrap();
}
assert!(writer.is_zip64);
writer.close().await.unwrap();
}
/// Tests that when force_no_zip64 is true, EntryWholeWriter errors when trying to write more than
/// u16::MAX files to a single archive.
#[tokio::test]
async fn test_force_no_zip64_errors_with_too_many_files_whole() {
let mut sink = AsyncSink;
let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64();
for i in 0..u16::MAX {
let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored);
writer.write_entry_whole(entry, &[]).await.unwrap()
}
let entry = ZipEntryBuilder::new("65537".to_string().into(), Compression::Stored);
let result = writer.write_entry_whole(entry, &[]).await;
assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles))));
}
/// Tests that when force_no_zip64 is true, EntryStreamWriter errors when trying to write more than
/// u16::MAX files to a single archive.
#[tokio::test]
async fn test_force_no_zip64_errors_with_too_many_files_stream() {
let mut sink = AsyncSink;
let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64();
for i in 0..u16::MAX {
let entry = ZipEntryBuilder::new(format!("{i}").into(), Compression::Stored);
let entrywriter = writer.write_entry_stream(entry).await.unwrap();
entrywriter.close().await.unwrap();
}
let entry = ZipEntryBuilder::new("65537".to_string().into(), Compression::Stored);
let entrywriter = writer.write_entry_stream(entry).await.unwrap();
let result = entrywriter.close().await;
assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::TooManyFiles))));
}
/// Tests that when force_no_zip64 is true, EntryStreamWriter errors when trying to write
/// a file larger than ~4 GiB to an archive.
#[tokio::test]
async fn test_force_no_zip64_errors_with_too_large_file_stream() {
let mut sink = AsyncSink;
let mut writer = ZipFileWriter::new(&mut sink).force_no_zip64();
let entry = ZipEntryBuilder::new("-".to_string().into(), Compression::Stored);
let mut entrywriter = writer.write_entry_stream(entry).await.unwrap();
// Writing ~4 GiB of zeroes, 100 KB at a time
for _ in 0..NUM_BATCHES {
entrywriter.write_all(&[0; BATCH_SIZE]).await.unwrap();
}
let result = entrywriter.close().await;
assert!(matches!(result, Err(ZipError::Zip64Needed(Zip64ErrorCase::LargeFile))));
}

View file

@ -0,0 +1,41 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A set of [`tokio`]-specific type aliases and features.
//!
//! # Usage
//! With the `tokio` feature enabled, types from the [`base`] implementation will implement additional constructors
//! for use with [`tokio`]. These constructors internally implement conversion between the required async IO traits.
//! They are defined as:
//! - [`base::read::seek::ZipFileReader::with_tokio()`]
//! - [`base::read::stream::ZipFileReader::with_tokio()`]
//! - [`base::write::ZipFileWriter::with_tokio()`]
//!
//! As a result of Rust's type inference, we are able to reuse the [`base`] implementation's types with considerable
//! ease. There only exists one caveat with their use; the types returned by these constructors contain a wrapping
//! compatibility type provided by an external crate. These compatibility types cannot be named unless you also pull in
//! the [`tokio_util`] dependency manually. This is why we've provided type aliases within this module so that they can
//! be named without needing to pull in a separate dependency.
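//!
//! ## Example
//! A minimal sketch (not part of the upstream docs) of the writer constructor; it assumes the
//! `tokio` feature is enabled and uses a [`tokio::fs::File`] as the underlying writer:
//! ```no_run
//! # use async_zip::error::Result;
//! # async fn run() -> Result<()> {
//! use async_zip::tokio::write::ZipFileWriter;
//!
//! let mut file = tokio::fs::File::create("./foo.zip").await?;
//! // `with_tokio()` wraps the tokio writer in the compatibility type for us.
//! let writer = ZipFileWriter::with_tokio(&mut file);
//! writer.close().await?;
//! # Ok(())
//! # }
//! ```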
#[cfg(doc)]
use crate::base;
#[cfg(doc)]
use tokio;
#[cfg(doc)]
use tokio_util;
pub mod read;
pub mod write {
//! A module which supports writing ZIP files.
#[cfg(doc)]
use crate::base;
use tokio_util::compat::Compat;
/// A [`tokio`]-specific type alias for [`base::write::ZipFileWriter`];
pub type ZipFileWriter<W> = crate::base::write::ZipFileWriter<Compat<W>>;
/// A [`tokio`]-specific type alias for [`base::write::EntryStreamWriter`];
pub type EntryStreamWriter<'a, W> = crate::base::write::EntryStreamWriter<'a, Compat<W>>;
}

View file

@ -0,0 +1,160 @@
// Copyright (c) 2022 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A concurrent ZIP reader which acts over a file system path.
//!
//! Concurrency is achieved as a result of:
//! - Wrapping the provided path within an [`Arc`] to allow shared ownership.
//! - Constructing a new [`File`] from the path when reading.
//!
//! ### Usage
//! Unlike the [`seek`] module, we no longer hold a mutable reference to any inner reader which in turn, allows the
//! construction of concurrent [`ZipEntryReader`]s. Though, note that each individual [`ZipEntryReader`] cannot be sent
//! between thread boundaries due to the masked lifetime requirement. Therefore, the overarching [`ZipFileReader`]
//! should be cloned and moved into those contexts when needed.
//!
//! ### Concurrent Example
//! ```no_run
//! # use async_zip::tokio::read::fs::ZipFileReader;
//! # use async_zip::error::Result;
//! # use futures_lite::io::AsyncReadExt;
//! #
//! async fn run() -> Result<()> {
//! let reader = ZipFileReader::new("./foo.zip").await?;
//! let result = tokio::join!(read(&reader, 0), read(&reader, 1));
//!
//! let data_0 = result.0?;
//! let data_1 = result.1?;
//!
//! // Use data within current scope.
//!
//! Ok(())
//! }
//!
//! async fn read(reader: &ZipFileReader, index: usize) -> Result<Vec<u8>> {
//! let mut entry = reader.reader_without_entry(index).await?;
//! let mut data = Vec::new();
//! entry.read_to_end(&mut data).await?;
//! Ok(data)
//! }
//! ```
//!
//! ### Parallel Example
//! ```no_run
//! # use async_zip::tokio::read::fs::ZipFileReader;
//! # use async_zip::error::Result;
//! # use futures_lite::io::AsyncReadExt;
//! #
//! async fn run() -> Result<()> {
//! let reader = ZipFileReader::new("./foo.zip").await?;
//!
//! let handle_0 = tokio::spawn(read(reader.clone(), 0));
//! let handle_1 = tokio::spawn(read(reader.clone(), 1));
//!
//! let data_0 = handle_0.await.expect("thread panicked")?;
//! let data_1 = handle_1.await.expect("thread panicked")?;
//!
//! // Use data within current scope.
//!
//! Ok(())
//! }
//!
//! async fn read(reader: ZipFileReader, index: usize) -> Result<Vec<u8>> {
//! let mut entry = reader.reader_without_entry(index).await?;
//! let mut data = Vec::new();
//! entry.read_to_end(&mut data).await?;
//! Ok(data)
//! }
//! ```
#[cfg(doc)]
use crate::base::read::seek;
use crate::base::read::io::entry::{WithEntry, WithoutEntry, ZipEntryReader};
use crate::error::{Result, ZipError};
use crate::file::ZipFile;
use std::path::{Path, PathBuf};
use std::sync::Arc;
use tokio::fs::File;
use tokio::io::BufReader;
use tokio_util::compat::{Compat, TokioAsyncReadCompatExt};
struct Inner {
path: PathBuf,
file: ZipFile,
}
/// A concurrent ZIP reader which acts over a file system path.
#[derive(Clone)]
pub struct ZipFileReader {
inner: Arc<Inner>,
}
impl ZipFileReader {
/// Constructs a new ZIP reader from a file system path.
pub async fn new<P>(path: P) -> Result<ZipFileReader>
where
P: AsRef<Path>,
{
let file = crate::base::read::file(File::open(&path).await?.compat()).await?;
Ok(ZipFileReader::from_raw_parts(path, file))
}
/// Constructs a ZIP reader from a file system path and ZIP file information derived from that path.
///
/// Providing a [`ZipFile`] that wasn't derived from that path may lead to inaccurate parsing.
pub fn from_raw_parts<P>(path: P, file: ZipFile) -> ZipFileReader
where
P: AsRef<Path>,
{
ZipFileReader { inner: Arc::new(Inner { path: path.as_ref().to_owned(), file }) }
}
/// Returns this ZIP file's information.
pub fn file(&self) -> &ZipFile {
&self.inner.file
}
/// Returns the file system path provided to the reader during construction.
pub fn path(&self) -> &Path {
&self.inner.path
}
/// Returns a new entry reader if the provided index is valid.
pub async fn reader_without_entry(
&self,
index: usize,
) -> Result<ZipEntryReader<'static, Compat<BufReader<File>>, WithoutEntry>> {
let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
let mut fs_file = BufReader::new(File::open(&self.inner.path).await?).compat();
stored_entry.seek_to_data_offset(&mut fs_file).await?;
Ok(ZipEntryReader::new_with_owned(
fs_file,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
))
}
/// Returns a new entry reader if the provided index is valid.
pub async fn reader_with_entry(
&self,
index: usize,
) -> Result<ZipEntryReader<'_, Compat<BufReader<File>>, WithEntry<'_>>> {
let stored_entry = self.inner.file.entries.get(index).ok_or(ZipError::EntryIndexOutOfBounds)?;
let mut fs_file = BufReader::new(File::open(&self.inner.path).await?).compat();
stored_entry.seek_to_data_offset(&mut fs_file).await?;
let reader = ZipEntryReader::new_with_owned(
fs_file,
stored_entry.entry.compression(),
stored_entry.entry.compressed_size(),
);
Ok(reader.into_with_entry(stored_entry))
}
}

View file

@ -0,0 +1,44 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
//! A module which supports reading ZIP files.
use tokio_util::compat::Compat;
#[cfg(feature = "tokio-fs")]
pub mod fs;
#[cfg(doc)]
use crate::base;
#[cfg(doc)]
use tokio;
/// A [`tokio`]-specific type alias for [`base::read::ZipEntryReader`];
pub type ZipEntryReader<'a, R, E> = crate::base::read::ZipEntryReader<'a, Compat<R>, E>;
pub mod seek {
//! A ZIP reader which acts over a seekable source.
use tokio_util::compat::Compat;
#[cfg(doc)]
use crate::base;
#[cfg(doc)]
use tokio;
/// A [`tokio`]-specific type alias for [`base::read::seek::ZipFileReader`];
pub type ZipFileReader<R> = crate::base::read::seek::ZipFileReader<Compat<R>>;
}
pub mod stream {
//! A ZIP reader which acts over a non-seekable source.
#[cfg(doc)]
use crate::base;
#[cfg(doc)]
use tokio;
use tokio_util::compat::Compat;
/// A [`tokio`]-specific type alias for [`base::read::stream::Reading`];
pub type Reading<'a, R, E> = crate::base::read::stream::Reading<'a, Compat<R>, E>;
/// A [`tokio`]-specific type alias for [`base::read::stream::Ready`];
pub type Ready<R> = crate::base::read::stream::Ready<Compat<R>>;
}

View file

@ -0,0 +1,18 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use crate::error::{Result, ZipError};
use futures_lite::io::{AsyncRead, AsyncReadExt};
// Assert that the next four-byte signature read by a reader which impls AsyncRead matches the expected signature.
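// A typical call site (illustrative) looks like `assert_signature(&mut reader, expected).await?`,
// where `expected` is one of the magic numbers defined in the ZIP specification module.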
pub(crate) async fn assert_signature<R: AsyncRead + Unpin>(reader: &mut R, expected: u32) -> Result<()> {
let signature = {
let mut buffer = [0; 4];
reader.read_exact(&mut buffer).await?;
u32::from_le_bytes(buffer)
};
match signature {
actual if actual == expected => Ok(()),
actual => Err(ZipError::UnexpectedHeaderError(actual, expected)),
}
}

View file

@ -0,0 +1,99 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
#![allow(dead_code)]
use async_zip::base::read::mem;
use async_zip::base::read::seek;
use async_zip::base::write::ZipFileWriter;
use async_zip::Compression;
use async_zip::ZipEntryBuilder;
use futures_lite::io::AsyncWriteExt;
use tokio::fs::File;
use tokio::io::BufReader;
use tokio_util::compat::TokioAsyncReadCompatExt;
const FOLDER_PREFIX: &str = "tests/test_inputs";
const FILE_LIST: &[&str] = &[
"sample_data/alpha/back_to_front.txt",
"sample_data/alpha/front_to_back.txt",
"sample_data/numeric/forward.txt",
"sample_data/numeric/reverse.txt",
];
pub async fn compress_to_mem(compress: Compression) -> Vec<u8> {
let mut bytes = Vec::with_capacity(10_000);
let mut writer = ZipFileWriter::new(&mut bytes);
for fname in FILE_LIST {
let content = tokio::fs::read(format!("{FOLDER_PREFIX}/{fname}")).await.unwrap();
let opts = ZipEntryBuilder::new(fname.to_string().into(), compress);
let mut entry_writer = writer.write_entry_stream(opts).await.unwrap();
entry_writer.write_all(&content).await.unwrap();
entry_writer.close().await.unwrap();
}
writer.close().await.unwrap();
bytes
}
#[cfg(feature = "tokio-fs")]
pub async fn check_decompress_fs(fname: &str) {
use async_zip::tokio::read::fs;
let zip = fs::ZipFileReader::new(fname).await.unwrap();
let zip_entries: Vec<_> = zip.file().entries().to_vec();
for (idx, entry) in zip_entries.into_iter().enumerate() {
// TODO: resolve unwrap usage
if entry.dir().unwrap() {
continue;
}
// TODO: resolve unwrap usage
let fname = entry.filename().as_str().unwrap();
let mut output = String::new();
let mut reader = zip.reader_with_entry(idx).await.unwrap();
let _ = reader.read_to_string_checked(&mut output).await.unwrap();
let fs_file = format!("{FOLDER_PREFIX}/{fname}");
let expected = tokio::fs::read_to_string(fs_file).await.unwrap();
assert_eq!(output, expected, "for {fname}, expect zip data to match file data");
}
}
pub async fn check_decompress_seek(fname: &str) {
let file = BufReader::new(File::open(fname).await.unwrap());
let mut file_compat = file.compat();
let mut zip = seek::ZipFileReader::new(&mut file_compat).await.unwrap();
let zip_entries: Vec<_> = zip.file().entries().to_vec();
for (idx, entry) in zip_entries.into_iter().enumerate() {
// TODO: resolve unwrap usage
if entry.dir().unwrap() {
continue;
}
// TODO: resolve unwrap usage
let fname = entry.filename().as_str().unwrap();
let mut output = String::new();
let mut reader = zip.reader_with_entry(idx).await.unwrap();
let _ = reader.read_to_string_checked(&mut output).await.unwrap();
let fs_file = format!("tests/test_inputs/{fname}");
let expected = tokio::fs::read_to_string(fs_file).await.unwrap();
assert_eq!(output, expected, "for {fname}, expect zip data to match file data");
}
}
pub async fn check_decompress_mem(zip_data: Vec<u8>) {
let zip = mem::ZipFileReader::new(zip_data).await.unwrap();
let zip_entries: Vec<_> = zip.file().entries().to_vec();
for (idx, entry) in zip_entries.into_iter().enumerate() {
// TODO: resolve unwrap usage
if entry.dir().unwrap() {
continue;
}
// TODO: resolve unwrap usage
let fname = entry.filename().as_str().unwrap();
let mut output = String::new();
let mut reader = zip.reader_with_entry(idx).await.unwrap();
let _ = reader.read_to_string_checked(&mut output).await.unwrap();
let fs_file = format!("{FOLDER_PREFIX}/{fname}");
let expected = tokio::fs::read_to_string(fs_file).await.unwrap();
assert_eq!(output, expected, "for {fname}, expect zip data to match file data");
}
}

View file

@ -0,0 +1,81 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
use async_zip::{Compression, ZipEntryBuilder, ZipString};
use futures_lite::AsyncWriteExt;
mod common;
#[cfg(feature = "zstd")]
#[tokio::test]
async fn zip_zstd_in_out() {
let zip_data = common::compress_to_mem(Compression::Zstd).await;
common::check_decompress_mem(zip_data).await
}
#[cfg(feature = "deflate")]
#[tokio::test]
async fn zip_decompress_in_out() {
let zip_data = common::compress_to_mem(Compression::Deflate).await;
common::check_decompress_mem(zip_data).await
}
#[tokio::test]
async fn zip_store_in_out() {
let zip_data = common::compress_to_mem(Compression::Stored).await;
common::check_decompress_mem(zip_data).await
}
#[tokio::test]
async fn zip_utf8_extra_in_out_stream() {
let mut zip_bytes = Vec::with_capacity(10_000);
{
// writing
let content = "Test".as_bytes();
let mut writer = async_zip::base::write::ZipFileWriter::new(&mut zip_bytes);
let filename =
ZipString::new_with_alternative("\u{4E2D}\u{6587}.txt".to_string(), b"\xD6\xD0\xCe\xC4.txt".to_vec());
let opts = ZipEntryBuilder::new(filename, Compression::Stored);
let mut entry_writer = writer.write_entry_stream(opts).await.unwrap();
entry_writer.write_all(content).await.unwrap();
entry_writer.close().await.unwrap();
writer.close().await.unwrap();
}
{
// reading
let zip = async_zip::base::read::mem::ZipFileReader::new(zip_bytes).await.unwrap();
let zip_entries: Vec<_> = zip.file().entries().to_vec();
assert_eq!(zip_entries.len(), 1);
assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt");
assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref()));
}
}
#[tokio::test]
async fn zip_utf8_extra_in_out_whole() {
let mut zip_bytes = Vec::with_capacity(10_000);
{
// writing
let content = "Test".as_bytes();
let mut writer = async_zip::base::write::ZipFileWriter::new(&mut zip_bytes);
let filename =
ZipString::new_with_alternative("\u{4E2D}\u{6587}.txt".to_string(), b"\xD6\xD0\xCe\xC4.txt".to_vec());
let opts = ZipEntryBuilder::new(filename, Compression::Stored);
writer.write_entry_whole(opts, content).await.unwrap();
writer.close().await.unwrap();
}
{
// reading
let zip = async_zip::base::read::mem::ZipFileReader::new(zip_bytes).await.unwrap();
let zip_entries: Vec<_> = zip.file().entries().to_vec();
assert_eq!(zip_entries.len(), 1);
assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt");
assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref()));
}
}

View file

@ -0,0 +1,89 @@
// Copyright (c) 2023 Harry [Majored] [hello@majored.pw]
// MIT License (https://github.com/Majored/rs-async-zip/blob/main/LICENSE)
#![allow(dead_code)]
use tokio::io::BufReader;
use tokio_util::compat::TokioAsyncReadCompatExt;
mod common;
const ZSTD_ZIP_FILE: &str = "tests/test_inputs/sample_data.zstd.zip";
const DEFLATE_ZIP_FILE: &str = "tests/test_inputs/sample_data.deflate.zip";
const STORE_ZIP_FILE: &str = "tests/test_inputs/sample_data.store.zip";
const UTF8_EXTRA_ZIP_FILE: &str = "tests/test_inputs/sample_data_utf8_extra.zip";
#[cfg(feature = "zstd")]
#[tokio::test]
async fn decompress_zstd_zip_seek() {
common::check_decompress_seek(ZSTD_ZIP_FILE).await
}
#[cfg(feature = "deflate")]
#[tokio::test]
async fn decompress_deflate_zip_seek() {
common::check_decompress_seek(DEFLATE_ZIP_FILE).await
}
#[tokio::test]
async fn check_empty_zip_seek() {
let mut data: Vec<u8> = Vec::new();
async_zip::base::write::ZipFileWriter::new(futures_lite::io::Cursor::new(&mut data)).close().await.unwrap();
async_zip::base::read::seek::ZipFileReader::new(futures_lite::io::Cursor::new(&data)).await.unwrap();
}
#[tokio::test]
async fn decompress_store_zip_seek() {
common::check_decompress_seek(STORE_ZIP_FILE).await
}
#[cfg(feature = "zstd")]
#[tokio::test]
async fn decompress_zstd_zip_mem() {
let content = tokio::fs::read(ZSTD_ZIP_FILE).await.unwrap();
common::check_decompress_mem(content).await
}
#[cfg(feature = "deflate")]
#[tokio::test]
async fn decompress_deflate_zip_mem() {
let content = tokio::fs::read(DEFLATE_ZIP_FILE).await.unwrap();
common::check_decompress_mem(content).await
}
#[tokio::test]
async fn decompress_store_zip_mem() {
let content = tokio::fs::read(STORE_ZIP_FILE).await.unwrap();
common::check_decompress_mem(content).await
}
#[cfg(feature = "zstd")]
#[cfg(feature = "tokio-fs")]
#[tokio::test]
async fn decompress_zstd_zip_fs() {
common::check_decompress_fs(ZSTD_ZIP_FILE).await
}
#[cfg(feature = "deflate")]
#[cfg(feature = "tokio-fs")]
#[tokio::test]
async fn decompress_deflate_zip_fs() {
common::check_decompress_fs(DEFLATE_ZIP_FILE).await
}
#[cfg(feature = "tokio-fs")]
#[tokio::test]
async fn decompress_store_zip_fs() {
common::check_decompress_fs(STORE_ZIP_FILE).await
}
#[tokio::test]
async fn decompress_zip_with_utf8_extra() {
let file = BufReader::new(tokio::fs::File::open(UTF8_EXTRA_ZIP_FILE).await.unwrap());
let mut file_compat = file.compat();
let zip = async_zip::base::read::seek::ZipFileReader::new(&mut file_compat).await.unwrap();
let zip_entries: Vec<_> = zip.file().entries().to_vec();
assert_eq!(zip_entries.len(), 1);
assert_eq!(zip_entries[0].header_size(), 93);
assert_eq!(zip_entries[0].filename().as_str().unwrap(), "\u{4E2D}\u{6587}.txt");
assert_eq!(zip_entries[0].filename().alternative(), Some(b"\xD6\xD0\xCe\xC4.txt".as_ref()));
}

View file

@ -0,0 +1,4 @@
Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a
Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a
Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a
Z,z,Y,y,X,x,W,w,V,v,U,u,T,t,S,s,R,r,Q,q,P,p,O,o,N,n,M,m,L,l,K,k,J,j,I,I,H,h,G,g,F,f,E,e,D,d,C,c,B,b,A,a

View file

@ -0,0 +1,4 @@
A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z
A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z
A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z
A,a,B,b,C,c,D,d,E,e,F,f,G,g,H,h,I,I,J,j,K,k,L,l,M,m,N,n,O,o,P,p,Q,q,R,r,S,s,T,t,U,u,V,v,W,w,X,x,Y,y,Z,z

View file

@ -0,0 +1 @@
1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32

View file

@ -0,0 +1 @@
32,31,30,29,28,27,26,25,24,23,22,21,20,19,18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2,1

21
crates/envy/Cargo.toml Normal file
View file

@ -0,0 +1,21 @@
[package]
name = "envy"
version = "0.4.2"
authors = ["softprops <d.tangren@gmail.com>"]
description = "deserialize env vars into typesafe structs"
documentation = "https://softprops.github.io/envy"
homepage = "https://github.com/softprops/envy"
repository = "https://github.com/softprops/envy"
keywords = ["serde", "env"]
license = "MIT"
readme = "README.md"
edition = "2021"
categories = [
"config"
]
[dependencies]
serde = "1.0"
[dev-dependencies]
serde = { version = "1.0", features = ["derive"] }

55
crates/envy/src/error.rs Normal file
View file

@ -0,0 +1,55 @@
//! Error types
use serde::de::Error as SerdeError;
use std::{error::Error as StdError, fmt};
/// Types of errors that may result from failed attempts
/// to deserialize a type from env vars
#[derive(Debug, Clone, PartialEq)]
pub enum Error {
MissingValue(String),
Custom(String),
}
impl StdError for Error {}
impl fmt::Display for Error {
fn fmt(&self, fmt: &mut fmt::Formatter) -> fmt::Result {
match self {
Error::MissingValue(field) => write!(fmt, "missing value for {}", &field),
Error::Custom(ref msg) => write!(fmt, "{}", msg),
}
}
}
impl SerdeError for Error {
fn custom<T: fmt::Display>(msg: T) -> Self {
Error::Custom(format!("{}", msg))
}
fn missing_field(field: &'static str) -> Error {
Error::MissingValue(field.into())
}
}
#[cfg(test)]
mod tests {
use super::*;
fn impl_std_error<E: StdError>(_: E) {}
#[test]
fn error_impl_std_error() {
impl_std_error(Error::MissingValue("FOO_BAR".into()));
impl_std_error(Error::Custom("whoops".into()))
}
#[test]
fn error_display() {
assert_eq!(
format!("{}", Error::MissingValue("FOO_BAR".into())),
"missing value for FOO_BAR"
);
assert_eq!(format!("{}", Error::Custom("whoops".into())), "whoops")
}
}

560
crates/envy/src/lib.rs Normal file
View file

@ -0,0 +1,560 @@
//! Envy is a library for deserializing environment variables into typesafe structs
//!
//! # Examples
//!
//! A typical use case for envy is deserializing configuration stored in a process' environment into a struct
//! whose fields map to the names of env vars.
//!
//! Serde makes it easy to provide a deserializable struct with its [derivable Deserialize](https://serde.rs/derive.html)
//! procedural macro.
//!
//! Simply ask for an instance of that struct from envy's `from_env` function.
//!
//! ```no_run
//! use serde::Deserialize;
//!
//! #[derive(Deserialize, Debug)]
//! struct Config {
//! foo: u16,
//! bar: bool,
//! baz: String,
//! boom: Option<u64>,
//! }
//!
//! match envy::from_env::<Config>() {
//! Ok(config) => println!("{:#?}", config),
//! Err(error) => eprintln!("{:#?}", error),
//! }
//! ```
//!
//! Special treatment is given to collections. For config fields that store a `Vec` of values,
//! use an env var containing a comma-separated list of values.
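//!
//! For instance, a minimal sketch (the `PORTS`/`ports` names are purely illustrative):
//!
//! ```no_run
//! # use serde::Deserialize;
//! #[derive(Deserialize, Debug)]
//! struct Config {
//! ports: Vec<u16>,
//! }
//!
//! // With `PORTS=8080,8081,8082` set in the environment,
//! // `config.ports` deserializes to `vec![8080, 8081, 8082]`.
//! let config = envy::from_env::<Config>().unwrap();
//! println!("{:?}", config.ports);
//! ```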
//!
//! All serde modifiers should work as is.
//!
//! Enums with unit variants can be used as values:
//!
//! ```no_run
//! # use serde::Deserialize;
//!
//! #[derive(Deserialize, Debug, PartialEq)]
//! #[serde(rename_all = "lowercase")]
//! pub enum Size {
//! Small,
//! Medium,
//! Large,
//! }
//!
//! #[derive(Deserialize, Debug)]
//! struct Config {
//! size: Size,
//! }
//!
//! // set env var for size as `SIZE=medium`
//! match envy::from_env::<Config>() {
//! Ok(config) => println!("{:#?}", config),
//! Err(error) => eprintln!("{:#?}", error),
//! }
//! ```
use serde::de::{
self,
value::{MapDeserializer, SeqDeserializer},
IntoDeserializer,
};
use std::{
borrow::Cow,
env,
iter::{empty, IntoIterator},
};
// Ours
mod error;
pub use crate::error::Error;
/// A `Result` type specific to envy's [`Error`]
pub type Result<T> = std::result::Result<T, Error>;
struct Vars<Iter>(Iter)
where
Iter: IntoIterator<Item = (String, String)>;
struct Val(String, String);
impl<'de> IntoDeserializer<'de, Error> for Val {
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
struct VarName(String);
impl<'de> IntoDeserializer<'de, Error> for VarName {
type Deserializer = Self;
fn into_deserializer(self) -> Self::Deserializer {
self
}
}
impl<Iter: Iterator<Item = (String, String)>> Iterator for Vars<Iter> {
type Item = (VarName, Val);
fn next(&mut self) -> Option<Self::Item> {
self.0
.next()
.map(|(k, v)| (VarName(k.to_lowercase()), Val(k, v)))
}
}
macro_rules! forward_parsed_values {
($($ty:ident => $method:ident,)*) => {
$(
fn $method<V>(self, visitor: V) -> Result<V::Value>
where V: de::Visitor<'de>
{
match self.1.parse::<$ty>() {
Ok(val) => val.into_deserializer().$method(visitor),
Err(e) => Err(de::Error::custom(format_args!("{} while parsing value '{}' provided by {}", e, self.1, self.0)))
}
}
)*
}
}
impl<'de> de::Deserializer<'de> for Val {
type Error = Error;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
self.1.into_deserializer().deserialize_any(visitor)
}
fn deserialize_seq<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
// std::str::split doesn't work as expected for our use case: when we
// get an empty string we want to produce an empty Vec, but split would
// still yield an iterator with an empty string in it. So we need to
// special case empty strings.
if self.1.is_empty() {
SeqDeserializer::new(empty::<Val>()).deserialize_seq(visitor)
} else {
let values = self
.1
.split(',')
.map(|v| Val(self.0.clone(), v.trim().to_owned()));
SeqDeserializer::new(values).deserialize_seq(visitor)
}
}
fn deserialize_option<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
if self.1.is_empty() {
visitor.visit_none()
} else {
visitor.visit_some(self)
}
}
forward_parsed_values! {
u8 => deserialize_u8,
u16 => deserialize_u16,
u32 => deserialize_u32,
u64 => deserialize_u64,
u128 => deserialize_u128,
i8 => deserialize_i8,
i16 => deserialize_i16,
i32 => deserialize_i32,
i64 => deserialize_i64,
i128 => deserialize_i128,
f32 => deserialize_f32,
f64 => deserialize_f64,
}
fn deserialize_bool<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
if self.1 == "1" || self.1.eq_ignore_ascii_case("true") {
visitor.visit_bool(true)
} else if self.1 == "0" || self.0.eq_ignore_ascii_case("false") {
visitor.visit_bool(false)
} else {
Err(de::Error::custom(format_args!(
"error parsing boolean value: '{}'",
self.1
)))
}
}
#[inline]
fn deserialize_newtype_struct<V>(self, _: &'static str, visitor: V) -> Result<V::Value>
where
V: serde::de::Visitor<'de>,
{
visitor.visit_newtype_struct(self)
}
fn deserialize_enum<V>(
self,
_name: &'static str,
_variants: &'static [&'static str],
visitor: V,
) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
visitor.visit_enum(self.1.into_deserializer())
}
serde::forward_to_deserialize_any! {
char str string unit
bytes byte_buf map unit_struct tuple_struct
identifier tuple ignored_any
struct
}
}
impl<'de> de::Deserializer<'de> for VarName {
type Error = Error;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
self.0.into_deserializer().deserialize_any(visitor)
}
#[inline]
fn deserialize_newtype_struct<V>(self, _: &'static str, visitor: V) -> Result<V::Value>
where
V: serde::de::Visitor<'de>,
{
visitor.visit_newtype_struct(self)
}
serde::forward_to_deserialize_any! {
char str string unit seq option
bytes byte_buf map unit_struct tuple_struct
identifier tuple ignored_any enum
struct bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64
}
}
/// A deserializer for env vars
struct Deserializer<'de, Iter: Iterator<Item = (String, String)>> {
inner: MapDeserializer<'de, Vars<Iter>, Error>,
}
impl<'de, Iter: Iterator<Item = (String, String)>> Deserializer<'de, Iter> {
fn new(vars: Iter) -> Self {
Deserializer {
inner: MapDeserializer::new(Vars(vars)),
}
}
}
impl<'de, Iter: Iterator<Item = (String, String)>> de::Deserializer<'de>
for Deserializer<'de, Iter>
{
type Error = Error;
fn deserialize_any<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
self.deserialize_map(visitor)
}
fn deserialize_map<V>(self, visitor: V) -> Result<V::Value>
where
V: de::Visitor<'de>,
{
visitor.visit_map(self.inner)
}
serde::forward_to_deserialize_any! {
bool u8 u16 u32 u64 i8 i16 i32 i64 f32 f64 char str string unit seq
bytes byte_buf unit_struct tuple_struct
identifier tuple ignored_any option newtype_struct enum
struct
}
}
/// Deserializes a type based on information stored in env variables
pub fn from_env<T>() -> Result<T>
where
T: de::DeserializeOwned,
{
from_iter(env::vars())
}
/// Deserializes a type based on an iterable of `(String, String)`
/// representing keys and values
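///
/// A minimal sketch (names are illustrative), mirroring what the unit tests below do:
///
/// ```no_run
/// # use serde::Deserialize;
/// #[derive(Deserialize, Debug)]
/// struct Config {
/// port: u16,
/// }
///
/// let vars = vec![(String::from("PORT"), String::from("8080"))];
/// let config: Config = envy::from_iter(vars).unwrap();
/// # println!("{:?}", config);
/// ```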
pub fn from_iter<Iter, T>(iter: Iter) -> Result<T>
where
T: de::DeserializeOwned,
Iter: IntoIterator<Item = (String, String)>,
{
T::deserialize(Deserializer::new(iter.into_iter())).map_err(|error| match error {
Error::MissingValue(value) => Error::MissingValue(value.to_uppercase()),
_ => error,
})
}
/// A type which filters env vars with a prefix for use as serde field inputs
///
/// These types are created with the [prefixed](fn.prefixed.html) module function
pub struct Prefixed<'a>(Cow<'a, str>);
impl<'a> Prefixed<'a> {
/// Deserializes a type based on prefixed env variables
pub fn from_env<T>(&self) -> Result<T>
where
T: de::DeserializeOwned,
{
self.from_iter(env::vars())
}
/// Deserializes a type based on prefixed (String, String) tuples
pub fn from_iter<Iter, T>(&self, iter: Iter) -> Result<T>
where
T: de::DeserializeOwned,
Iter: IntoIterator<Item = (String, String)>,
{
crate::from_iter(iter.into_iter().filter_map(|(k, v)| {
if k.starts_with(self.0.as_ref()) {
Some((k.trim_start_matches(self.0.as_ref()).to_owned(), v))
} else {
None
}
}))
.map_err(|error| match error {
Error::MissingValue(value) => Error::MissingValue(
format!("{prefix}{value}", prefix = self.0, value = value).to_uppercase(),
),
_ => error,
})
}
}
/// Produces an instance of `Prefixed` for prefixing env variable names
///
/// # Example
///
/// ```no_run
/// use serde::Deserialize;
///
/// #[derive(Deserialize, Debug)]
/// struct Config {
/// foo: u16,
/// bar: bool,
/// baz: String,
/// boom: Option<u64>,
/// }
///
/// // all env variables will be expected to be prefixed with APP_
/// // i.e. APP_FOO, APP_BAR, etc.
/// match envy::prefixed("APP_").from_env::<Config>() {
/// Ok(config) => println!("{:#?}", config),
/// Err(error) => eprintln!("{:#?}", error),
/// }
/// ```
pub fn prefixed<'a, C>(prefix: C) -> Prefixed<'a>
where
C: Into<Cow<'a, str>>,
{
Prefixed(prefix.into())
}
#[cfg(test)]
mod tests {
use super::*;
use serde::Deserialize;
use std::collections::HashMap;
#[derive(Default, Deserialize, Debug, PartialEq)]
#[serde(rename_all = "lowercase")]
pub enum Size {
Small,
#[default]
Medium,
Large,
}
pub fn default_kaboom() -> u16 {
8080
}
#[derive(Deserialize, Debug, PartialEq)]
pub struct CustomNewType(u32);
#[derive(Deserialize, Debug, PartialEq)]
pub struct Foo {
bar: String,
baz: bool,
zoom: Option<u16>,
doom: Vec<u64>,
boom: Vec<String>,
#[serde(default = "default_kaboom")]
kaboom: u16,
#[serde(default)]
debug_mode: bool,
#[serde(default)]
size: Size,
provided: Option<String>,
newtype: CustomNewType,
}
#[test]
fn deserialize_from_iter() {
let data = vec![
(String::from("BAR"), String::from("test")),
(String::from("BAZ"), String::from("true")),
(String::from("DOOM"), String::from("1, 2, 3 ")),
// Empty string should result in empty vector.
(String::from("BOOM"), String::from("")),
(String::from("SIZE"), String::from("small")),
(String::from("PROVIDED"), String::from("test")),
(String::from("NEWTYPE"), String::from("42")),
];
match from_iter::<_, Foo>(data) {
Ok(actual) => assert_eq!(
actual,
Foo {
bar: String::from("test"),
baz: true,
zoom: None,
doom: vec![1, 2, 3],
boom: vec![],
kaboom: 8080,
debug_mode: false,
size: Size::Small,
provided: Some(String::from("test")),
newtype: CustomNewType(42)
}
),
Err(e) => panic!("{:#?}", e),
}
}
#[test]
fn fails_with_missing_value() {
let data = vec![
(String::from("BAR"), String::from("test")),
(String::from("BAZ"), String::from("true")),
];
match from_iter::<_, Foo>(data) {
Ok(_) => panic!("expected failure"),
Err(e) => assert_eq!(e, Error::MissingValue("DOOM".into())),
}
}
#[test]
fn prefixed_fails_with_missing_value() {
let data = vec![
(String::from("PREFIX_BAR"), String::from("test")),
(String::from("PREFIX_BAZ"), String::from("true")),
];
match prefixed("PREFIX_").from_iter::<_, Foo>(data) {
Ok(_) => panic!("expected failure"),
Err(e) => assert_eq!(e, Error::MissingValue("PREFIX_DOOM".into())),
}
}
#[test]
fn fails_with_invalid_type() {
let data = vec![
(String::from("BAR"), String::from("test")),
(String::from("BAZ"), String::from("notabool")),
(String::from("DOOM"), String::from("1,2,3")),
];
match from_iter::<_, Foo>(data) {
Ok(_) => panic!("expected failure"),
Err(e) => assert_eq!(
e,
Error::Custom(String::from("provided string was not `true` or `false` while parsing value \'notabool\' provided by BAZ"))
),
}
}
#[test]
fn deserializes_from_prefixed_fieldnames() {
let data = vec![
(String::from("APP_BAR"), String::from("test")),
(String::from("APP_BAZ"), String::from("true")),
(String::from("APP_DOOM"), String::from("")),
(String::from("APP_BOOM"), String::from("4,5")),
(String::from("APP_SIZE"), String::from("small")),
(String::from("APP_PROVIDED"), String::from("test")),
(String::from("APP_NEWTYPE"), String::from("42")),
];
match prefixed("APP_").from_iter::<_, Foo>(data) {
Ok(actual) => assert_eq!(
actual,
Foo {
bar: String::from("test"),
baz: true,
zoom: None,
doom: vec![],
boom: vec!["4".to_string(), "5".to_string()],
kaboom: 8080,
debug_mode: false,
size: Size::Small,
provided: Some(String::from("test")),
newtype: CustomNewType(42)
}
),
Err(e) => panic!("{:#?}", e),
}
}
#[test]
fn prefixed_strips_prefixes() {
let mut expected = HashMap::new();
expected.insert("foo".to_string(), "bar".to_string());
assert_eq!(
prefixed("PRE_").from_iter(vec![("PRE_FOO".to_string(), "bar".to_string())]),
Ok(expected)
);
}
#[test]
fn prefixed_doesnt_parse_non_prefixed() {
let mut expected = HashMap::new();
expected.insert("foo".to_string(), 12);
assert_eq!(
prefixed("PRE_").from_iter(vec![
("FOO".to_string(), "asd".to_string()),
("PRE_FOO".to_string(), "12".to_string())
]),
Ok(expected)
);
}
#[test]
fn deserialize_optional() {
#[derive(Deserialize)]
#[serde(default)]
struct X {
val: Option<u32>,
}
impl Default for X {
fn default() -> Self {
Self { val: Some(123) }
}
}
let data = vec![(String::from("VAL"), String::from(""))];
let res = from_iter::<_, X>(data).unwrap();
assert_eq!(res.val, None)
}
}

1
resources/icon.opt.svg Normal file
View file

@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" width="50" height="50" viewBox="0 0 13.229 13.229"><g aria-label="AV" style="font-size:10.5833px;line-height:1.25;stroke-width:.264583"><path d="m12.381 2.878-2.698 7.557H8.73L6.031 2.878h.995L8.73 7.725q.17.466.286.879.116.402.19.772.074-.37.19-.783.117-.413.287-.889l1.693-4.826Z" style="fill:#888;fill-opacity:1"/><path d="m1.158 10.435 2.699-7.557h.952l2.699 7.557h-.995L4.81 5.588q-.169-.466-.285-.879-.117-.402-.19-.772-.075.37-.191.783-.117.412-.286.889l-1.694 4.826Z" style="font-size:10.5833px;line-height:1.25;fill:#ddd;fill-opacity:1;stroke-width:.264583"/></g></svg>


56
resources/icon.svg Normal file
View file

@ -0,0 +1,56 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<!-- Created with Inkscape (http://www.inkscape.org/) -->
<svg
width="50"
height="50"
viewBox="0 0 13.229166 13.229167"
version="1.1"
id="svg5"
inkscape:version="1.2.2 (b0a8486541, 2022-12-01)"
sodipodi:docname="logo.svg"
xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape"
xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<sodipodi:namedview
id="namedview7"
pagecolor="#000000"
bordercolor="#666666"
borderopacity="1.0"
inkscape:showpageshadow="2"
inkscape:pageopacity="0"
inkscape:pagecheckerboard="0"
inkscape:deskcolor="#d1d1d1"
inkscape:document-units="mm"
showgrid="false"
inkscape:zoom="10.982338"
inkscape:cx="3.8243224"
inkscape:cy="29.046639"
inkscape:window-width="2516"
inkscape:window-height="1051"
inkscape:window-x="0"
inkscape:window-y="0"
inkscape:window-maximized="1"
inkscape:current-layer="text236" />
<defs
id="defs2" />
<g
inkscape:label="Layer 1"
inkscape:groupmode="layer"
id="layer1">
<g
aria-label="AV"
id="text236"
style="font-size:10.5833px;line-height:1.25;stroke-width:0.264583">
<path
d="M 12.381365,2.8782164 9.6826233,10.434692 H 8.7301265 L 6.031385,2.8782164 H 7.0262152 L 8.7301265,7.7253677 Q 8.8994592,8.1910329 9.0158755,8.6037815 9.1322918,9.0059469 9.2063749,9.3763624 9.280458,9.0059469 9.3968743,8.5931982 9.5132903,8.1804496 9.6826233,7.7042011 L 11.375951,2.8782164 Z"
style="fill:#888888;fill-opacity:1"
id="path402" />
<path
d="M 1.1580623,10.434692 3.8568039,2.8782162 H 4.8093007 L 7.5080423,10.434692 H 6.513212 L 4.8093007,5.5875401 Q 4.639968,5.1218752 4.5235518,4.7091272 4.4071354,4.3069612 4.3330523,3.9365462 4.2589693,4.3069612 4.1425529,4.7197102 4.0261369,5.1324582 3.8568039,5.6087071 L 2.1634763,10.434692 Z"
style="font-size:10.5833px;line-height:1.25;fill:#dddddd;fill-opacity:1;stroke-width:0.264583"
id="path402-3" />
</g>
</g>
</svg>


410
src/app.rs Normal file
View file

@ -0,0 +1,410 @@
use std::{ops::Bound, path::PathBuf, sync::Arc};
use async_zip::tokio::read::ZipEntryReader;
use axum::{
body::Body,
extract::{Host, Request, State},
http::{Response, Uri},
response::{IntoResponse, Redirect},
routing::{any, get, post},
Form, Json, Router,
};
use headers::HeaderMapExt;
use http::{HeaderMap, StatusCode};
use serde::Deserialize;
use tokio::{
fs::File,
io::{AsyncBufReadExt, AsyncReadExt, BufReader},
};
use tokio_util::{
compat::{FuturesAsyncReadCompatExt, TokioAsyncReadCompatExt},
io::ReaderStream,
};
use tower_http::trace::{DefaultOnResponse, TraceLayer};
use crate::{
artifact_api::{Artifact, ArtifactApi, ArtifactOrRun},
cache::{Cache, CacheEntry, GetEntryResult, GetFileResult, GetFileResultFile, IndexEntry},
config::Config,
error::{Error, Result},
gzip_reader::{PrecompressedGzipReader, GZIP_EXTRA_LEN},
query::Query,
templates::{self, LinkItem},
util::{self, InsertTypedHeader},
App,
};
#[derive(Clone)]
struct AppState {
i: Arc<AppInner>,
}
struct AppInner {
cfg: Config,
cache: Cache,
api: ArtifactApi,
}
impl Default for App {
fn default() -> Self {
Self::new()
}
}
#[derive(Deserialize)]
struct UrlForm {
url: String,
}
impl App {
pub fn new() -> Self {
Self
}
fn new_state(&self) -> AppState {
AppState::new()
}
pub async fn run(&self) -> Result<()> {
let address = "0.0.0.0:3000";
let listener = tokio::net::TcpListener::bind(address).await?;
tracing::info!("Listening on http://{address}");
let router = Router::new()
// Prevent search indexing since artifactview serves temporary artifacts
.route(
"/robots.txt",
get(|| async { "User-agent: *\nDisallow: /\n" }),
)
// Put the API in the .well-known folder, since it is disabled for pages
.route("/.well-known/api/artifacts", get(Self::get_artifacts))
.route("/.well-known/api/artifact", get(Self::get_artifact))
.route("/.well-known/api/files", get(Self::get_files))
// Prevent access to the .well-known folder since it enables abuse
// (e.g. SSL certificate registration by an attacker)
.route("/.well-known/*path", any(|| async { Error::Inaccessible }))
// Serve artifact pages
.route("/", get(Self::get_page))
.route("/", post(Self::post_homepage))
.fallback(get(Self::get_page))
.with_state(self.new_state())
// Log requests
.layer(
TraceLayer::new_for_http()
.make_span_with(|request: &Request<Body>| {
tracing::error_span!("request", url = util::full_url_from_request(request),)
})
.on_response(DefaultOnResponse::new().level(tracing::Level::INFO)),
);
axum::serve(listener, router).await?;
Ok(())
}
async fn get_page(
State(state): State<AppState>,
Host(host): Host,
uri: Uri,
request: Request,
) -> Result<Response<Body>> {
let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?;
if subdomain.is_empty() {
// Main page
if uri.path() != "/" {
return Err(Error::NotFound("path".into()));
}
Ok(Response::builder()
.typed_header(headers::ContentType::html())
.body(templates::Index::default().to_string().into())?)
} else {
let query = Query::from_subdomain(subdomain)?;
let path = percent_encoding::percent_decode_str(uri.path()).decode_utf8_lossy();
let hdrs = request.headers();
let res = state.i.cache.get_entry(&state.i.api, &query).await?;
match res {
GetEntryResult::Entry { entry, zip_path } => {
match entry.get_file(&path, uri.query().unwrap_or_default())? {
GetFileResult::File(res) => {
Self::serve_artifact_file(state, entry, zip_path, res, hdrs).await
}
GetFileResult::Listing(listing) => {
if !path.ends_with('/') {
return Ok(Redirect::to(&format!("{path}/")).into_response());
}
// TODO: store actual artifact names
let artifact_name = format!("A{}", query.artifact.unwrap());
let mut path_components = vec![
LinkItem {
name: query.shortid(),
url: state
.i
.cfg
.url_with_subdomain(&query.subdomain_with_artifact(None)),
},
LinkItem {
name: artifact_name.to_owned(),
url: "/".to_string(),
},
];
let mut buf = String::new();
for s in path.split('/').filter(|s| !s.is_empty()) {
buf.push('/');
buf += s;
path_components.push(LinkItem {
name: s.to_owned(),
url: buf.clone(),
});
}
let tmpl = templates::Listing {
main_url: state.i.cfg.main_url(),
version: templates::Version,
artifact_name: &artifact_name,
path_components,
n_dirs: listing.n_dirs,
n_files: listing.n_files,
has_parent: listing.has_parent,
entries: listing.entries,
};
Ok(Response::builder()
.typed_header(headers::ContentType::html())
.body(tmpl.to_string().into())?)
}
}
}
GetEntryResult::Artifacts(artifacts) => {
if uri.path() != "/" {
return Err(Error::NotFound("path".into()));
}
if artifacts.is_empty() {
return Err(Error::NotFound("artifacts".into()));
}
let tmpl = templates::Selection {
main_url: state.i.cfg.main_url(),
run_url: &query.forge_url(),
run_name: &query.shortid(),
artifacts: artifacts
.into_iter()
.map(|a| LinkItem::from_artifact(a, &query, &state.i.cfg))
.collect(),
};
Ok(Response::builder()
.typed_header(headers::ContentType::html())
.body(tmpl.to_string().into())?)
}
}
}
}
async fn post_homepage(
State(state): State<AppState>,
Host(host): Host,
Form(url): Form<UrlForm>,
) -> Result<Redirect> {
let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?;
if subdomain.is_empty() {
let query = Query::from_forge_url(&url.url)?;
let subdomain = query.subdomain();
let target = format!(
"{}{}.{}",
state.i.cfg.url_proto(),
subdomain,
state.i.cfg.load().root_domain
);
Ok(Redirect::to(&target))
} else {
Err(Error::MethodNotAllowed)
}
}
async fn serve_artifact_file(
state: AppState,
entry: Arc<CacheEntry>,
zip_path: PathBuf,
res: GetFileResultFile,
hdrs: &HeaderMap,
) -> Result<Response<Body>> {
let file = res.file;
// Don't serve files above the configured size limit
let lim = state.i.cfg.load().max_file_size;
if lim.is_some_and(|lim| file.uncompressed_size > lim) {
return Err(Error::BadRequest(
format!(
"file too large (size: {}, limit: {})",
file.uncompressed_size,
lim.unwrap()
)
.into(),
));
}
let mut resp = Response::builder()
.status(res.status)
.typed_header(headers::AcceptRanges::bytes());
if let Some(mime) = res.mime {
resp = resp.typed_header(headers::ContentType::from(mime));
}
if let Some(last_mod) = entry.last_modified {
resp = resp.typed_header(headers::LastModified::from(last_mod));
}
// handle if-(un)modified queries
if let Some(modified) = entry.last_modified {
if let Some(if_unmodified_since) = hdrs.typed_get::<headers::IfUnmodifiedSince>() {
if !if_unmodified_since.precondition_passes(modified) {
return Ok(resp
.status(StatusCode::PRECONDITION_FAILED)
.body(Body::empty())?);
}
}
if let Some(if_modified_since) = hdrs.typed_get::<headers::IfModifiedSince>() {
if !if_modified_since.is_modified(modified) {
return Ok(resp.status(StatusCode::NOT_MODIFIED).body(Body::empty())?);
}
}
}
let zip_file = File::open(&zip_path).await?;
let range = hdrs.typed_get::<headers::Range>();
if matches!(file.compression, async_zip::Compression::Deflate)
&& range.is_none()
&& util::accepts_gzip(hdrs)
{
// Serve the stored Deflate stream as gzip (header and trailer added on the fly), avoiding recompression
let reader = PrecompressedGzipReader::new(zip_file, &file).await?;
resp = resp
.typed_header(headers::ContentLength(
u64::from(file.compressed_size) + GZIP_EXTRA_LEN,
))
.typed_header(headers::ContentEncoding::gzip());
Ok(resp.body(Body::from_stream(ReaderStream::new(reader)))?)
} else {
// Read decompressed file
let mut zip_reader = BufReader::new(zip_file);
util::seek_to_data_offset(&mut zip_reader, file.header_offset.into()).await?;
let reader = ZipEntryReader::new_with_owned(
zip_reader.compat(),
file.compression,
file.compressed_size.into(),
);
if let Some(rheader) = range {
let total_len = u64::from(file.uncompressed_size);
let mut ranges = rheader.satisfiable_ranges(total_len);
if let Some(range) = ranges.next() {
if ranges.next().is_some() {
return Err(Error::BadRequest(
"multipart ranges are not implemented".into(),
));
}
let start = match range.0 {
Bound::Included(n) => n,
Bound::Excluded(n) => n + 1,
Bound::Unbounded => 0,
};
let end = match range.1 {
Bound::Included(n) => n + 1,
Bound::Excluded(n) => n,
Bound::Unbounded => total_len,
};
let mut bufreader = tokio::io::BufReader::new(reader.compat());
// Advance the BufReader by the parsed offset
let mut to_consume = usize::try_from(start)?;
while to_consume > 0 {
let take = bufreader.fill_buf().await?.len().min(to_consume);
bufreader.consume(take);
to_consume -= take;
}
let content_length = end - start;
return Ok(resp
.status(StatusCode::PARTIAL_CONTENT)
.typed_header(headers::ContentLength(content_length))
.typed_header(
headers::ContentRange::bytes(range, total_len)
.map_err(|e| Error::Internal(e.to_string().into()))?,
)
.body(Body::from_stream(ReaderStream::new(
bufreader.take(content_length),
)))?);
}
}
Ok(resp
.typed_header(headers::ContentLength(file.uncompressed_size.into()))
.body(Body::from_stream(ReaderStream::new(reader.compat())))?)
}
}
/// API endpoint to list artifacts of a CI run
async fn get_artifacts(
State(state): State<AppState>,
Host(host): Host,
) -> Result<Json<Vec<Artifact>>> {
let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?;
let query = Query::from_subdomain(subdomain)?;
let artifacts = state.i.api.list(&query).await?;
Ok(Json(artifacts))
}
/// API endpoint to get the metadata of the current artifact
async fn get_artifact(
State(state): State<AppState>,
Host(host): Host,
) -> Result<Json<Artifact>> {
let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?;
let query = Query::from_subdomain(subdomain)?;
if query.artifact.is_none() {
return Err(Error::BadRequest("no artifact specified".into()));
}
let artifact = state.i.api.fetch(&query).await?;
match artifact {
ArtifactOrRun::Artifact(artifact) => Ok(Json(artifact)),
ArtifactOrRun::Run(_) => unreachable!(),
}
}
/// API endpoint to get a file listing
async fn get_files(
State(state): State<AppState>,
Host(host): Host,
) -> Result<Json<Vec<IndexEntry>>> {
let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?;
let query = Query::from_subdomain(subdomain)?;
if query.artifact.is_none() {
return Err(Error::BadRequest("no artifact specified".into()));
}
let res = state.i.cache.get_entry(&state.i.api, &query).await?;
let entry = match res {
GetEntryResult::Entry { entry, .. } => entry,
GetEntryResult::Artifacts(_) => unreachable!(),
};
let files = entry.get_files();
Ok(Json(files))
}
}
impl AppState {
pub fn new() -> Self {
let cfg = Config::default();
let cache = Cache::new(cfg.clone());
let api = ArtifactApi::new(cfg.clone());
Self {
i: Arc::new(AppInner { cfg, cache, api }),
}
}
}

View file

@ -1,10 +1,16 @@
//! API client to fetch CI artifacts from Github and Forgejo
use anyhow::{anyhow, Result};
use reqwest::{header, Client, ClientBuilder, IntoUrl, RequestBuilder};
use std::{fs::File, io::Cursor, path::Path};
use http::header;
use reqwest::{Client, ClientBuilder, IntoUrl, RequestBuilder, Url};
use serde::{Deserialize, Serialize};
use crate::{config::Config, query::Query};
use crate::{
config::Config,
error::{Error, Result},
query::Query,
};
pub struct ArtifactApi {
http: Client,
@ -20,6 +26,11 @@ pub struct Artifact {
pub download_url: String,
}
pub enum ArtifactOrRun {
Artifact(Artifact),
Run(Vec<Artifact>),
}
#[derive(Deserialize)]
struct GithubArtifact {
id: u64,
@ -61,7 +72,7 @@ impl From<GithubArtifact> for Artifact {
}
impl ForgejoArtifact {
fn to_artifact(self, id: u64, query: &Query) -> Artifact {
fn into_artifact(self, id: u64, query: &Query) -> Artifact {
Artifact {
download_url: format!(
"https://{}/{}/{}/actions/runs/{}/artifacts/{}",
@ -92,26 +103,76 @@ impl ArtifactApi {
pub async fn list(&self, query: &Query) -> Result<Vec<Artifact>> {
if query.is_github() {
self.list_forgejo(query).await
} else {
self.list_github(query).await
} else {
self.list_forgejo(query).await
}
}
pub async fn fetch(&self, query: &Query) -> Result<Artifact> {
pub async fn fetch(&self, query: &Query) -> Result<ArtifactOrRun> {
if query.is_github() {
self.fetch_github(query).await
} else {
// Forgejo currently has no API for fetching single artifacts
let mut artifacts = self.list_forgejo(query).await?;
let i = usize::try_from(query.artifact)?;
if i == 0 || i > artifacts.len() {
return Err(anyhow!("Artifact not found"));
match query.artifact {
Some(artifact) => {
let i = usize::try_from(artifact)?;
if i == 0 || i > artifacts.len() {
return Err(Error::NotFound("artifact".into()));
}
Ok(ArtifactOrRun::Artifact(artifacts.swap_remove(i - 1)))
}
None => Ok(ArtifactOrRun::Run(artifacts)),
}
Ok(artifacts.swap_remove(i - 1))
}
}
pub async fn download(&self, artifact: &Artifact, path: &Path) -> Result<()> {
if artifact.expired {
return Err(Error::Expired);
}
let lim = self.cfg.load().max_artifact_size;
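// When a limit is configured, reject artifacts larger than it; sizes that do not fit
// into a u32 are treated as exceeding the limit as well.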
let check_lim = |size: u64| {
if lim.is_some_and(|lim| u32::try_from(size).map(|size| size > lim).unwrap_or(true)) {
Err(Error::BadRequest(
format!(
"artifact too large (size: {}, limit: {})",
artifact.size,
lim.unwrap()
)
.into(),
))
} else {
Ok(())
}
};
check_lim(artifact.size)?;
let url = Url::parse(&artifact.download_url)?;
let req = if url.domain() == Some("api.github.com") {
self.get_github(url)
} else {
self.http.get(url)
};
let resp = req.send().await?.error_for_status()?;
if let Some(act_len) = resp.content_length() {
check_lim(act_len)?;
}
let tmp_path = path.with_extension(format!("tmp.{:x}", rand::random::<u32>()));
let mut file = File::create(&tmp_path)?;
let mut content = Cursor::new(resp.bytes().await?);
std::io::copy(&mut content, &mut file)?;
std::fs::rename(&tmp_path, path)?;
tracing::info!("Downloaded artifact from {}", artifact.download_url);
Ok(())
}
async fn list_forgejo(&self, query: &Query) -> Result<Vec<Artifact>> {
let url = format!(
"https://{}/{}/{}/actions/runs/{}/artifacts",
@ -131,7 +192,7 @@ impl ArtifactApi {
.artifacts
.into_iter()
.enumerate()
.map(|(i, a)| a.to_artifact(i as u64 + 1, query))
.map(|(i, a)| a.into_artifact(i as u64 + 1, query))
.collect::<Vec<_>>();
Ok(artifacts)
@ -154,21 +215,25 @@ impl ArtifactApi {
Ok(resp.artifacts.into_iter().map(Artifact::from).collect())
}
async fn fetch_github(&self, query: &Query) -> Result<Artifact> {
let url = format!(
"https://api.github.com/repos/{}/{}/actions/artifacts/{}",
query.user, query.repo, query.artifact
);
async fn fetch_github(&self, query: &Query) -> Result<ArtifactOrRun> {
match query.artifact {
Some(artifact) => {
let url = format!(
"https://api.github.com/repos/{}/{}/actions/artifacts/{}",
query.user, query.repo, artifact
);
let artifact = self
.get_github(url)
.send()
.await?
.error_for_status()?
.json::<GithubArtifact>()
.await?;
Ok(artifact.into())
let artifact = self
.get_github(url)
.send()
.await?
.error_for_status()?
.json::<GithubArtifact>()
.await?;
Ok(ArtifactOrRun::Artifact(artifact.into()))
}
None => Ok(ArtifactOrRun::Run(self.list_github(query).await?)),
}
}
fn get_github<U: IntoUrl>(&self, url: U) -> RequestBuilder {
@ -185,7 +250,7 @@ impl ArtifactApi {
mod tests {
use crate::{config::Config, query::Query};
use super::ArtifactApi;
use super::{ArtifactApi, ArtifactOrRun};
#[tokio::test]
async fn fetch_forgejo() {
@ -194,14 +259,22 @@ mod tests {
user: "HSA".to_owned(),
repo: "Visitenbuch".to_owned(),
run: 32,
artifact: 1,
artifact: Some(1),
};
let api = ArtifactApi::new(Config::default());
let res = api.fetch(&query).await.unwrap();
assert_eq!(res.name, "playwright-report");
assert_eq!(res.download_url, "https://code.thetadev.de/HSA/Visitenbuch/actions/runs/32/artifacts/playwright-report");
assert_eq!(res.id, 1);
assert_eq!(res.size, 574292);
if let ArtifactOrRun::Artifact(res) = res {
assert_eq!(res.name, "playwright-report");
assert_eq!(
res.download_url,
"https://code.thetadev.de/HSA/Visitenbuch/actions/runs/32/artifacts/playwright-report"
);
assert_eq!(res.id, 1);
assert_eq!(res.size, 574292);
} else {
panic!("got run");
}
}
#[tokio::test]
@ -211,13 +284,21 @@ mod tests {
user: "actions".to_owned(),
repo: "upload-artifact".to_owned(),
run: 8805345396,
artifact: 1440556464,
artifact: Some(1440556464),
};
let api = ArtifactApi::new(Config::default());
let res = api.fetch(&query).await.unwrap();
assert_eq!(res.name, "Artifact-Wildcard-macos-latest");
assert_eq!(res.download_url, "https://api.github.com/repos/actions/upload-artifact/actions/artifacts/1440556464/zip");
assert_eq!(res.id, 1440556464);
assert_eq!(res.size, 334);
if let ArtifactOrRun::Artifact(res) = res {
assert_eq!(res.name, "Artifact-Wildcard-macos-latest");
assert_eq!(
res.download_url,
"https://api.github.com/repos/actions/upload-artifact/actions/artifacts/1440556464/zip"
);
assert_eq!(res.id, 1440556464);
assert_eq!(res.size, 334);
} else {
panic!("got run");
}
}
}
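A test for the new run-listing path could look like this (sketch only, not part of the diff; `forgejo_query()` is a hypothetical helper returning the same query as in fetch_forgejo above, but with `artifact: None`):
#[tokio::test]
async fn fetch_forgejo_run() {
    let api = ArtifactApi::new(Config::default());
    match api.fetch(&forgejo_query()).await.unwrap() {
        ArtifactOrRun::Run(artifacts) => assert!(!artifacts.is_empty()),
        ArtifactOrRun::Artifact(_) => panic!("got artifact"),
    }
}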

317
src/cache.rs Normal file
View file

@ -0,0 +1,317 @@
use std::{
borrow::Cow,
collections::{BTreeMap, HashMap},
path::{Path, PathBuf},
sync::Arc,
time::{Duration, SystemTime},
};
use async_zip::{tokio::read::fs::ZipFileReader, Compression};
use http::StatusCode;
use mime::Mime;
use path_macro::path;
use quick_cache::sync::Cache as QuickCache;
use serde::Serialize;
use serde_hex::{SerHex, Strict};
use crate::{
artifact_api::{Artifact, ArtifactApi, ArtifactOrRun},
config::Config,
error::{Error, Result},
query::Query,
util,
};
pub struct Cache {
cfg: Config,
qc: QuickCache<[u8; 16], Arc<CacheEntry>>,
}
pub struct CacheEntry {
pub files: HashMap<String, FileEntry>,
pub last_modified: Option<SystemTime>,
}
#[derive(Clone)]
pub struct FileEntry {
pub header_offset: u32,
pub uncompressed_size: u32,
pub compressed_size: u32,
pub crc32: u32,
pub compression: Compression,
}
pub enum GetEntryResult {
Entry {
entry: Arc<CacheEntry>,
zip_path: PathBuf,
},
Artifacts(Vec<Artifact>),
}
pub enum GetFileResult {
File(GetFileResultFile),
Listing(Listing),
}
pub struct GetFileResultFile {
pub file: FileEntry,
pub mime: Option<Mime>,
pub status: StatusCode,
}
#[derive(Serialize)]
pub struct IndexEntry {
pub name: String,
pub size: u32,
#[serde(with = "SerHex::<Strict>")]
pub crc32: u32,
}
pub struct Listing {
pub entries: Vec<ListingEntry>,
pub n_files: usize,
pub n_dirs: usize,
pub has_parent: bool,
}
pub struct ListingEntry {
pub name: String,
pub url: String,
pub size: u32,
pub crc32: String,
pub is_dir: bool,
}
impl Cache {
pub fn new(cfg: Config) -> Self {
Self {
cfg,
qc: QuickCache::new(50),
}
}
pub fn get_path(&self, query: &Query) -> PathBuf {
path!(self.cfg.load().cache_dir / format!("{}.zip", hex::encode(query.siphash())))
}
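With the defaults from .env.example, the resulting path is the cache directory plus the hex-encoded siphash of the query (a [u8; 16] value, matching the cache key type); the hash below is made up:
// Illustrative: /tmp/artifactview/0123456789abcdef0123456789abcdef.zip
let p = cache.get_path(&query);
assert_eq!(p.extension().and_then(|e| e.to_str()), Some("zip"));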
pub async fn get_entry(&self, api: &ArtifactApi, query: &Query) -> Result<GetEntryResult> {
if query.artifact.is_some() {
let hash = query.siphash();
let zip_path = path!(self.cfg.load().cache_dir / format!("{}.zip", hex::encode(hash)));
if !zip_path.is_file() {
let artifact = api.fetch(query).await?;
let artifact = match artifact {
ArtifactOrRun::Artifact(artifact) => artifact,
ArtifactOrRun::Run(_) => unreachable!(),
};
api.download(&artifact, &zip_path).await?;
}
let timeout = self
.cfg
.load()
.zip_timeout_ms
.map(|t| Duration::from_millis(t.into()));
let mut entry = self
.qc
.get_or_insert_async(&hash, async {
Ok::<_, Error>(Arc::new(CacheEntry::new(&zip_path, timeout).await?))
})
.await?;
// Check whether the cached entry is still fresh
let meta = tokio::fs::metadata(&zip_path).await?;
if meta.modified().ok() != entry.last_modified {
tracing::info!("cached file {zip_path:?} changed");
entry = Arc::new(CacheEntry::new(&zip_path, timeout).await?);
self.qc.insert(hash, entry.clone());
}
Ok(GetEntryResult::Entry { entry, zip_path })
} else {
let run = api.fetch(query).await?;
let artifacts = match run {
ArtifactOrRun::Artifact(_) => unreachable!(),
ArtifactOrRun::Run(run) => run,
};
Ok(GetEntryResult::Artifacts(artifacts))
}
}
}
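A caller would typically match on the result to either serve a file from the cached zip or render the artifact list for the run; a rough sketch (the real axum handler lives elsewhere in this PR, and all names here are illustrative):
// Sketch only: resolve a request against the cache.
async fn resolve(cache: &Cache, api: &ArtifactApi, query: &Query,
                 req_path: &str, url_query: &str) -> Result<()> {
    match cache.get_entry(api, query).await? {
        GetEntryResult::Entry { entry, zip_path } => {
            // A specific artifact: look up the requested path inside the cached zip
            match entry.get_file(req_path, url_query)? {
                GetFileResult::File(f) => {
                    tracing::info!("serve {} bytes from {:?}", f.file.compressed_size, zip_path)
                }
                GetFileResult::Listing(l) => {
                    tracing::info!("render listing with {} entries", l.entries.len())
                }
            }
        }
        // No artifact selected: show the run's artifact list
        GetEntryResult::Artifacts(artifacts) => {
            tracing::info!("run has {} artifacts", artifacts.len())
        }
    }
    Ok(())
}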
impl CacheEntry {
async fn new(zip_path: &Path, timeout: Option<Duration>) -> Result<Self> {
let meta = tokio::fs::metadata(&zip_path).await?;
let zip_fut = ZipFileReader::new(&zip_path);
let zip = match timeout {
Some(timeout) => tokio::time::timeout(timeout, zip_fut).await??,
None => zip_fut.await?,
};
Ok(Self {
files: zip
.file()
.entries()
.iter()
.filter_map(|entry| {
Some((
entry.filename().as_str().ok()?.to_owned(),
FileEntry {
header_offset: entry.header_offset().try_into().ok()?,
uncompressed_size: entry.uncompressed_size().try_into().ok()?,
compressed_size: entry.compressed_size().try_into().ok()?,
crc32: entry.crc32(),
compression: entry.compression(),
},
))
})
.collect(),
last_modified: meta.modified().ok(),
})
}
pub fn get_file(&self, path: &str, url_query: &str) -> Result<GetFileResult> {
let path = path.trim_start_matches('/');
let mut index_path: Option<Cow<str>> = None;
if path.is_empty() {
// Special case: open index.html directly
index_path = Some("index.html".into());
}
// Attempt to access the following pages
// 1. Site path directly
// 2. Site path + `/index.html`
else if let Some(file) = self.files.get(path) {
return Ok(GetFileResult::File(GetFileResultFile {
file: file.clone(),
mime: util::path_mime(path),
status: StatusCode::OK,
}));
} else if util::site_path_ext(path).is_none() {
index_path = Some(format!("{path}/index.html").into());
}
if let Some(file) = index_path
.and_then(|p: Cow<str>| self.files.get(p.as_ref()))
.or_else(|| self.files.get("200.html"))
{
// index.html or SPA entrypoint
return Ok(GetFileResult::File(GetFileResultFile {
file: file.clone(),
mime: Some(mime::TEXT_HTML),
status: StatusCode::OK,
}));
}
// Directory listing
let path_as_dir: Cow<str> = if path.is_empty() || path.ends_with('/') {
path.into()
} else {
format!("{path}/").into()
};
if self
.files
.keys()
.any(|n| n.starts_with(path_as_dir.as_ref()))
{
let mut rev = false;
let mut col = b'N';
for (k, v) in url::form_urlencoded::parse(url_query.as_bytes()) {
if k == "C" && !v.is_empty() {
col = v.as_bytes()[0];
} else if k == "O" {
rev = v == "D";
}
}
return Ok(GetFileResult::Listing(self.get_listing(
&path_as_dir,
col,
rev,
)));
} else if let Some(file) = self.files.get("404.html") {
// Custom 404 error page
return Ok(GetFileResult::File(GetFileResultFile {
file: file.clone(),
mime: Some(mime::TEXT_HTML),
status: StatusCode::NOT_FOUND,
}));
}
Err(Error::NotFound("requested file".into()))
}
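Because the fields of CacheEntry and FileEntry are public, the resolution rules can be exercised without a real zip file; an illustrative test (not part of the diff, assuming util::site_path_ext returns None for extension-less paths and that async_zip's Compression::Stored variant is available):
#[test]
fn resolve_paths() {
    use std::collections::HashMap;
    let dummy = |size: u32| FileEntry {
        header_offset: 0,
        uncompressed_size: size,
        compressed_size: size,
        crc32: 0,
        compression: Compression::Stored,
    };
    let entry = CacheEntry {
        files: HashMap::from([
            ("index.html".to_owned(), dummy(10)),
            ("docs/index.html".to_owned(), dummy(20)),
            ("404.html".to_owned(), dummy(5)),
        ]),
        last_modified: None,
    };
    // "" and "docs" fall back to the respective index.html
    assert!(matches!(entry.get_file("", "").unwrap(), GetFileResult::File(_)));
    assert!(matches!(entry.get_file("/docs", "").unwrap(), GetFileResult::File(_)));
    // an unknown path with a file extension is answered by the custom 404 page
    match entry.get_file("/missing.txt", "").unwrap() {
        GetFileResult::File(f) => assert_eq!(f.status, StatusCode::NOT_FOUND),
        GetFileResult::Listing(_) => panic!("expected a file"),
    }
}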
pub fn get_files(&self) -> Vec<IndexEntry> {
self.files
.iter()
.map(|(n, entry)| IndexEntry {
name: n.to_owned(),
size: entry.uncompressed_size,
crc32: entry.crc32,
})
.collect()
}
fn get_listing(&self, path: &str, col: u8, rev: bool) -> Listing {
let entries = self
.files
.iter()
.filter_map(|(n, entry)| {
n.strip_prefix(path).map(|n| {
let n = n.split_inclusive('/').next().unwrap();
(n, entry)
})
})
.collect::<BTreeMap<_, _>>();
// Put directories first
let mut directories = Vec::new();
let mut files = Vec::new();
let entries_iter: Box<dyn Iterator<Item = (&str, &FileEntry)>> = if col == b'N' && rev {
Box::new(entries.into_iter().rev())
} else {
Box::new(entries.into_iter())
};
for (n, entry) in entries_iter {
if n.ends_with('/') {
directories.push(ListingEntry {
name: n.to_owned(),
url: format!("{n}{path}"),
size: 0,
crc32: "-".to_string(),
is_dir: true,
});
} else {
files.push(ListingEntry {
name: n.to_owned(),
url: format!("{n}{path}"),
size: entry.uncompressed_size,
crc32: hex::encode(entry.crc32.to_le_bytes()),
is_dir: false,
});
}
}
// Sort by size
if col == b'S' {
if rev {
files.sort_by(|a, b| b.size.cmp(&a.size));
} else {
files.sort_by_key(|f| f.size);
}
}
let n_dirs = directories.len();
let n_files = files.len();
directories.append(&mut files);
Listing {
entries: directories,
n_dirs,
n_files,
has_parent: !path.is_empty(),
}
}
}
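The C and O query parameters follow the familiar Apache-style index links (column and order), so a listing URL ending in `?C=S&O=D` sorts files by size descending while directories always stay on top; illustratively:
// Illustrative only, reusing an `entry` like the one sketched above:
let _by_size_desc = entry.get_file("/docs/", "C=S&O=D");
let _by_name_desc = entry.get_file("/docs/", "C=N&O=D");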

Some files were not shown because too many files have changed in this diff