diff --git a/.editorconfig b/.editorconfig index 7bffa7a..f72739e 100644 --- a/.editorconfig +++ b/.editorconfig @@ -10,5 +10,5 @@ max_line_length = 88 [{Makefile,*.go}] indent_style = tab -[*.{json,md,rst,ini,yml,yaml,xml,html,js,jsx,ts,tsx,vue,svelte}] +[*.{json,md,rst,ini,yml,yaml,xml,html,js,jsx,ts,tsx,vue,svelte,hbs}] indent_size = 2 diff --git a/Cargo.lock b/Cargo.lock index 0af50cd..e8caebb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -150,9 +150,11 @@ dependencies = [ "envy", "flate2", "futures-lite", + "governor", "headers", "hex", "http", + "humansize", "mime", "mime_guess", "once_cell", @@ -169,7 +171,6 @@ dependencies = [ "serde-env", "serde-hex", "serde_json", - "siphasher", "thiserror", "tokio", "tokio-util", @@ -531,6 +532,19 @@ dependencies = [ "typenum", ] +[[package]] +name = "dashmap" +version = "5.5.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856" +dependencies = [ + "cfg-if", + "hashbrown", + "lock_api", + "once_cell", + "parking_lot_core", +] + [[package]] name = "deflate64" version = "0.1.8" @@ -683,6 +697,21 @@ dependencies = [ "percent-encoding", ] +[[package]] +name = "futures" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "645c6916888f6cb6350d2550b80fb63e734897a8498abe35cfb732b6487804b0" +dependencies = [ + "futures-channel", + "futures-core", + "futures-executor", + "futures-io", + "futures-sink", + "futures-task", + "futures-util", +] + [[package]] name = "futures-channel" version = "0.3.30" @@ -690,6 +719,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "eac8f7d7865dcb88bd4373ab671c8cf4508703796caa2b1985a9ca867b3fcb78" dependencies = [ "futures-core", + "futures-sink", ] [[package]] @@ -698,6 +728,17 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dfc6580bb841c5a68e9ef15c77ccc837b40a7504914d52e47b8b0e9bbda25a1d" +[[package]] +name = "futures-executor" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a576fc72ae164fca6b9db127eaa9a9dda0d61316034f33a0a0d4eda41f02b01d" +dependencies = [ + "futures-core", + "futures-task", + "futures-util", +] + [[package]] name = "futures-io" version = "0.3.30" @@ -717,6 +758,17 @@ dependencies = [ "pin-project-lite", ] +[[package]] +name = "futures-macro" +version = "0.3.30" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87750cf4b7a4c0625b1529e4c543c2182106e4dedc60a2a6455e00d212c489ac" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.66", +] + [[package]] name = "futures-sink" version = "0.3.30" @@ -729,16 +781,28 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38d84fa142264698cdce1a9f9172cf383a0c82de1bddcf3092901442c4097004" +[[package]] +name = "futures-timer" +version = "3.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f288b0a4f20f9a56b5d1da57e2227c661b7b16168e2f72365f57b63326e29b24" + [[package]] name = "futures-util" version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6401deb83407ab3da39eba7e33987a73c3df0c82b4bb5813ee871c19c41d48" dependencies = [ + "futures-channel", "futures-core", + "futures-io", + "futures-macro", + "futures-sink", "futures-task", + "memchr", "pin-project-lite", "pin-utils", + "slab", ] [[package]] @@ -774,6 +838,26 @@ version = "0.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" +[[package]] +name = "governor" +version = "0.6.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68a7f542ee6b35af73b06abc0dad1c1bae89964e4e253bc4b587b91c9637867b" +dependencies = [ + "cfg-if", + "dashmap", + "futures", + "futures-timer", + "no-std-compat", + "nonzero_ext", + "parking_lot", + "portable-atomic", + "quanta", + "rand", + "smallvec 1.13.2", + "spinning_top", +] + [[package]] name = "h2" version = "0.4.5" @@ -890,6 +974,15 @@ version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" +[[package]] +name = "humansize" +version = "2.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7" +dependencies = [ + "libm", +] + [[package]] name = "humantime" version = "2.1.0" @@ -1164,12 +1257,24 @@ dependencies = [ "tempfile", ] +[[package]] +name = "no-std-compat" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b93853da6d84c2e3c7d730d6473e8817692dd89be387eb01b94d7f108ecb5b8c" + [[package]] name = "nodrop" version = "0.1.14" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" +[[package]] +name = "nonzero_ext" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" + [[package]] name = "nu-ansi-term" version = "0.46.0" @@ -1373,6 +1478,12 @@ version = "0.3.30" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec" +[[package]] +name = "portable-atomic" +version = "1.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" + [[package]] name = "powerfmt" version = "0.2.0" @@ -1424,6 +1535,21 @@ dependencies = [ "unarray", ] +[[package]] +name = "quanta" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e5167a477619228a0b284fac2674e3c388cba90631d7b7de620e6f1fcd08da5" +dependencies = [ + "crossbeam-utils", + "libc", + "once_cell", + "raw-cpuid", + "wasi", + "web-sys", + "winapi", +] + [[package]] name = "quick-error" version = "1.2.3" @@ -1490,6 +1616,15 @@ dependencies = [ "rand_core", ] +[[package]] +name = "raw-cpuid" +version = "11.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e29830cbb1290e404f24c73af91c5d8d631ce7e128691e9477556b540cd01ecd" +dependencies = [ + "bitflags 2.5.0", +] + [[package]] name = "redox_syscall" version = "0.5.1" @@ -1568,10 +1703,12 @@ dependencies = [ "system-configuration", "tokio", "tokio-native-tls", + "tokio-util", "tower-service", "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-streams", "web-sys", "winreg", ] @@ -1830,12 +1967,6 @@ dependencies = [ "libc", ] -[[package]] -name = "siphasher" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" - [[package]] name = "slab" version = "0.4.9" @@ -1870,6 +2001,15 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = 
"spinning_top" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d96d2d1d716fb500937168cc09353ffdc7a012be8475ac7308e1bdf0e3923300" +dependencies = [ + "lock_api", +] + [[package]] name = "subtle" version = "2.5.0" @@ -2374,6 +2514,19 @@ version = "0.2.92" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "af190c94f2773fdb3729c55b007a722abb5384da03bc0986df4c289bf5567e96" +[[package]] +name = "wasm-streams" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b65dc4c90b63b118468cf747d8bf3566c1913ef60be765b5730ead9e0a3ba129" +dependencies = [ + "futures-util", + "js-sys", + "wasm-bindgen", + "wasm-bindgen-futures", + "web-sys", +] + [[package]] name = "web-sys" version = "0.3.69" diff --git a/Cargo.toml b/Cargo.toml index 1f8173d..3b624d6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,9 +11,11 @@ dotenvy = "0.15.7" envy = { path = "crates/envy" } flate2 = "1.0.30" futures-lite = "2.3.0" +governor = "0.6.3" headers = "0.4.0" hex = "0.4.3" http = "1.1.0" +humansize = "2.1.3" mime = "0.3.17" mime_guess = "2.0.4" once_cell = "1.19.0" @@ -23,12 +25,11 @@ pin-project = "1.1.5" quick_cache = "0.5.1" rand = "0.8.5" regex = "1.10.4" -reqwest = { version = "0.12.4", features = ["json"] } +reqwest = { version = "0.12.4", features = ["json", "stream"] } serde = { version = "1.0.203", features = ["derive"] } serde-env = "0.1.1" serde-hex = "0.1.0" serde_json = "1.0.117" -siphasher = "1.0.1" thiserror = "1.0.61" tokio = { version = "1.37.0", features = ["macros", "fs", "rt-multi-thread"] } tokio-util = { version = "0.7.11", features = ["io"] } diff --git a/resources/icon.opt.svg b/resources/icon.opt.svg index 63d9a3e..d4d2a8b 100644 --- a/resources/icon.opt.svg +++ b/resources/icon.opt.svg @@ -1 +1 @@ - + diff --git a/src/app.rs b/src/app.rs index 27980bf..b53b9a2 100644 --- a/src/app.rs +++ b/src/app.rs @@ -1,4 +1,4 @@ -use std::{ops::Bound, path::PathBuf, sync::Arc}; +use std::{net::SocketAddr, ops::Bound, path::PathBuf, sync::Arc}; use async_zip::tokio::read::ZipEntryReader; use axum::{ @@ -23,13 +23,13 @@ use tokio_util::{ use tower_http::trace::{DefaultOnResponse, TraceLayer}; use crate::{ - artifact_api::{Artifact, ArtifactApi, ArtifactOrRun}, - cache::{Cache, CacheEntry, GetEntryResult, GetFileResult, GetFileResultFile, IndexEntry}, + artifact_api::{Artifact, ArtifactApi}, + cache::{Cache, CacheEntry, GetFileResult, GetFileResultFile, IndexEntry}, config::Config, error::{Error, Result}, gzip_reader::{PrecompressedGzipReader, GZIP_EXTRA_LEN}, query::Query, - templates::{self, LinkItem}, + templates::{self, ArtifactItem, LinkItem}, util::{self, InsertTypedHeader}, App, }; @@ -70,11 +70,13 @@ impl App { let listener = tokio::net::TcpListener::bind(address).await?; tracing::info!("Listening on http://{address}"); + let state = self.new_state(); + let real_ip_header = state.i.cfg.load().real_ip_header.clone(); let router = Router::new() // Prevent search indexing since artifactview serves temporary artifacts .route( "/robots.txt", - get(|| async { "User-agent: *\nDisallow: /\n" }), + get(|| async { "# PLEASE dont scrape this website.\n# All of the data here is fetched from the public GitHub/Gitea APIs, this app is open source and it is not running on some Fortune 500 company server. 
\n\nUser-agent: *\nDisallow: /\n" }), ) // Put the API in the .well-known folder, since it is disabled for pages .route("/.well-known/api/artifacts", get(Self::get_artifacts)) @@ -87,16 +89,21 @@ impl App { .route("/", get(Self::get_page)) .route("/", post(Self::post_homepage)) .fallback(get(Self::get_page)) - .with_state(self.new_state()) + .with_state(state) // Log requests .layer( TraceLayer::new_for_http() - .make_span_with(|request: &Request| { - tracing::error_span!("request", url = util::full_url_from_request(request),) + .make_span_with(move |request: &Request| { + let ip = util::get_ip_address(request, real_ip_header.as_deref()).map(|ip| ip.to_string()).unwrap_or_default(); + tracing::error_span!("request", url = util::full_url_from_request(request), ip) }) .on_response(DefaultOnResponse::new().level(tracing::Level::INFO)), ); - axum::serve(listener, router).await?; + axum::serve( + listener, + router.into_make_service_with_connect_info::(), + ) + .await?; Ok(()) } @@ -120,22 +127,26 @@ impl App { let query = Query::from_subdomain(subdomain)?; let path = percent_encoding::percent_decode_str(uri.path()).decode_utf8_lossy(); let hdrs = request.headers(); + let ip = util::get_ip_address(&request, state.i.cfg.load().real_ip_header.as_deref())?; + + match query { + Query::Artifact(query) => { + let entry_res = state.i.cache.get_entry(&state.i.api, &query, &ip).await?; + let entry = entry_res.entry; + if entry_res.downloaded { + state.garbage_collect(); + } - let res = state.i.cache.get_entry(&state.i.api, &query).await?; - match res { - GetEntryResult::Entry { entry, zip_path } => { match entry.get_file(&path, uri.query().unwrap_or_default())? { GetFileResult::File(res) => { - Self::serve_artifact_file(state, entry, zip_path, res, hdrs).await + Self::serve_artifact_file(state, entry, entry_res.zip_path, res, hdrs) + .await } GetFileResult::Listing(listing) => { if !path.ends_with('/') { return Ok(Redirect::to(&format!("{path}/")).into_response()); } - // TODO: store actual artifact names - let artifact_name = format!("A{}", query.artifact.unwrap()); - let mut path_components = vec![ LinkItem { name: query.shortid(), @@ -145,7 +156,7 @@ impl App { .url_with_subdomain(&query.subdomain_with_artifact(None)), }, LinkItem { - name: artifact_name.to_owned(), + name: entry.name.to_owned(), url: "/".to_string(), }, ]; @@ -162,7 +173,8 @@ impl App { let tmpl = templates::Listing { main_url: state.i.cfg.main_url(), version: templates::Version, - artifact_name: &artifact_name, + run_url: &query.forge_url(), + artifact_name: &entry.name, path_components, n_dirs: listing.n_dirs, n_files: listing.n_files, @@ -176,7 +188,9 @@ impl App { } } } - GetEntryResult::Artifacts(artifacts) => { + Query::Run(query) => { + let artifacts = state.i.api.list(&query).await?; + if uri.path() != "/" { return Err(Error::NotFound("path".into())); } @@ -185,11 +199,16 @@ impl App { } let tmpl = templates::Selection { main_url: state.i.cfg.main_url(), + version: templates::Version, run_url: &query.forge_url(), run_name: &query.shortid(), + publisher: LinkItem { + name: query.user.to_owned(), + url: format!("https://{}/{}", query.host, query.user), + }, artifacts: artifacts .into_iter() - .map(|a| LinkItem::from_artifact(a, &query, &state.i.cfg)) + .map(|a| ArtifactItem::from_artifact(a, &query, &state.i.cfg)) .collect(), }; Ok(Response::builder() @@ -233,7 +252,7 @@ impl App { // Dont serve files above the configured size limit let lim = state.i.cfg.load().max_file_size; - if lim.is_some_and(|lim| file.uncompressed_size > 
lim) { + if lim.is_some_and(|lim| file.uncompressed_size > lim.into()) { return Err(Error::BadRequest( format!( "file too large (size: {}, limit: {})", @@ -246,27 +265,23 @@ impl App { let mut resp = Response::builder() .status(res.status) - .typed_header(headers::AcceptRanges::bytes()); + .typed_header(headers::AcceptRanges::bytes()) + .typed_header(headers::LastModified::from(entry.last_modified)); if let Some(mime) = res.mime { resp = resp.typed_header(headers::ContentType::from(mime)); } - if let Some(last_mod) = entry.last_modified { - resp = resp.typed_header(headers::LastModified::from(last_mod)); - } // handle if-(un)modified queries - if let Some(modified) = entry.last_modified { - if let Some(if_unmodified_since) = hdrs.typed_get::() { - if !if_unmodified_since.precondition_passes(modified) { - return Ok(resp - .status(StatusCode::PRECONDITION_FAILED) - .body(Body::empty())?); - } + if let Some(if_unmodified_since) = hdrs.typed_get::() { + if !if_unmodified_since.precondition_passes(entry.last_modified) { + return Ok(resp + .status(StatusCode::PRECONDITION_FAILED) + .body(Body::empty())?); } - if let Some(if_modified_since) = hdrs.typed_get::() { - if !if_modified_since.is_modified(modified) { - return Ok(resp.status(StatusCode::NOT_MODIFIED).body(Body::empty())?); - } + } + if let Some(if_modified_since) = hdrs.typed_get::() { + if !if_modified_since.is_modified(entry.last_modified) { + return Ok(resp.status(StatusCode::NOT_MODIFIED).body(Body::empty())?); } } @@ -352,7 +367,7 @@ impl App { Host(host): Host, ) -> Result>> { let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - let query = Query::from_subdomain(subdomain)?; + let query = Query::from_subdomain(subdomain)?.into_runquery(); let artifacts = state.i.api.list(&query).await?; Ok(Json(artifacts)) } @@ -363,37 +378,25 @@ impl App { Host(host): Host, ) -> Result> { let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - let query = Query::from_subdomain(subdomain)?; - - if query.artifact.is_none() { - return Err(Error::BadRequest("no artifact specified".into())); - } - + let query = Query::from_subdomain(subdomain)?.try_into_artifactquery()?; let artifact = state.i.api.fetch(&query).await?; - match artifact { - ArtifactOrRun::Artifact(artifact) => Ok(Json(artifact)), - ArtifactOrRun::Run(_) => unreachable!(), - } + Ok(Json(artifact)) } /// API endpoint to get a file listing async fn get_files( State(state): State, Host(host): Host, + request: Request, ) -> Result>> { let subdomain = util::get_subdomain(&host, &state.i.cfg.load().root_domain)?; - let query = Query::from_subdomain(subdomain)?; - - if query.artifact.is_none() { - return Err(Error::BadRequest("no artifact specified".into())); + let ip = util::get_ip_address(&request, state.i.cfg.load().real_ip_header.as_deref())?; + let query = Query::from_subdomain(subdomain)?.try_into_artifactquery()?; + let entry_res = state.i.cache.get_entry(&state.i.api, &query, &ip).await?; + if entry_res.downloaded { + state.garbage_collect(); } - - let res = state.i.cache.get_entry(&state.i.api, &query).await?; - let entry = match res { - GetEntryResult::Entry { entry, .. 
} => entry, - GetEntryResult::Artifacts(_) => unreachable!(), - }; - let files = entry.get_files(); + let files = entry_res.entry.get_files(); Ok(Json(files)) } } @@ -407,4 +410,14 @@ impl AppState { i: Arc::new(AppInner { cfg, cache, api }), } } + + /// Run garbage collection in the background if necessary + pub fn garbage_collect(&self) { + let state = self.clone(); + tokio::spawn(async move { + if let Err(e) = state.i.cache.garbage_collect().await { + tracing::error!("error during garbage collect: {e}"); + } + }); + } } diff --git a/src/artifact_api.rs b/src/artifact_api.rs index e49cafa..8841a72 100644 --- a/src/artifact_api.rs +++ b/src/artifact_api.rs @@ -1,34 +1,38 @@ //! API-Client to fetch CI artifacts from Github and Forgejo -use std::{fs::File, io::Cursor, path::Path}; +use std::path::Path; +use futures_lite::StreamExt; use http::header; +use quick_cache::sync::Cache as QuickCache; use reqwest::{Client, ClientBuilder, IntoUrl, RequestBuilder, Url}; use serde::{Deserialize, Serialize}; +use tokio::{fs::File, io::AsyncWriteExt}; use crate::{ config::Config, error::{Error, Result}, - query::Query, + query::{ArtifactQuery, QueryData}, }; pub struct ArtifactApi { http: Client, cfg: Config, + qc: QuickCache>, } -#[derive(Serialize, Deserialize)] +#[derive(Clone, Serialize, Deserialize)] pub struct Artifact { pub id: u64, pub name: String, pub size: u64, pub expired: bool, + /// Artifact download URL used by the server pub download_url: String, -} - -pub enum ArtifactOrRun { - Artifact(Artifact), - Run(Vec), + /// Artifact download URL shown to the user. If None, download_url is used + /// + /// GitHub uses different download URLs for their API and their frontend. + pub user_download_url: Option, } #[derive(Deserialize)] @@ -59,20 +63,24 @@ enum ForgejoArtifactStatus { Expired, } -impl From for Artifact { - fn from(value: GithubArtifact) -> Self { - Self { - id: value.id, - name: value.name, - size: value.size_in_bytes, - expired: value.expired, - download_url: value.archive_download_url, +impl GithubArtifact { + fn into_artifact(self, query: &QueryData) -> Artifact { + Artifact { + id: self.id, + name: self.name, + size: self.size_in_bytes, + expired: self.expired, + download_url: self.archive_download_url, + user_download_url: Some(format!( + "https://github.com/{}/{}/actions/runs/{}/artifacts/{}", + query.user, query.repo, query.run, self.id + )), } } } impl ForgejoArtifact { - fn into_artifact(self, id: u64, query: &Query) -> Artifact { + fn into_artifact(self, id: u64, query: &QueryData) -> Artifact { Artifact { download_url: format!( "https://{}/{}/{}/actions/runs/{}/artifacts/{}", @@ -82,6 +90,7 @@ impl ForgejoArtifact { name: self.name, size: self.size, expired: matches!(self.status, ForgejoArtifactStatus::Expired), + user_download_url: None, } } } @@ -97,35 +106,36 @@ impl ArtifactApi { )) .build() .unwrap(), + qc: QuickCache::new(cfg.load().mem_cache_size), cfg, } } - pub async fn list(&self, query: &Query) -> Result> { - if query.is_github() { - self.list_github(query).await - } else { - self.list_forgejo(query).await - } + pub async fn list(&self, query: &QueryData) -> Result> { + let subdomain = query.subdomain_with_artifact(None); + self.qc + .get_or_insert_async(&subdomain, async { + if query.is_github() { + self.list_github(query).await + } else { + self.list_forgejo(query).await + } + }) + .await } - pub async fn fetch(&self, query: &Query) -> Result { + pub async fn fetch(&self, query: &ArtifactQuery) -> Result { if query.is_github() { self.fetch_github(query).await } 
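// A minimal standalone sketch of the ConnectInfo pattern used in app.rs above
// (axum 0.7; the handler name `whoami` is made up). Without
// into_make_service_with_connect_info, the ConnectInfo extension is absent and
// a helper like util::get_ip_address fails on every request.
use std::net::SocketAddr;

use axum::{extract::ConnectInfo, routing::get, Router};

async fn whoami(ConnectInfo(addr): ConnectInfo<SocketAddr>) -> String {
    // The socket address recorded by the listener when the connection was accepted.
    addr.ip().to_string()
}

#[tokio::main]
async fn main() {
    let app = Router::new().route("/", get(whoami));
    let listener = tokio::net::TcpListener::bind("127.0.0.1:3000").await.unwrap();
    axum::serve(listener, app.into_make_service_with_connect_info::<SocketAddr>())
        .await
        .unwrap();
}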
diff --git a/src/artifact_api.rs b/src/artifact_api.rs
index e49cafa..8841a72 100644
--- a/src/artifact_api.rs
+++ b/src/artifact_api.rs
@@ -1,34 +1,38 @@
 //! API-Client to fetch CI artifacts from Github and Forgejo
 
-use std::{fs::File, io::Cursor, path::Path};
+use std::path::Path;
 
+use futures_lite::StreamExt;
 use http::header;
+use quick_cache::sync::Cache as QuickCache;
 use reqwest::{Client, ClientBuilder, IntoUrl, RequestBuilder, Url};
 use serde::{Deserialize, Serialize};
+use tokio::{fs::File, io::AsyncWriteExt};
 
 use crate::{
     config::Config,
     error::{Error, Result},
-    query::Query,
+    query::{ArtifactQuery, QueryData},
 };
 
 pub struct ArtifactApi {
     http: Client,
     cfg: Config,
+    qc: QuickCache<String, Vec<Artifact>>,
 }
 
-#[derive(Serialize, Deserialize)]
+#[derive(Clone, Serialize, Deserialize)]
 pub struct Artifact {
     pub id: u64,
     pub name: String,
     pub size: u64,
     pub expired: bool,
+    /// Artifact download URL used by the server
     pub download_url: String,
-}
-
-pub enum ArtifactOrRun {
-    Artifact(Artifact),
-    Run(Vec<Artifact>),
+    /// Artifact download URL shown to the user. If None, download_url is used
+    ///
+    /// GitHub uses different download URLs for their API and their frontend.
+    pub user_download_url: Option<String>,
 }
 
 #[derive(Deserialize)]
@@ -59,20 +63,24 @@ enum ForgejoArtifactStatus {
     Expired,
 }
 
-impl From<GithubArtifact> for Artifact {
-    fn from(value: GithubArtifact) -> Self {
-        Self {
-            id: value.id,
-            name: value.name,
-            size: value.size_in_bytes,
-            expired: value.expired,
-            download_url: value.archive_download_url,
+impl GithubArtifact {
+    fn into_artifact<T>(self, query: &QueryData<T>) -> Artifact {
+        Artifact {
+            id: self.id,
+            name: self.name,
+            size: self.size_in_bytes,
+            expired: self.expired,
+            download_url: self.archive_download_url,
+            user_download_url: Some(format!(
+                "https://github.com/{}/{}/actions/runs/{}/artifacts/{}",
+                query.user, query.repo, query.run, self.id
+            )),
         }
     }
 }
 
 impl ForgejoArtifact {
-    fn into_artifact(self, id: u64, query: &Query) -> Artifact {
+    fn into_artifact<T>(self, id: u64, query: &QueryData<T>) -> Artifact {
         Artifact {
             download_url: format!(
                 "https://{}/{}/{}/actions/runs/{}/artifacts/{}",
@@ -82,6 +90,7 @@ impl ForgejoArtifact {
             name: self.name,
             size: self.size,
             expired: matches!(self.status, ForgejoArtifactStatus::Expired),
+            user_download_url: None,
         }
     }
 }
@@ -97,35 +106,36 @@ impl ArtifactApi {
             ))
             .build()
             .unwrap(),
+            qc: QuickCache::new(cfg.load().mem_cache_size),
             cfg,
         }
     }
 
-    pub async fn list(&self, query: &Query) -> Result<Vec<Artifact>> {
-        if query.is_github() {
-            self.list_github(query).await
-        } else {
-            self.list_forgejo(query).await
-        }
+    pub async fn list<T>(&self, query: &QueryData<T>) -> Result<Vec<Artifact>> {
+        let subdomain = query.subdomain_with_artifact(None);
+        self.qc
+            .get_or_insert_async(&subdomain, async {
+                if query.is_github() {
+                    self.list_github(query).await
+                } else {
+                    self.list_forgejo(query).await
+                }
+            })
+            .await
     }
 
-    pub async fn fetch(&self, query: &Query) -> Result<ArtifactOrRun> {
+    pub async fn fetch(&self, query: &ArtifactQuery) -> Result<Artifact> {
         if query.is_github() {
             self.fetch_github(query).await
         } else {
             // Forgejo currently has no API for fetching single artifacts
             let mut artifacts = self.list_forgejo(query).await?;
-            match query.artifact {
-                Some(artifact) => {
-                    let i = usize::try_from(artifact)?;
-                    if i == 0 || i > artifacts.len() {
-                        return Err(Error::NotFound("artifact".into()));
-                    }
-                    Ok(ArtifactOrRun::Artifact(artifacts.swap_remove(i - 1)))
-                }
-                None => Ok(ArtifactOrRun::Run(artifacts)),
+            let i = usize::try_from(query.artifact)?;
+            if i == 0 || i > artifacts.len() {
+                return Err(Error::NotFound("artifact".into()));
             }
+            Ok(artifacts.swap_remove(i - 1))
         }
     }
@@ -136,7 +146,11 @@
         let lim = self.cfg.load().max_artifact_size;
 
         let check_lim = |size: u64| {
-            if lim.is_some_and(|lim| u32::try_from(size).map(|size| size > lim).unwrap_or(true)) {
+            if lim.is_some_and(|lim| {
+                u32::try_from(size)
+                    .map(|size| size > lim.into())
+                    .unwrap_or(true)
+            }) {
                 Err(Error::BadRequest(
                     format!(
                         "artifact too large (size: {}, limit: {})",
@@ -165,15 +179,23 @@
         }
 
         let tmp_path = path.with_extension(format!("tmp.{:x}", rand::random::()));
-        let mut file = File::create(&tmp_path)?;
-        let mut content = Cursor::new(resp.bytes().await?);
-        std::io::copy(&mut content, &mut file)?;
-        std::fs::rename(&tmp_path, path)?;
+
+        {
+            let mut file = File::create(&tmp_path).await?;
+            let mut stream = resp.bytes_stream();
+            while let Some(item) = stream.next().await {
+                let mut chunk = item?;
+                file.write_all_buf(&mut chunk).await?;
+            }
+        }
+        tokio::fs::write(path.with_extension("name"), &artifact.name).await?;
+        tokio::fs::rename(&tmp_path, path).await?;
+
         tracing::info!("Downloaded artifact from {}", artifact.download_url);
         Ok(())
     }
 
-    async fn list_forgejo(&self, query: &Query) -> Result<Vec<Artifact>> {
+    async fn list_forgejo<T>(&self, query: &QueryData<T>) -> Result<Vec<Artifact>> {
         let url = format!(
             "https://{}/{}/{}/actions/runs/{}/artifacts",
             query.host, query.user, query.repo, query.run
         );
@@ -198,7 +220,7 @@
         Ok(artifacts)
     }
 
-    async fn list_github(&self, query: &Query) -> Result<Vec<Artifact>> {
+    async fn list_github<T>(&self, query: &QueryData<T>) -> Result<Vec<Artifact>> {
         let url = format!(
             "https://api.github.com/repos/{}/{}/actions/runs/{}/artifacts",
             query.user, query.repo, query.run
         );
@@ -212,28 +234,27 @@
             .json::>()
             .await?;
 
-        Ok(resp.artifacts.into_iter().map(Artifact::from).collect())
+        Ok(resp
+            .artifacts
+            .into_iter()
+            .map(|a| a.into_artifact(query))
+            .collect())
     }
 
-    async fn fetch_github(&self, query: &Query) -> Result<ArtifactOrRun> {
-        match query.artifact {
-            Some(artifact) => {
-                let url = format!(
-                    "https://api.github.com/repos/{}/{}/actions/artifacts/{}",
-                    query.user, query.repo, artifact
-                );
+    async fn fetch_github(&self, query: &ArtifactQuery) -> Result<Artifact> {
+        let url = format!(
+            "https://api.github.com/repos/{}/{}/actions/artifacts/{}",
+            query.user, query.repo, query.artifact
+        );
 
-                let artifact = self
-                    .get_github(url)
-                    .send()
-                    .await?
-                    .error_for_status()?
-                    .json::<GithubArtifact>()
-                    .await?;
-                Ok(ArtifactOrRun::Artifact(artifact.into()))
-            }
-            None => Ok(ArtifactOrRun::Run(self.list_github(query).await?)),
-        }
+        let artifact = self
+            .get_github(url)
+            .send()
+            .await?
+            .error_for_status()?
+            .json::<GithubArtifact>()
+            .await?;
+        Ok(artifact.into_artifact(query))
     }
 
     fn get_github<U: IntoUrl>(&self, url: U) -> RequestBuilder {
@@ -248,57 +269,39 @@
 
 #[cfg(test)]
 mod tests {
-    use crate::{config::Config, query::Query};
+    use crate::{config::Config, query::ArtifactQuery};
 
-    use super::{ArtifactApi, ArtifactOrRun};
+    use super::ArtifactApi;
 
     #[tokio::test]
     async fn fetch_forgejo() {
-        let query = Query {
+        let query = ArtifactQuery {
             host: "code.thetadev.de".to_owned(),
             user: "HSA".to_owned(),
             repo: "Visitenbuch".to_owned(),
             run: 32,
-            artifact: Some(1),
+            artifact: 1,
         };
         let api = ArtifactApi::new(Config::default());
         let res = api.fetch(&query).await.unwrap();
 
-        if let ArtifactOrRun::Artifact(res) = res {
-            assert_eq!(res.name, "playwright-report");
-            assert_eq!(
-                res.download_url,
-                "https://code.thetadev.de/HSA/Visitenbuch/actions/runs/32/artifacts/playwright-report"
-            );
-            assert_eq!(res.id, 1);
-            assert_eq!(res.size, 574292);
-        } else {
-            panic!("got run");
-        }
+        assert_eq!(res.id, 1);
+        assert_eq!(res.size, 574292);
     }
 
     #[tokio::test]
     async fn fetch_github() {
-        let query = Query {
+        let query = ArtifactQuery {
             host: "github.com".to_owned(),
             user: "actions".to_owned(),
             repo: "upload-artifact".to_owned(),
             run: 8805345396,
-            artifact: Some(1440556464),
+            artifact: 1440556464,
         };
         let api = ArtifactApi::new(Config::default());
         let res = api.fetch(&query).await.unwrap();
 
-        if let ArtifactOrRun::Artifact(res) = res {
-            assert_eq!(res.name, "Artifact-Wildcard-macos-latest");
-            assert_eq!(
-                res.download_url,
-                "https://api.github.com/repos/actions/upload-artifact/actions/artifacts/1440556464/zip"
-            );
-            assert_eq!(res.id, 1440556464);
-            assert_eq!(res.size, 334);
-        } else {
-            panic!("got run");
-        }
+        assert_eq!(res.id, 1440556464);
+        assert_eq!(res.size, 334);
     }
 }
- } - } - - pub fn get_path(&self, query: &Query) -> PathBuf { - path!(self.cfg.load().cache_dir / format!("{}.zip", hex::encode(query.siphash()))) - } - - pub async fn get_entry(&self, api: &ArtifactApi, query: &Query) -> Result { - if query.artifact.is_some() { - let hash = query.siphash(); - let zip_path = path!(self.cfg.load().cache_dir / format!("{}.zip", hex::encode(hash))); - if !zip_path.is_file() { - let artifact = api.fetch(query).await?; - let artifact = match artifact { - ArtifactOrRun::Artifact(artifact) => artifact, - ArtifactOrRun::Run(_) => unreachable!(), - }; - api.download(&artifact, &zip_path).await?; - } - - let timeout = self - .cfg + qc: QuickCache::new(cfg.load().mem_cache_size), + lim_download: cfg .load() - .zip_timeout_ms - .map(|t| Duration::from_millis(t.into())); - let mut entry = self - .qc - .get_or_insert_async(&hash, async { - Ok::<_, Error>(Arc::new(CacheEntry::new(&zip_path, timeout).await?)) - }) - .await?; - - // Verify if the cached entry is fresh - let meta = tokio::fs::metadata(&zip_path).await?; - if meta.modified().ok() != entry.last_modified { - tracing::info!("cached file {zip_path:?} changed"); - entry = Arc::new(CacheEntry::new(&zip_path, timeout).await?); - self.qc.insert(hash, entry.clone()); - } - Ok(GetEntryResult::Entry { entry, zip_path }) - } else { - let run = api.fetch(query).await?; - let artifacts = match run { - ArtifactOrRun::Artifact(_) => unreachable!(), - ArtifactOrRun::Run(run) => run, - }; - - Ok(GetEntryResult::Artifacts(artifacts)) + .limit_artifacts_per_min + .map(|lim| RateLimiter::keyed(Quota::per_minute(lim))), + lim_gc: RateLimiter::direct(Quota::per_hour(NonZeroU32::MIN)), + cfg, } } + + pub async fn get_entry( + &self, + api: &ArtifactApi, + query: &ArtifactQuery, + ip: &IpAddr, + ) -> Result { + let subdomain = query.subdomain(); + let zip_path = path!(self.cfg.load().cache_dir / format!("{subdomain}.zip")); + let downloaded = !zip_path.is_file(); + if downloaded { + let artifact = api.fetch(query).await?; + if let Some(limiter) = &self.lim_download { + limiter.check_key(ip)?; + } + api.download(&artifact, &zip_path).await?; + } + + let timeout = self + .cfg + .load() + .zip_timeout_ms + .map(|t| Duration::from_millis(u32::from(t).into())); + let max_file_count = self.cfg.load().max_file_count; + let mut entry = self + .qc + .get_or_insert_async(&subdomain, async { + Ok::<_, Error>(Arc::new( + CacheEntry::new(&zip_path, timeout, max_file_count, query.artifact).await?, + )) + }) + .await?; + + // Verify if the cached entry is fresh + let metadata = tokio::fs::metadata(&zip_path).await?; + let modified = metadata + .modified() + .map_err(|_| Error::Internal("no file modified time".into()))?; + let accessed = metadata + .accessed() + .map_err(|_| Error::Internal("no file accessed time".into()))?; + if modified != entry.last_modified { + tracing::info!("cached file {zip_path:?} changed"); + entry = Arc::new( + CacheEntry::new(&zip_path, timeout, max_file_count, query.artifact).await?, + ); + self.qc.insert(subdomain, entry.clone()); + } + // Update last_accessed time if older than 30min + // some systems may have access time disabled and we need it to keep track of stale artifacts + let now = SystemTime::now(); + if now + .duration_since(accessed) + .map_err(|e| Error::Internal(e.to_string().into()))? 
+ > Duration::from_secs(1800) + { + let file = std::fs::File::open(&zip_path)?; + file.set_times(FileTimes::new().set_accessed(now))?; + } + + Ok(GetEntryResult { + entry, + zip_path, + downloaded, + }) + } + + pub async fn garbage_collect(&self) -> Result<()> { + if self.lim_gc.check().is_err() { + return Ok(()); + } + tracing::info!("starting garbage collect"); + + let now = SystemTime::now(); + let max_age = Duration::from_secs(u64::from(u32::from(self.cfg.load().max_age_h)) * 3600); + + let mut n = 0; + let mut rd = tokio::fs::read_dir(&self.cfg.load().cache_dir).await?; + while let Some(entry) = rd.next_entry().await? { + if entry.file_type().await?.is_file() + && entry.path().extension().is_some_and(|ext| ext == "zip") + { + let accessed = entry + .metadata() + .await? + .accessed() + .map_err(|_| Error::Internal("no file accessed time".into()))?; + if now + .duration_since(accessed) + .map_err(|e| Error::Internal(e.to_string().into()))? + > max_age + { + let path = entry.path(); + if let Some(name) = path.file_stem().and_then(|s| s.to_str()) { + self.qc.remove(name); + } + tokio::fs::remove_file(path.with_extension("name")) + .await + .ignore_file_not_found()?; + tokio::fs::remove_file(&path) + .await + .ignore_file_not_found()?; + n += 1; + } + } + } + tracing::info!("garbage collect finished: {n} artifacts removed"); + Ok(()) + } } impl CacheEntry { - async fn new(zip_path: &Path, timeout: Option) -> Result { + async fn new( + zip_path: &Path, + timeout: Option, + max_file_count: Option, + artifact: u64, + ) -> Result { let meta = tokio::fs::metadata(&zip_path).await?; let zip_fut = ZipFileReader::new(&zip_path); let zip = match timeout { @@ -149,6 +232,16 @@ impl CacheEntry { None => zip_fut.await?, }; + if max_file_count.is_some_and(|lim| zip.file().entries().len() > lim.into()) { + return Err(Error::BadRequest("artifact contains too many files".into())); + } + + let name_path = zip_path.with_extension("name"); + let name = tokio::fs::read_to_string(name_path) + .await + .ok() + .unwrap_or_else(|| format!("A{artifact}")); + Ok(Self { files: zip .file() @@ -167,7 +260,10 @@ impl CacheEntry { )) }) .collect(), - last_modified: meta.modified().ok(), + name, + last_modified: meta + .modified() + .map_err(|_| Error::Internal("no file modified time".into()))?, }) } @@ -279,7 +375,7 @@ impl CacheEntry { directories.push(ListingEntry { name: n.to_owned(), url: format!("{n}{path}"), - size: 0, + size: Size(0), crc32: "-".to_string(), is_dir: true, }); @@ -287,7 +383,7 @@ impl CacheEntry { files.push(ListingEntry { name: n.to_owned(), url: format!("{n}{path}"), - size: entry.uncompressed_size, + size: Size(entry.uncompressed_size), crc32: hex::encode(entry.crc32.to_le_bytes()), is_dir: false, }); @@ -297,9 +393,9 @@ impl CacheEntry { // Sort by size if col == b'S' { if rev { - files.sort_by(|a, b| b.size.cmp(&a.size)); + files.sort_by(|a, b| b.size.0.cmp(&a.size.0)); } else { - files.sort_by_key(|f| f.size); + files.sort_by_key(|f| f.size.0); } } diff --git a/src/config.rs b/src/config.rs index e24657d..113c040 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,5 @@ use std::{ + num::{NonZeroU32, NonZeroUsize}, path::{Path, PathBuf}, sync::Arc, }; @@ -20,14 +21,40 @@ struct ConfigInner { #[derive(Debug, Serialize, Deserialize)] #[serde(default)] pub struct ConfigData { + /// Folder where the downloaded artifacts are stored pub cache_dir: PathBuf, + /// Root domain under which the server is available + /// + /// The individual artifacts are served under `.` pub root_domain: String, 
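// Sketch of the governor usage in Cache above: one keyed limiter shared by all
// requests, checked per client IP before a download starts. A quota of 5 per
// minute matches the limit_artifacts_per_min default in the config below.
use std::{net::IpAddr, num::NonZeroU32};

use governor::{Quota, RateLimiter};

fn main() {
    let limiter = RateLimiter::keyed(Quota::per_minute(NonZeroU32::new(5).unwrap()));
    let ip: IpAddr = "203.0.113.7".parse().unwrap();
    for i in 1..=7 {
        // check_key spends one cell of this key's quota; Err means "try again
        // later", which the server maps to Error::Ratelimit (HTTP 429).
        match limiter.check_key(&ip) {
            Ok(()) => println!("download {i}: allowed"),
            Err(_) => println!("download {i}: rate limited"),
        }
    }
}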
diff --git a/src/config.rs b/src/config.rs
index e24657d..113c040 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -1,4 +1,5 @@
 use std::{
+    num::{NonZeroU32, NonZeroUsize},
     path::{Path, PathBuf},
     sync::Arc,
 };
@@ -20,14 +21,40 @@ struct ConfigInner {
 #[derive(Debug, Serialize, Deserialize)]
 #[serde(default)]
 pub struct ConfigData {
+    /// Folder where the downloaded artifacts are stored
     pub cache_dir: PathBuf,
+    /// Root domain under which the server is available
+    ///
+    /// The individual artifacts are served under `<subdomain>.<root_domain>`
     pub root_domain: String,
+    /// Set to true if the server is not available under HTTPS
     pub no_https: bool,
-    pub max_artifact_size: Option<u32>,
-    pub max_file_size: Option<u32>,
-    pub max_age_h: Option<u32>,
-    pub zip_timeout_ms: Option<u32>,
+    /// Maximum artifact (ZIP) size to be downloaded
+    pub max_artifact_size: Option<NonZeroU32>,
+    /// Maximum file size to be served
+    pub max_file_size: Option<NonZeroU32>,
+    /// Maximum file count within a ZIP file
+    pub max_file_count: Option<NonZeroUsize>,
+    /// Maximum age in hours after which artifacts are deleted
+    pub max_age_h: NonZeroU32,
+    /// Maximum time in milliseconds for reading a zip file index
+    pub zip_timeout_ms: Option<NonZeroU32>,
+    /// GitHub API token for downloading GitHub artifacts
+    ///
+    /// Using a fine-grained token with public read permissions is recommended.
     pub github_token: Option<String>,
+    /// Number of artifact indexes to keep in memory
+    pub mem_cache_size: usize,
+    /// Get the client IP address from an HTTP request header
+    ///
+    /// If Artifactview is exposed to the network directly, this option
+    /// has to be unset. If you are using a reverse proxy the proxy needs to
+    /// be configured to send the actual client IP as a request header.
+    ///
+    /// For most proxies this header is `x-forwarded-for`.
+    pub real_ip_header: Option<String>,
+    /// Limit the number of downloaded artifacts per IP address and minute
+    pub limit_artifacts_per_min: Option<NonZeroU32>,
 }
 
 impl Default for ConfigData {
@@ -36,11 +63,15 @@
             cache_dir: Path::new("/tmp/artifactview").into(),
             root_domain: "localhost:3000".to_string(),
             no_https: false,
-            max_artifact_size: Some(100_000_000),
-            max_file_size: Some(100_000_000),
-            max_age_h: Some(12),
-            zip_timeout_ms: Some(1000),
+            max_artifact_size: Some(NonZeroU32::new(100_000_000).unwrap()),
+            max_file_size: Some(NonZeroU32::new(100_000_000).unwrap()),
+            max_file_count: Some(NonZeroUsize::new(10_000).unwrap()),
+            max_age_h: NonZeroU32::new(12).unwrap(),
+            zip_timeout_ms: Some(NonZeroU32::new(1000).unwrap()),
             github_token: None,
+            mem_cache_size: 50,
+            real_ip_header: None,
+            limit_artifacts_per_min: Some(NonZeroU32::new(5).unwrap()),
         }
     }
 }
diff --git a/src/error.rs b/src/error.rs
index a00c8ba..fc54f0b 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -37,6 +37,8 @@ pub enum Error {
     Timeout(#[from] tokio::time::error::Elapsed),
     #[error("method not allowed")]
     MethodNotAllowed,
+    #[error("you are fetching new artifacts too fast, please wait a minute and try again")]
+    Ratelimit,
 }
 
 impl From for Error {
@@ -60,6 +62,12 @@
     }
 }
 
+impl From<governor::NotUntil<governor::clock::QuantaInstant>> for Error {
+    fn from(_: governor::NotUntil<governor::clock::QuantaInstant>) -> Self {
+        Self::Ratelimit
+    }
+}
+
 impl Error {
     pub fn status(&self) -> StatusCode {
         match self {
@@ -67,6 +75,7 @@
             Error::NotFound(_) | Error::Inaccessible | Error::Expired => StatusCode::NOT_FOUND,
             Error::HttpClient(_, status) => *status,
             Error::MethodNotAllowed => StatusCode::METHOD_NOT_ALLOWED,
+            Error::Ratelimit => StatusCode::TOO_MANY_REQUESTS,
             _ => StatusCode::INTERNAL_SERVER_ERROR,
         }
     }
diff --git a/src/query.rs b/src/query.rs
index c3de5ce..31d608d 100644
--- a/src/query.rs
+++ b/src/query.rs
@@ -2,13 +2,21 @@ use std::{fmt::Write, hash::Hash};
 
 use once_cell::sync::Lazy;
 use regex::{Captures, Regex};
-use siphasher::sip128::{Hasher128, SipHasher};
 use url::Url;
 
 use crate::error::{Error, Result};
 
+#[derive(Debug, PartialEq, Eq)]
+pub enum Query {
+    Artifact(ArtifactQuery),
+    Run(RunQuery),
+}
+
+pub type RunQuery = QueryData<()>;
+pub type ArtifactQuery = QueryData<u64>;
+
 #[derive(Debug, PartialEq, Eq, Hash)]
-pub struct Query {
+pub struct QueryData<T> {
     /// Forge host
     pub host: String,
     /// User/org name (case-insensitive)
     pub user: String,
     /// Repo name (case-insensitive)
     pub repo: String,
     /// CI run id
     pub run: u64,
-    /// Artifact id (unique for every run)
-    pub artifact: Option<u64>,
+    // Optional selected artifact
+    pub artifact: T,
 }
 
 static RE_REPO_NAME: Lazy<Regex> = Lazy::new(|| Regex::new("^[A-z0-9\\-_\\.]+$").unwrap());
 
@@ -35,15 +43,26 @@ impl Query {
             return Err(Error::InvalidUrl);
         }
 
-        Ok(Self {
-            host: Self::decode_domain(segments[0], '.'),
-            user: Self::decode_domain(segments[1], '-'),
-            repo: Self::decode_domain(segments[2], '-'),
-            run: run_and_artifact[0].parse().ok().ok_or(Error::InvalidUrl)?,
-            artifact: match run_and_artifact.get(1) {
-                Some(x) => Some(x.parse().ok().ok_or(Error::InvalidUrl)?),
-                None => None,
-            },
+        let host = decode_domain(segments[0], '.');
+        let user = decode_domain(segments[1], '-');
+        let repo = decode_domain(segments[2], '-');
+        let run = run_and_artifact[0].parse().ok().ok_or(Error::InvalidUrl)?;
+
+        Ok(match run_and_artifact.get(1) {
+            Some(x) => Self::Artifact(QueryData {
+                host,
+                user,
+                repo,
+                run,
+                artifact: x.parse().ok().ok_or(Error::InvalidUrl)?,
+            }),
+            None => Self::Run(QueryData {
+                host,
+                user,
+                repo,
+                run,
+                artifact: (),
+            }),
         })
     }
 
@@ -78,25 +97,56 @@ impl Query {
             .and_then(|s| s.parse::<u64>().ok())
             .ok_or(Error::BadRequest("no run ID".into()))?;
 
-        Ok(Self {
+        Ok(Self::Run(RunQuery {
             host: host.to_owned(),
             user: user.to_owned(),
             repo: repo.to_owned(),
             run,
-            artifact: None,
-        })
+            artifact: (),
+        }))
     }
 
     pub fn subdomain(&self) -> String {
-        self.subdomain_with_artifact(self.artifact)
+        match self {
+            Query::Artifact(q) => q.subdomain(),
+            Query::Run(q) => q.subdomain(),
+        }
     }
 
+    pub fn into_runquery(self) -> RunQuery {
+        match self {
+            Query::Artifact(q) => q.into_runquery(),
+            Query::Run(q) => q,
+        }
+    }
+
+    pub fn try_into_artifactquery(self) -> Result<ArtifactQuery> {
+        match self {
+            Query::Artifact(q) => Ok(q),
+            Query::Run(_) => Err(Error::BadRequest("no artifact specified".into())),
+        }
+    }
+}
+
+impl ArtifactQuery {
+    pub fn subdomain(&self) -> String {
+        self.subdomain_with_artifact(Some(self.artifact))
+    }
+}
+
+impl RunQuery {
+    pub fn subdomain(&self) -> String {
+        self.subdomain_with_artifact(None)
+    }
+}
+
+impl<T> QueryData<T> {
     pub fn subdomain_with_artifact(&self, artifact: Option<u64>) -> String {
         let mut res = format!(
             "{}--{}--{}--{}",
-            Self::encode_domain(&self.host, '.'),
-            Self::encode_domain(&self.user, '-'),
-            Self::encode_domain(&self.repo, '-'),
+            encode_domain(&self.host, '.'),
+            encode_domain(&self.user, '-'),
+            encode_domain(&self.repo, '-'),
             self.run,
         );
         if let Some(artifact) = artifact {
@@ -120,82 +170,86 @@
         self.host == "github.com"
     }
 
-    pub fn siphash(&self) -> [u8; 16] {
-        let mut h = SipHasher::new();
-        self.hash(&mut h);
-        h.finish128().as_bytes()
-    }
-
-    fn encode_domain(s: &str, bias: char) -> String {
-        // Check if the character at the given position is in the middle of the string
-        // and it is not followed by escape seq numbers or further escapable characters
-        let is_mid_single = |pos: usize| -> bool {
-            if pos == 0 || pos >= (s.len() - 1) {
-                return false;
-            }
-            let next_char = s[pos..].chars().nth(1).unwrap();
-            !('0'..='2').contains(&next_char) && !matches!(next_char, '-' | '.' | '_')
-        };
-
-        // Escape dashes
-        let mut buf = String::with_capacity(s.len());
-        let mut last_pos = 0;
-        for (pos, c) in s.match_indices('-') {
-            buf += &s[last_pos..pos];
-            if bias == '-' && is_mid_single(pos) {
-                buf.push('-');
-            } else {
-                buf += "-1";
-            }
-            last_pos = pos + c.len();
+    pub fn into_runquery(self) -> RunQuery {
+        RunQuery {
+            host: self.host,
+            user: self.user,
+            repo: self.repo,
+            run: self.run,
+            artifact: (),
         }
-        buf += &s[last_pos..];
-
-        // Replace special chars [._]
-        let mut buf2 = String::with_capacity(buf.len());
-        last_pos = 0;
-        for (pos, c) in buf.match_indices(['.', '_']) {
-            buf2 += &buf[last_pos..pos];
-            let cchar = c.chars().next().unwrap();
-            if cchar == bias && is_mid_single(pos) {
-                buf2.push('-');
-            } else if cchar == '.' {
-                buf2 += "-0"
-            } else {
-                buf2 += "-2"
-            }
-            last_pos = pos + c.len();
-        }
-        buf2 += &buf[last_pos..];
-
-        buf2
     }
+}
 
-    fn decode_domain(s: &str, bias: char) -> String {
-        static ESCAPE_PATTEN: Lazy<Regex> = Lazy::new(|| Regex::new("-([0-2])").unwrap());
-        static SINGLE_DASHES: Lazy<Regex> = Lazy::new(|| Regex::new("-([^0-2-])").unwrap());
+fn encode_domain(s: &str, bias: char) -> String {
+    // Check if the character at the given position is in the middle of the string
+    // and it is not followed by escape seq numbers or further escapable characters
+    let is_mid_single = |pos: usize| -> bool {
+        if pos == 0 || pos >= (s.len() - 1) {
+            return false;
+        }
+        let next_char = s[pos..].chars().nth(1).unwrap();
+        !('0'..='2').contains(&next_char) && !matches!(next_char, '-' | '.' | '_')
+    };
 
-        let repl = ESCAPE_PATTEN.replace_all(s, |c: &Captures| {
-            match &c[1] {
-                "1" => "\0", // Temporary character (to be replaced with -)
-                "0" => ".",
-                _ => "_",
-            }
-        });
-
-        let repl2 = if bias == '-' {
-            repl
+    // Escape dashes
+    let mut buf = String::with_capacity(s.len());
+    let mut last_pos = 0;
+    for (pos, c) in s.match_indices('-') {
+        buf += &s[last_pos..pos];
+        if bias == '-' && is_mid_single(pos) {
+            buf.push('-');
         } else {
-            SINGLE_DASHES.replace_all(&repl, |c: &Captures| bias.to_string() + &c[1])
-        };
-
-        repl2.replace('\0', "-")
+            buf += "-1";
+        }
+        last_pos = pos + c.len();
     }
+    buf += &s[last_pos..];
+
+    // Replace special chars [._]
+    let mut buf2 = String::with_capacity(buf.len());
+    last_pos = 0;
+    for (pos, c) in buf.match_indices(['.', '_']) {
+        buf2 += &buf[last_pos..pos];
+        let cchar = c.chars().next().unwrap();
+        if cchar == bias && is_mid_single(pos) {
+            buf2.push('-');
+        } else if cchar == '.' {
+            buf2 += "-0"
+        } else {
+            buf2 += "-2"
+        }
+        last_pos = pos + c.len();
+    }
+    buf2 += &buf[last_pos..];
+
+    buf2
+}
+
+fn decode_domain(s: &str, bias: char) -> String {
+    static ESCAPE_PATTERN: Lazy<Regex> = Lazy::new(|| Regex::new("-([0-2])").unwrap());
+    static SINGLE_DASHES: Lazy<Regex> = Lazy::new(|| Regex::new("-([^0-2-])").unwrap());
+
+    let repl = ESCAPE_PATTERN.replace_all(s, |c: &Captures| {
+        match &c[1] {
+            "1" => "\0", // Temporary character (to be replaced with -)
+            "0" => ".",
+            _ => "_",
+        }
+    });
+
+    let repl2 = if bias == '-' {
+        repl
+    } else {
+        SINGLE_DASHES.replace_all(&repl, |c: &Captures| bias.to_string() + &c[1])
+    };
+
+    repl2.replace('\0', "-")
 }
 
 #[cfg(test)]
 mod tests {
-    use super::Query;
+    use super::{ArtifactQuery, Query};
     use proptest::prelude::*;
     use rstest::rstest;
 
@@ -206,7 +260,7 @@ mod tests {
     #[case("_h--de.x-u", '.', "-2h-1-1de-x-1u")]
     #[case("0-0", '-', "0-10")]
     fn encode_domain(#[case] s: &str, #[case] bias: char, #[case] expect: &str) {
-        assert_eq!(Query::encode_domain(s, bias), expect);
+        assert_eq!(super::encode_domain(s, bias), expect);
     }
 
     #[rstest]
@@ -215,14 +269,14 @@ mod tests {
     #[case("-2h-1-1de-x-1u", '.', "_h--de.x-u")]
     #[case("0-10", '-', "0-0")]
     fn decode_domain(#[case] s: &str, #[case] bias: char, #[case] expect: &str) {
-        assert_eq!(Query::decode_domain(s, bias), expect);
+        assert_eq!(super::decode_domain(s, bias), expect);
     }
 
     proptest! {
         #[test]
        fn pt_encode_domain_roundtrip(s in "[a-z0-9\\-_\\.]+") {
-            let enc = Query::encode_domain(&s, '-');
-            let dec = Query::decode_domain(&enc, '-');
+            let enc = super::encode_domain(&s, '-');
+            let dec = super::decode_domain(&enc, '-');
             assert_eq!(dec, s);
             assert!(!enc.contains("--"), "got: `{s}` -> `{enc}`");
         }
@@ -234,27 +288,14 @@ mod tests {
         let query = Query::from_subdomain(d1).unwrap();
         assert_eq!(
             query,
-            Query {
+            Query::Artifact(ArtifactQuery {
                 host: "github.com".to_owned(),
                 user: "thetadev".to_owned(),
                 repo: "newpipe-extractor".to_owned(),
                 run: 14,
-                artifact: Some(123),
-            }
+                artifact: 123
+            })
         );
         assert_eq!(query.subdomain(), d1);
     }
-
-    #[test]
-    fn siphash() {
-        let q = Query {
-            host: "github.com".to_owned(),
-            user: "thetadev".to_owned(),
-            repo: "newpipe-extractor".to_owned(),
-            run: 14,
-            artifact: Some(123),
-        };
-        let hash = q.siphash();
-        assert_eq!(hex::encode(hash), "e523468ef42c848155a43f40895dff5a");
-    }
 }
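// The Query rework above is a typestate pattern: the artifact field is () for
// run-level queries and u64 once an artifact is selected, so handlers that
// need an artifact ID demand it at compile time instead of unwrapping an
// Option at runtime. A boiled-down restatement of the idea (field set trimmed):
struct QueryData<T> {
    run: u64,
    artifact: T,
}

type RunQuery = QueryData<()>;
type ArtifactQuery = QueryData<u64>;

fn artifact_path(q: &ArtifactQuery) -> String {
    // No Option to check: the type guarantees an artifact id is present.
    format!("/runs/{}/artifacts/{}", q.run, q.artifact)
}

fn main() {
    let q = ArtifactQuery { run: 14, artifact: 123 };
    println!("{}", artifact_path(&q));
    let _run_only = RunQuery { run: 14, artifact: () };
}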
diff --git a/src/templates.rs b/src/templates.rs
index cde8110..a1b042d 100644
--- a/src/templates.rs
+++ b/src/templates.rs
@@ -1,15 +1,14 @@
-use crate::{artifact_api::Artifact, cache::ListingEntry, config::Config, query::Query};
+use crate::{
+    artifact_api::Artifact,
+    cache::{ListingEntry, Size},
+    config::Config,
+    query::QueryData,
+};
 use yarte::{Render, Template};
 
 #[derive(Default)]
 pub struct Version;
 
-impl Render for Version {
-    fn render(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
-        f.write_str(env!("CARGO_PKG_VERSION"))
-    }
-}
-
 #[derive(Template, Default)]
 #[template(path = "index")]
 pub struct Index {
@@ -27,9 +26,11 @@ pub struct Error<'a> {
 #[template(path = "selection")]
 pub struct Selection<'a> {
     pub main_url: &'a str,
+    pub version: Version,
     pub run_url: &'a str,
     pub run_name: &'a str,
-    pub artifacts: Vec<LinkItem>,
+    pub publisher: LinkItem,
+    pub artifacts: Vec<ArtifactItem>,
 }
 
 #[derive(Template)]
@@ -37,6 +38,7 @@
 pub struct Listing<'a> {
     pub main_url: &'a str,
     pub version: Version,
+    pub run_url: &'a str,
     pub artifact_name: &'a str,
     pub path_components: Vec<LinkItem>,
     pub n_dirs: usize,
@@ -50,11 +52,38 @@ pub struct LinkItem {
     pub url: String,
 }
 
-impl LinkItem {
-    pub fn from_artifact(artifact: Artifact, query: &Query, cfg: &Config) -> Self {
+pub struct ArtifactItem {
+    pub name: String,
+    pub url: String,
+    pub size: Size,
+    pub expired: bool,
+    pub download_url: String,
+}
+
+impl ArtifactItem {
+    pub fn from_artifact(artifact: Artifact, query: &QueryData<()>, cfg: &Config) -> Self {
         Self {
             name: artifact.name,
             url: cfg.url_with_subdomain(&query.subdomain_with_artifact(Some(artifact.id))),
+            size: Size(artifact.size as u32),
+            expired: artifact.expired,
+            download_url: artifact.user_download_url.unwrap_or(artifact.download_url),
         }
     }
 }
+
+impl Render for Version {
+    fn render(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        f.write_str(env!("CARGO_PKG_VERSION"))
+    }
+}
+
+impl Render for Size {
+    fn render(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
+        write!(
+            f,
+            "{}",
+            humansize::SizeFormatter::new(self.0, humansize::DECIMAL),
+        )
+    }
+}
diff --git a/src/util.rs b/src/util.rs
index 685dfb2..380e12d 100644
--- a/src/util.rs
+++ b/src/util.rs
@@ -1,7 +1,14 @@
-use std::io::SeekFrom;
+use std::{
+    io::SeekFrom,
+    net::{IpAddr, SocketAddr},
+    str::FromStr,
+};
 
 use async_zip::error::ZipError;
-use axum::{extract::Request, http::HeaderMap};
+use axum::{
+    extract::{ConnectInfo, Request},
+    http::HeaderMap,
+};
 use headers::{Header, HeaderMapExt};
 use http::header;
 use mime_guess::Mime;
@@ -136,6 +143,42 @@ pub fn get_subdomain<'a>(host: &'a str, root_domain: &str) -> Result<&'a str> {
     Ok(stripped.trim_end_matches('.'))
 }
 
+pub fn get_ip_address(request: &Request, real_ip_header: Option<&str>) -> Result<IpAddr> {
+    match real_ip_header.and_then(|header| {
+        request
+            .headers()
+            .get(header)
+            .and_then(|val| val.to_str().ok())
+            .and_then(|val| IpAddr::from_str(val).ok())
+    }) {
+        Some(from_header) => Ok(from_header),
+        None => {
+            let socket_addr = request
+                .extensions()
+                .get::<ConnectInfo<SocketAddr>>()
+                .ok_or(Error::Internal("could not get request IP address".into()))?
+                .0;
+            Ok(socket_addr.ip())
+        }
+    }
+}
+
+pub trait IgnoreFileNotFound {
+    fn ignore_file_not_found(self) -> core::result::Result<(), std::io::Error>;
+}
+
+impl<T> IgnoreFileNotFound for core::result::Result<T, std::io::Error> {
+    fn ignore_file_not_found(self) -> core::result::Result<(), std::io::Error> {
+        match self {
+            Ok(_) => Ok(()),
+            Err(e) => match e.kind() {
+                std::io::ErrorKind::NotFound => Ok(()),
+                _ => Err(e),
+            },
+        }
+    }
+}
+
 #[cfg(test)]
 mod tests {
     use http::{header, HeaderMap};
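// Usage note for IgnoreFileNotFound above: the garbage collector deletes a
// ".name" sidecar that may never have been written, so a missing file must not
// abort the cleanup while every other IO error still propagates. The same
// behavior as a free function (sketch):
use std::io;

fn ignore_not_found<T>(res: io::Result<T>) -> io::Result<()> {
    match res {
        Ok(_) => Ok(()),
        Err(e) if e.kind() == io::ErrorKind::NotFound => Ok(()),
        Err(e) => Err(e),
    }
}

fn main() -> io::Result<()> {
    // Deleting an already-missing file is fine; any other IO error still fails.
    ignore_not_found(std::fs::remove_file("/tmp/artifactview-demo.name"))?;
    Ok(())
}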
diff --git a/templates/error.hbs b/templates/error.hbs
index 63f7d40..2e42d4c 100644
--- a/templates/error.hbs
+++ b/templates/error.hbs
@@ -1,40 +1,46 @@
[template markup not recoverable from the extracted text; both the old and the restyled error page render "Error {{status}}" and "{{msg}}"]
diff --git a/templates/index.hbs b/templates/index.hbs
index 8cbf827..94580f7 100644
--- a/templates/index.hbs
+++ b/templates/index.hbs
@@ -1,98 +1,72 @@
[template markup not recoverable from the extracted text; both versions show the prompt "Enter a GitHub/Gitea/Forgejo Actions run url to browse CI artifacts" with a URL form. The restyled page adds an "Artifactview {{version}}" footer and this disclaimer: "Disclaimer: Artifactview does not host any websites, the data is fetched from the respective software forge and is only stored temporarily on this server. The publisher of the artifact is the only one responsible for the content. Most forges delete artifacts after 90 days."]
diff --git a/templates/listing.hbs b/templates/listing.hbs
index 85b9f10..635c9f6 100644
--- a/templates/listing.hbs
+++ b/templates/listing.hbs
@@ -1,82 +1,174 @@
[template markup not recoverable from the extracted text; the page title changes from "Index of {{artifact_name}}" to "Index: {{artifact_name}}". Both versions render the breadcrumb "{{#each path_components}}{{this.name}} /{{/each}}" and a table with sortable "Name", "Size" and "CRC32" columns, a "Parent directory" row when has_parent is set, and per-entry rows of "{{this.name}}", "{{this.size}}" and "{{this.crc32}}" (dashes for directories). The restyled page keeps the "{{n_dirs}}"/"{{n_files}}" counters, now with singular/plural handling ("director{{#if n_dirs != 1}}ies{{else}}y{{/if}}", "file{{#if n_files != 1}}s{{/if}}"), and adds a "CI run" link.]
diff --git a/templates/selection.hbs b/templates/selection.hbs
index be1b878..12ab94f 100644
--- a/templates/selection.hbs
+++ b/templates/selection.hbs
@@ -1,49 +1,186 @@
[template markup not recoverable from the extracted text; the old page was a plain list titled "CI artifacts for {{run_name}}:" with one link per artifact. The restyled page is titled "Artifacts: {{run_name}}" and renders an "{{artifacts.len()}} artifact{{#if artifacts.len() != 1}}s{{/if}}" counter, a "CI run" link, and a table with "Artifact", "Size" and "Download" columns in which expired artifacts get no link and a dash instead of a download button. The footer reads "Served with Artifactview {{version}}" followed by: "Disclaimer: Artifactview does not host any websites, the data is fetched from the respective software forge and is only stored temporarily on this server. The publisher of this artifact, {{publisher.name}}, is the only one responsible for the content. Most forges delete artifacts after 90 days."]
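// Sketch of the humansize formatting used by the Size renderer in templates.rs
// and shown in the listing/selection tables above (the printed string is an
// assumption based on humansize 2.x's DECIMAL defaults):
fn main() {
    // 574292 bytes formatted with the SI (powers of 1000) convention,
    // e.g. "574.29 kB".
    println!("{}", humansize::SizeFormatter::new(574_292u32, humansize::DECIMAL));
}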