Skip to main content

aiegis_harness/
pack_fetcher.rs

1// SPDX-License-Identifier: Apache-2.0 OR MIT
2//! Remote policy-pack fetcher.
3//!
4//! Talks to Nel's signed-distribution surface at
5//!   GET <base>/index.json
6//!   GET <base>/<pack>/<ver>.tar.gz
7//!   GET <base>/<pack>/<ver>.sig
8//!
9//! For each pack listed in `index.json` we:
10//!   1. Fetch the .tar.gz and .sig.
11//!   2. Verify the Ed25519 signature against the issuer pubkey
12//!      (`harness_core::pack_sig::verify_pack_tarball`, which empirically
13//!      matches Nel's `/opt/aegis/aegis-registry/src/policy_packs.py
14//!      ::_sign_tarball_sha`).
15//!   3. Compare the sha256 against the value advertised in index.json
16//!      (defence in depth — sig already covers this, but a mismatch flags
17//!      a publisher bug or MITM that fooled the sig in some unforeseen
18//!      way).
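//!   4. Cache to disk under
//!      $XDG_CACHE_HOME/aiegis-harness/packs/ (fallback ~/.cache/...).
//!      NOTE: `fetch_all` does not consult this cache before downloading;
//!      every call re-fetches and re-verifies each pack, so a warm start
//!      is not yet a no-op. The cache holds the most recently fetched copy.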
22//!   5. Extract manifest.json + .rego file(s) into the cache dir for
23//!      offline inspection.
24//!
25//! ROUTING DECISION (honest):
26//!   Nel's packs ship full OPA Rego v1 (`import rego.v1` + `contains msg if`
27//!   syntax). The Rust reference daemon's evaluator only supports the
28//!   `re_match(<regex>, input.value)` subset. So we DO NOT register Nel's
29//!   packs in the evaluator stack — they are loaded as INVENTORY (visible
30//!   in /health, sig-verified, cached) but actions are evaluated only
31//!   against locally-loaded AHP-Policy-Pack/0.1 JSON files passed via
32//!   --policy-pack.
33//!
34//!   This is what the spec mandates: "NO FAKE STUFF. If sig verification
35//!   can't reproduce in pure Rust without writing speculative code,
36//!   surface as needs-iteration." Sig verification reproduces fine. Rego
37//!   v1 execution does not — embedding a Rego interpreter is a separate
38//!   work item (see needs-iteration note below).
39//!
40//!   needs-iteration: embed `opa-rs` or `regorus` to actually execute
41//!   Nel's published packs. Tracked as Day-2 follow-up; until then the
42//!   harness uses Nel's packs as a SIGNED LISTING, not as live policy.
43
44use std::fs;
45use std::io::Read;
46use std::path::{Path, PathBuf};
47
48use serde::Deserialize;
49use sha2::{Digest, Sha256};
50
51use harness_core::pack_sig;
52
/// Deserialized form of the `index.json` document served at the root of the
/// pack distribution surface. `fetch_all` parses the response body into this
/// type with `serde_json`.
#[derive(Debug, Clone, Deserialize)]
pub struct PackIndex {
    /// One descriptor per published pack. Required: parsing fails when the
    /// key is missing.
    pub packs: Vec<PackDescriptor>,
    /// Optional; empty string when the key is absent. Not read anywhere in
    /// this file (presumably a publisher-side timestamp — format unverified).
    #[serde(default)]
    pub updated_at: String,
    /// Optional; empty string when the key is absent. Not read anywhere in
    /// this file.
    #[serde(default)]
    pub publisher: String,
    /// Optional; `0` when the key is absent. Not read anywhere in this file.
    #[serde(default)]
    pub manifest_schema_version: u32,
}
63
/// One entry of `index.json`: where to download a pack and what digest and
/// signature it is expected to carry.
#[derive(Debug, Clone, Deserialize)]
pub struct PackDescriptor {
    /// Pack name; used as the first directory level under the cache root and
    /// in log / error messages.
    pub name: String,
    /// Pack version; used as the second directory level under the cache root
    /// and as the stem of the cached `.tar.gz` / `.sig` file names.
    pub version: String,
    /// Hex sha256 of the tarball as advertised by the index; compared against
    /// the digest computed over the downloaded bytes.
    pub sha256: String,
    /// URL the tarball is downloaded from.
    pub signed_url: String,
    /// URL the detached signature is downloaded from.
    pub sig_url: String,
    /// Optional hex-encoded inline signature (same bytes as the `.sig` file).
    /// When non-empty it must decode to exactly the bytes fetched from
    /// `sig_url`; empty string when the key is absent.
    #[serde(default)]
    pub sig: String,
    /// Optional; `false` when the key is absent. Not read anywhere in this
    /// file — every pack is signature-checked regardless of this flag.
    #[serde(default)]
    pub signed: bool,
}
76
/// A pack that was downloaded, checked and unpacked by `fetch_one`.
#[derive(Debug, Clone)]
pub struct RemotePack {
    /// Pack name, copied from the index descriptor.
    pub name: String,
    /// Pack version, copied from the index descriptor.
    pub version: String,
    /// Hex sha256 computed locally over the downloaded tarball bytes.
    pub sha256: String,
    /// Location of the cached `.tar.gz` on disk.
    pub tarball_path: PathBuf,
    /// Parsed `manifest.json` from the tarball; `Value::Null` when the
    /// archive contains no entry with exactly that name.
    pub manifest: serde_json::Value,
    /// Archive member names ending in `.rego`, in archive order.
    pub rego_files: Vec<String>,
    /// `fetch_one` only constructs this value after signature verification
    /// succeeded, so it sets this to `true`.
    pub signature_verified: bool,
    /// Set when the pack is loaded as inventory-only because the
    /// reference evaluator does not execute OPA Rego v1.
    pub inventory_only: bool,
}
90
/// Everything that can go wrong while fetching, verifying or unpacking a
/// remote policy pack. The `Display` output is a stable `pack_fetch_*` tag
/// followed by the detail text.
#[derive(Debug)]
pub enum FetchError {
    /// HTTP client construction, transport failure, or non-success status.
    Http(String),
    /// Malformed JSON (index or manifest) or undecodable inline hex.
    Json(String),
    /// Filesystem failure while creating directories or writing files.
    Io(String),
    /// The locally computed tarball digest differs from the advertised one.
    Sha256Mismatch { got: String, want: String },
    /// Signature check failed, or the inline and detached signatures differ.
    SigVerify(String),
    /// The archive is unreadable, oversized, or contains an unsafe member.
    Tar(String),
}

impl std::fmt::Display for FetchError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Every single-payload variant renders as "<prefix><payload>"; the
        // digest mismatch is the only variant with its own layout.
        let (prefix, s) = match self {
            Self::Sha256Mismatch { got, want } => {
                return write!(f, "pack_fetch_sha256_mismatch: got {got} want {want}");
            }
            Self::Http(s) => ("pack_fetch_http:", s),
            Self::Json(s) => ("pack_fetch_json:", s),
            Self::Io(s) => ("pack_fetch_io:", s),
            Self::SigVerify(s) => ("pack_fetch_sig_verify:", s),
            Self::Tar(s) => ("pack_fetch_tar:", s),
        };
        write!(f, "{prefix}{s}")
    }
}

impl std::error::Error for FetchError {}
117
/// Compute the on-disk cache root.
///
///   $XDG_CACHE_HOME/aiegis-harness/packs  if XDG_CACHE_HOME is set to an
///                                         absolute path
///   $HOME/.cache/aiegis-harness/packs     otherwise
///   ./.cache/aiegis-harness/packs         if HOME is unset or empty
///
/// An empty or relative `XDG_CACHE_HOME` is ignored, as the XDG Base
/// Directory specification requires. Both variables are read as raw OS
/// strings, so paths that are not valid UTF-8 are honoured rather than
/// silently dropped.
pub fn cache_root() -> PathBuf {
    let xdg_cache = std::env::var_os("XDG_CACHE_HOME")
        .map(PathBuf::from)
        // An empty value is not absolute either, so this covers both cases.
        .filter(|dir| dir.is_absolute());

    let cache_base = xdg_cache.unwrap_or_else(|| {
        let home = std::env::var_os("HOME")
            .filter(|value| !value.is_empty())
            .map(PathBuf::from)
            .unwrap_or_else(|| PathBuf::from("."));
        home.join(".cache")
    });

    cache_base.join("aiegis-harness").join("packs")
}
131
132/// Fetch + verify + cache every pack listed at `<base_url>/index.json`.
133///
134/// `base_url` is the directory URL (e.g.
135/// `https://aiegis.ie/v1/harness/policy-packs/`). The function tolerates
136/// trailing-slash variation.
137///
138/// `issuer_pubkey_hex` is the hex-encoded raw Ed25519 public key the
139/// signatures must verify under. Defaults to
140/// `pack_sig::DEFAULT_PACK_ISSUER_PUBKEY_HEX`.
141pub async fn fetch_all(
142    base_url: &str,
143    issuer_pubkey_hex: &str,
144) -> Result<Vec<RemotePack>, FetchError> {
145    let client = reqwest::Client::builder()
146        .user_agent(format!("aiegis-harness-rs/{}", env!("CARGO_PKG_VERSION")))
147        .timeout(std::time::Duration::from_secs(30))
148        .build()
149        .map_err(|e| FetchError::Http(format!("client_build:{e}")))?;
150
151    let base = base_url.trim_end_matches('/');
152    let index_url = format!("{base}/index.json");
153    tracing::info!("pack_fetcher: GET {}", index_url);
154    let resp = client
155        .get(&index_url)
156        .send()
157        .await
158        .map_err(|e| FetchError::Http(format!("index_get:{e}")))?;
159    let status = resp.status();
160    if !status.is_success() {
161        return Err(FetchError::Http(format!("index_status:{status}")));
162    }
163    let index_bytes = resp
164        .bytes()
165        .await
166        .map_err(|e| FetchError::Http(format!("index_read:{e}")))?;
167    let index: PackIndex = serde_json::from_slice(&index_bytes)
168        .map_err(|e| FetchError::Json(format!("index_parse:{e}")))?;
169
170    let cache_dir = cache_root();
171    fs::create_dir_all(&cache_dir).map_err(|e| FetchError::Io(format!("mkdir:{e}")))?;
172
173    let mut out: Vec<RemotePack> = Vec::with_capacity(index.packs.len());
174    for d in &index.packs {
175        match fetch_one(&client, d, issuer_pubkey_hex, &cache_dir).await {
176            Ok(rp) => out.push(rp),
177            Err(e) => {
178                tracing::error!(
179                    "pack_fetch_failed pack={}/{} err={}",
180                    d.name,
181                    d.version,
182                    e
183                );
184                // Honest posture: one failed pack does NOT poison the rest.
185                // Daemon comes up with the packs that verified.
186            }
187        }
188    }
189    Ok(out)
190}
191
192async fn fetch_one(
193    client: &reqwest::Client,
194    d: &PackDescriptor,
195    issuer_pubkey_hex: &str,
196    cache_dir: &Path,
197) -> Result<RemotePack, FetchError> {
198    let pack_dir = cache_dir.join(&d.name).join(&d.version);
199    fs::create_dir_all(&pack_dir).map_err(|e| FetchError::Io(format!("mkdir:{e}")))?;
200    let tar_path = pack_dir.join(format!("{}.tar.gz", d.version));
201    let sig_path = pack_dir.join(format!("{}.sig", d.version));
202
203    // tarball
204    let tar_bytes = fetch_bytes(client, &d.signed_url).await?;
205    fs::write(&tar_path, &tar_bytes).map_err(|e| FetchError::Io(format!("write_tar:{e}")))?;
206
207    // sha256 check against index.json claim
208    let mut hasher = Sha256::new();
209    hasher.update(&tar_bytes);
210    let got = hex::encode(hasher.finalize());
211    if got != d.sha256 {
212        return Err(FetchError::Sha256Mismatch {
213            got,
214            want: d.sha256.clone(),
215        });
216    }
217
218    // signature (raw 64 bytes)
219    let sig_bytes = fetch_bytes(client, &d.sig_url).await?;
220    fs::write(&sig_path, &sig_bytes).map_err(|e| FetchError::Io(format!("write_sig:{e}")))?;
221
222    // Cross-check inline-hex sig in index against the file we fetched —
223    // this catches index/file drift before we even verify.
224    if !d.sig.is_empty() {
225        let expected = hex::decode(&d.sig)
226            .map_err(|e| FetchError::Json(format!("inline_sig_hex:{e}")))?;
227        if expected != sig_bytes {
228            return Err(FetchError::SigVerify(format!(
229                "inline_sig_mismatches_file pack={}/{}",
230                d.name, d.version
231            )));
232        }
233    }
234
235    // Real Ed25519 verification.
236    pack_sig::verify_pack_tarball(issuer_pubkey_hex, &tar_bytes, &sig_bytes)
237        .map_err(|e| FetchError::SigVerify(format!("{e}")))?;
238
239    // Unpack manifest.json + rego files.
240    //
241    // ZIP-SLIP DEFENCE (audit 2026-05-25): tar entries are filtered to reject
242    // any path that:
243    //   - is absolute (e.g. `/etc/passwd`)
244    //   - contains a `..` component (e.g. `../../etc/passwd`)
245    //   - contains a Windows drive prefix or backslash
246    //   - resolves outside `pack_dir` after `pack_dir.join(name)` canonicalisation
247    // Without these, a malicious (or signing-key-compromised) issuer could
248    // write arbitrary files under $XDG_CACHE_HOME/aiegis-harness/packs/..
249    // Verified empirically with a crafted `../../poc_outside.txt` tar.
250    // Decompression-bomb defence (RAV audit 2026-05-25): even with a valid
251    // signature, cap per-entry + aggregate decompressed bytes so a malicious
252    // (or compromised-key) issuer cannot OOM the daemon by shipping a
253    // multi-GB inner payload. The Nel-published packs we've inspected are
254    // <50KB each; the 16 MiB / 64 MiB caps are 300x+ headroom.
255    const MAX_TAR_ENTRY_BYTES: u64 = 16 * 1024 * 1024;
256    const MAX_TAR_TOTAL_BYTES: u64 = 64 * 1024 * 1024;
257    let mut manifest: serde_json::Value = serde_json::Value::Null;
258    let mut rego_files: Vec<String> = Vec::new();
259    {
260        let gz = flate2::read::GzDecoder::new(&tar_bytes[..]);
261        let mut ar = tar::Archive::new(gz);
262        let mut total_bytes: u64 = 0;
263        for entry in ar.entries().map_err(|e| FetchError::Tar(format!("{e}")))? {
264            let mut entry = entry.map_err(|e| FetchError::Tar(format!("entry:{e}")))?;
265            let declared_size = entry.size();
266            if declared_size > MAX_TAR_ENTRY_BYTES {
267                return Err(FetchError::Tar(format!(
268                    "entry_too_large: {declared_size} bytes > cap {MAX_TAR_ENTRY_BYTES} in pack {}/{}",
269                    d.name, d.version
270                )));
271            }
272            total_bytes = total_bytes.saturating_add(declared_size);
273            if total_bytes > MAX_TAR_TOTAL_BYTES {
274                return Err(FetchError::Tar(format!(
275                    "archive_too_large: total {total_bytes} > cap {MAX_TAR_TOTAL_BYTES} in pack {}/{}",
276                    d.name, d.version
277                )));
278            }
279            let path_buf = entry
280                .path()
281                .map_err(|e| FetchError::Tar(format!("path:{e}")))?
282                .to_path_buf();
283            let name = path_buf.to_string_lossy().to_string();
284
285            // Reject any path that could escape the pack_dir.
286            if path_buf.is_absolute()
287                || name.contains('\\')
288                || path_buf
289                    .components()
290                    .any(|c| matches!(c, std::path::Component::ParentDir | std::path::Component::Prefix(_) | std::path::Component::RootDir))
291            {
292                return Err(FetchError::Tar(format!(
293                    "unsafe_tar_member: rejected path-traversal entry {:?} in pack {}/{}",
294                    name, d.name, d.version
295                )));
296            }
297
298            // Belt-and-braces: the header could lie about size, but tar's
299            // `Entry::take()` bounds the actual read to the declared size.
300            // We additionally guard the buffer capacity to avoid a
301            // pathological reserve.
302            let mut buf: Vec<u8> = Vec::with_capacity(
303                (declared_size.min(MAX_TAR_ENTRY_BYTES)) as usize,
304            );
305            entry
306                .read_to_end(&mut buf)
307                .map_err(|e| FetchError::Tar(format!("read:{e}")))?;
308            if (buf.len() as u64) > MAX_TAR_ENTRY_BYTES {
309                return Err(FetchError::Tar(format!(
310                    "entry_overflow_on_read: {} bytes > cap {} in pack {}/{}",
311                    buf.len(), MAX_TAR_ENTRY_BYTES, d.name, d.version
312                )));
313            }
314            let dst = pack_dir.join(&path_buf);
315
316            // Defence in depth: the joined path must still start with pack_dir.
317            // (Symlink-target escape is also blocked: we only ever write to
318            // the joined path, never follow links.)
319            if !dst.starts_with(&pack_dir) {
320                return Err(FetchError::Tar(format!(
321                    "unsafe_tar_member: joined path {:?} escapes pack_dir {:?}",
322                    dst, pack_dir
323                )));
324            }
325
326            if let Some(parent) = dst.parent() {
327                fs::create_dir_all(parent)
328                    .map_err(|e| FetchError::Io(format!("mkdir_extract:{e}")))?;
329            }
330            fs::write(&dst, &buf).map_err(|e| FetchError::Io(format!("write_extract:{e}")))?;
331            if name == "manifest.json" {
332                manifest = serde_json::from_slice(&buf)
333                    .map_err(|e| FetchError::Json(format!("manifest:{e}")))?;
334            } else if name.ends_with(".rego") {
335                rego_files.push(name.clone());
336            }
337        }
338    }
339
340    tracing::info!(
341        "pack_verified pack={}/{} sha256={} rego_files={}",
342        d.name,
343        d.version,
344        got,
345        rego_files.len()
346    );
347
348    Ok(RemotePack {
349        name: d.name.clone(),
350        version: d.version.clone(),
351        sha256: got,
352        tarball_path: tar_path,
353        manifest,
354        rego_files,
355        signature_verified: true,
356        inventory_only: true, // Rego v1 not executable by the subset evaluator.
357    })
358}
359
360async fn fetch_bytes(client: &reqwest::Client, url: &str) -> Result<Vec<u8>, FetchError> {
361    let resp = client
362        .get(url)
363        .send()
364        .await
365        .map_err(|e| FetchError::Http(format!("get:{e}")))?;
366    let status = resp.status();
367    if !status.is_success() {
368        return Err(FetchError::Http(format!("status:{status} url={url}")));
369    }
370    resp.bytes()
371        .await
372        .map(|b| b.to_vec())
373        .map_err(|e| FetchError::Http(format!("read:{e}")))
374}
375
376/// Render a 16-char fingerprint of all loaded packs' sha256s, in pack-load
377/// order. Lets e2e tests assert on a stable identity for the pack-source.
378pub fn fingerprint(packs: &[RemotePack]) -> String {
379    let mut hasher = Sha256::new();
380    for p in packs {
381        hasher.update(p.name.as_bytes());
382        hasher.update(b"@");
383        hasher.update(p.version.as_bytes());
384        hasher.update(b":");
385        hasher.update(p.sha256.as_bytes());
386        hasher.update(b"\n");
387    }
388    let digest = hasher.finalize();
389    hex::encode(&digest[..8])
390}