#!/usr/bin/env python3
"""Audit the public WildFIRE-FM release before upload."""

from __future__ import annotations

import json
import re
from collections import Counter
from pathlib import Path

# Release root: the parent of the directory that contains this script.
ROOT = Path(__file__).resolve().parents[1]

# Paths (relative to the release root) that must exist.
REQUIRED = [
    "README.md",
    "LICENSE",
    "requirements.txt",
    "data_sources/DATA_SOURCES.md",
    "models/wildfire_fm/README.md",
    "models/wildfire_fm/modeling_unet.py",
    "models/wildfire_fm/checkpoint_manifest.json",
    "paper/wildfire_fm_evaluation_contracts.pdf",
    "paper_outputs/figures/overview_wildfire.pdf",
    "paper_outputs/figures/matching.pdf",
    "paper_outputs/figures/fig_task_contract_tiles.pdf",
    "paper_outputs/figures/fig_primary_rank_change_map.pdf",
    "paper_outputs/figures/fig_selection_regret_scatter.pdf",
    "paper_outputs/figures/fig_rank_heatmap1.pdf",
    "assets/wildfire_fm_model_card.svg",
    "assets/release_contents.svg",
    "assets/selection_regret_final.png",
    "assets/supporting_rank_map_final.png",
    "assets/primary_rank_change_final.png",
    "artifacts/manifests/paper_outputs.sha256",
    "scripts/check_paper_output_hashes.py",
]

# Files with these suffixes or names are reported as manuscript/source
# artifacts that must not ship.
FORBIDDEN_FILE_SUFFIXES = {".tex", ".bib", ".tikz"}
FORBIDDEN_FILE_NAMES = {"manuscript_final.pdf"}

# Substrings that must not occur in any scanned text file.
FORBIDDEN_TEXT = [
    "/home/yx21e",
    "/blue/",
    "/orange/",
    "fsu-compsci",
    "TBD",
    "N/A",
    "Pangu24",
]

# Only files with these suffixes are scanned for forbidden text.
TEXT_SUFFIXES = {
    ".md",
    ".py",
    ".sh",
    ".tex",
    ".csv",
    ".json",
    ".yml",
    ".yaml",
    ".txt",
}

# File names excluded from the forbidden-text scan.
# NOTE(review): presumably these legitimately contain the tokens - confirm.
SKIP_FOR_FORBIDDEN = {"audit_release.py", "build_selection_regret_rq2_figure.py"}

# Any path with one of these components (relative to the root) is ignored.
_SKIPPED_PARTS = frozenset({".git", "__pycache__"})
_README_PHRASES = ["WildFIRE-FM", "Quick Load", "Data Sources", "Evaluation Snapshot"]
_CHECKPOINT_PREFIX = "models/wildfire_fm/checkpoints/seed_"
_CHECKSUMMED_ROOTS = ["paper", "paper_outputs", "assets"]
_SHA256_RE = re.compile(r"[0-9a-f]{64}")
_ZERO_STD_RE = re.compile(r"\\ms\{[^}]*\}\{0\.0000\}")


def _read_text(path: Path) -> str:
    """Read *path* as UTF-8, dropping undecodable bytes."""
    # An explicit encoding keeps the audit independent of the machine locale.
    return path.read_text(encoding="utf-8", errors="ignore")


def _iter_release_files(root: Path) -> list[Path]:
    """Return every regular file under *root*, sorted, minus skipped trees."""
    found: list[Path] = []
    for path in root.rglob("*"):
        # Test the path relative to root so an ancestor directory that happens
        # to be called ".git" cannot hide the entire release from the audit.
        if not _SKIPPED_PARTS.isdisjoint(path.relative_to(root).parts):
            continue
        if path.is_file():
            found.append(path)
    return sorted(found)


def iter_text_files(root: Path | None = None) -> list[Path]:
    """Return the sorted text files that are scanned for forbidden tokens.

    Args:
        root: Directory to scan; defaults to the module-level ``ROOT``.

    Returns:
        Files whose suffix is in ``TEXT_SUFFIXES`` and whose name is not in
        ``SKIP_FOR_FORBIDDEN``, excluding ``.git`` and ``__pycache__`` trees.
    """
    base = ROOT if root is None else root
    return [
        path
        for path in _iter_release_files(base)
        if path.name not in SKIP_FOR_FORBIDDEN and path.suffix in TEXT_SUFFIXES
    ]


def _check_required(root: Path) -> list[str]:
    """Report every ``REQUIRED`` path that does not exist under *root*."""
    return [
        f"missing required file: {rel}"
        for rel in REQUIRED
        if not (root / rel).exists()
    ]


def _check_forbidden_files(root: Path) -> list[str]:
    """Report files whose suffix or name marks them as manuscript sources."""
    return [
        f"forbidden manuscript/source artifact present: {path.relative_to(root)}"
        for path in _iter_release_files(root)
        if path.suffix in FORBIDDEN_FILE_SUFFIXES or path.name in FORBIDDEN_FILE_NAMES
    ]


def _check_forbidden_text(root: Path) -> list[str]:
    """Report each forbidden token found in each scanned text file."""
    issues: list[str] = []
    for path in iter_text_files(root):
        text = _read_text(path)
        issues.extend(
            f"{path.relative_to(root)} contains forbidden token {token!r}"
            for token in FORBIDDEN_TEXT
            if token in text
        )
    return issues


def _check_readme(root: Path) -> list[str]:
    """Report expected model-card phrases that are absent from README.md."""
    readme_path = root / "README.md"
    if not readme_path.is_file():
        # README.md is in REQUIRED, so its absence is reported by the
        # required-file check; reading it here would only crash the audit.
        return []
    readme = _read_text(readme_path)
    return [
        f"README missing expected model-card phrase: {phrase}"
        for phrase in _README_PHRASES
        if phrase not in readme
    ]


def _check_checkpoint_manifest(root: Path) -> list[str]:
    """Validate the checkpoint manifest's structure, filenames and hashes."""
    manifest_path = root / "models/wildfire_fm/checkpoint_manifest.json"
    if not manifest_path.exists():
        return []
    try:
        data = json.loads(manifest_path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        return [f"checkpoint manifest could not be loaded: {exc}"]
    if not isinstance(data, dict):
        return ["checkpoint manifest top level should be a JSON object"]
    checkpoints = data.get("checkpoints", [])
    if not isinstance(checkpoints, list):
        return ["checkpoint manifest 'checkpoints' should be a list"]

    issues: list[str] = []
    if len(checkpoints) != 5:
        issues.append("checkpoint manifest should list five seeded checkpoints")
    for item in checkpoints:
        if not isinstance(item, dict):
            issues.append(f"bad checkpoint entry in manifest: {item!r}")
            continue
        rel = str(item.get("filename", ""))
        if not rel.startswith(_CHECKPOINT_PREFIX):
            issues.append(f"unexpected checkpoint filename in manifest: {rel}")
        if "source_path" in item:
            issues.append("checkpoint manifest exposes source_path")
        if not _SHA256_RE.fullmatch(str(item.get("sha256", ""))):
            issues.append(f"bad sha256 in checkpoint manifest: {item}")
    return issues


def _check_table_zero_std(root: Path) -> list[str]:
    """Report LaTeX tables that show a 0.0000 second argument in an \\ms cell."""
    tables_dir = root / "paper_outputs/tables"
    if not tables_dir.is_dir():
        return []
    return [
        f"{path.relative_to(root)} displays zero std in an \\ms cell"
        for path in sorted(tables_dir.glob("*.tex"))
        if _ZERO_STD_RE.search(_read_text(path))
    ]


def _check_checksum_manifest(root: Path) -> list[str]:
    """Compare the checksum manifest's paths with the files actually shipped."""
    checksum_manifest = root / "artifacts/manifests/paper_outputs.sha256"
    if not checksum_manifest.exists():
        return []

    issues: list[str] = []
    listed: list[str] = []
    for line in _read_text(checksum_manifest).splitlines():
        if not line.strip():
            continue
        # Each row is "<digest><whitespace><relative path>".
        parts = line.split(None, 1)
        if len(parts) != 2:
            issues.append(f"bad checksum manifest line: {line!r}")
            continue
        rel = parts[1].strip()
        listed.append(rel)
        if not (root / rel).exists():
            issues.append(f"checksum manifest lists missing output: {rel}")

    expected: set[str] = set()
    for rel_root in _CHECKSUMMED_ROOTS:
        root_dir = root / rel_root
        if root_dir.exists():
            # as_posix() keeps the comparison separator-independent, so the
            # forward-slash manifest also matches on Windows.
            expected.update(
                p.relative_to(root).as_posix()
                for p in root_dir.rglob("*")
                if p.is_file()
            )

    listed_set = set(listed)
    missing = sorted(expected - listed_set)
    extra = sorted(listed_set - expected)
    duplicates = sorted(rel for rel, count in Counter(listed).items() if count > 1)
    if missing:
        issues.append(f"checksum manifest missing outputs: {missing}")
    if extra:
        issues.append(f"checksum manifest has extra outputs: {extra}")
    if duplicates:
        # Repeated rows make the listing differ from the file set without
        # showing up as missing or extra, so they need their own report.
        issues.append(f"checksum manifest lists duplicate outputs: {duplicates}")
    return issues


def _collect_issues(root: Path) -> list[str]:
    """Run every check against *root* and return the issues in check order."""
    checks = (
        _check_required,
        _check_forbidden_files,
        _check_forbidden_text,
        _check_readme,
        _check_checkpoint_manifest,
        _check_table_zero_std,
        _check_checksum_manifest,
    )
    issues: list[str] = []
    for check in checks:
        issues.extend(check(root))
    return issues


def main(root: Path | None = None) -> None:
    """Audit the release tree and print the outcome.

    Args:
        root: Directory to audit; defaults to the module-level ``ROOT``.

    Raises:
        SystemExit: With exit code 1 when at least one issue is found.
    """
    issues = _collect_issues(ROOT if root is None else root)
    if issues:
        print("Release audit failed:")
        for issue in issues:
            print(f"- {issue}")
        raise SystemExit(1)
    print("Release audit passed.")


if __name__ == "__main__":
    main()