| |
| """Audit the public WildFIRE-FM release before upload.""" |
|
|
| from __future__ import annotations |
|
|
| import json |
| import re |
| from pathlib import Path |
|
|
|
|
# Release tree root: assumes this script lives one directory below it
# (e.g. scripts/audit_release.py), so parents[1] is the repo root.
ROOT = Path(__file__).resolve().parents[1]


# Paths (relative to ROOT) that must exist for the release to be complete.
REQUIRED = [
    "README.md",
    "LICENSE",
    "requirements.txt",
    "data_sources/DATA_SOURCES.md",
    "models/wildfire_fm/README.md",
    "models/wildfire_fm/modeling_unet.py",
    "models/wildfire_fm/checkpoint_manifest.json",
    "paper/wildfire_fm_evaluation_contracts.pdf",
    "paper_outputs/figures/overview_wildfire.pdf",
    "paper_outputs/figures/matching.pdf",
    "paper_outputs/figures/fig_task_contract_tiles.pdf",
    "paper_outputs/figures/fig_primary_rank_change_map.pdf",
    "paper_outputs/figures/fig_selection_regret_scatter.pdf",
    "paper_outputs/figures/fig_rank_heatmap1.pdf",
    "assets/wildfire_fm_model_card.svg",
    "assets/release_contents.svg",
    "assets/selection_regret_final.png",
    "assets/supporting_rank_map_final.png",
    "assets/primary_rank_change_final.png",
    "artifacts/manifests/paper_outputs.sha256",
    "scripts/check_paper_output_hashes.py",
]


# Manuscript source artifacts that must not ship in the public release.
FORBIDDEN_FILE_SUFFIXES = {".tex", ".bib", ".tikz"}
FORBIDDEN_FILE_NAMES = {"manuscript_final.pdf"}


# Substrings that should never appear in shipped text files — these look like
# internal cluster paths, org identifiers, and placeholder text (TBD/N/A);
# "Pangu24" is presumably an internal baseline name — confirm with authors.
FORBIDDEN_TEXT = [
    "/home/yx21e",
    "/blue/",
    "/orange/",
    "fsu-compsci",
    "TBD",
    "N/A",
    "Pangu24",
]


# File suffixes treated as scannable text for the forbidden-token check.
TEXT_SUFFIXES = {".md", ".py", ".sh", ".tex", ".csv", ".json", ".yml", ".yaml", ".txt"}
# Scripts exempt from the token scan (they legitimately mention the tokens).
SKIP_FOR_FORBIDDEN = {"audit_release.py", "build_selection_regret_rq2_figure.py"}
|
|
|
|
def iter_text_files() -> list[Path]:
    """Return all text-like files under ROOT eligible for token scanning.

    Skips VCS/bytecode directories and the scripts listed in
    SKIP_FOR_FORBIDDEN; only files whose suffix is in TEXT_SUFFIXES
    are included. The result is sorted for deterministic output.
    """

    def _eligible(candidate: Path) -> bool:
        # Anything inside .git or __pycache__ is never scanned.
        if ".git" in candidate.parts or "__pycache__" in candidate.parts:
            return False
        # Exempted scripts legitimately contain forbidden tokens.
        if candidate.name in SKIP_FOR_FORBIDDEN:
            return False
        return candidate.is_file() and candidate.suffix in TEXT_SUFFIXES

    return sorted(p for p in ROOT.rglob("*") if _eligible(p))
|
|
|
|
def _check_required_files(issues: list[str]) -> None:
    """Flag any file from REQUIRED that is absent from the release tree."""
    for rel in REQUIRED:
        if not (ROOT / rel).exists():
            issues.append(f"missing required file: {rel}")


def _check_forbidden_artifacts(issues: list[str]) -> None:
    """Flag manuscript sources (by suffix) and explicitly banned filenames."""
    for path in ROOT.rglob("*"):
        if ".git" in path.parts or "__pycache__" in path.parts:
            continue
        if path.is_file() and (path.suffix in FORBIDDEN_FILE_SUFFIXES or path.name in FORBIDDEN_FILE_NAMES):
            issues.append(f"forbidden manuscript/source artifact present: {path.relative_to(ROOT)}")


def _check_forbidden_tokens(issues: list[str]) -> None:
    """Scan every eligible text file for forbidden substrings."""
    for path in iter_text_files():
        text = path.read_text(errors="ignore")
        for token in FORBIDDEN_TEXT:
            if token in text:
                issues.append(f"{path.relative_to(ROOT)} contains forbidden token {token!r}")


def _check_readme(issues: list[str]) -> None:
    """Check the model-card README for the expected section phrases."""
    readme_path = ROOT / "README.md"
    if not readme_path.exists():
        # Absence is already reported by _check_required_files; the original
        # code crashed with FileNotFoundError here before printing any issues.
        return
    readme = readme_path.read_text(errors="ignore")
    for phrase in ["WildFIRE-FM", "Quick Load", "Data Sources", "Evaluation Snapshot"]:
        if phrase not in readme:
            issues.append(f"README missing expected model-card phrase: {phrase}")


def _check_checkpoint_manifest(issues: list[str]) -> None:
    """Validate checkpoint count, filename layout, source-path leaks, and sha256s."""
    manifest_path = ROOT / "models/wildfire_fm/checkpoint_manifest.json"
    if not manifest_path.exists():
        return
    try:
        data = json.loads(manifest_path.read_text())
    except json.JSONDecodeError as exc:
        # Report malformed JSON as an audit failure instead of crashing.
        issues.append(f"checkpoint manifest is not valid JSON: {exc}")
        return
    checkpoints = data.get("checkpoints", [])
    if len(checkpoints) != 5:
        issues.append("checkpoint manifest should list five seeded checkpoints")
    for item in checkpoints:
        rel = item.get("filename", "")
        if not rel.startswith("models/wildfire_fm/checkpoints/seed_"):
            issues.append(f"unexpected checkpoint filename in manifest: {rel}")
        if "source_path" in item:
            issues.append("checkpoint manifest exposes source_path")
        if not re.fullmatch(r"[0-9a-f]{64}", str(item.get("sha256", ""))):
            issues.append(f"bad sha256 in checkpoint manifest: {item}")


def _check_table_stds(issues: list[str]) -> None:
    r"""Flag LaTeX tables rendering a 0.0000 std in an \ms{mean}{std} cell."""
    for path in (ROOT / "paper_outputs/tables").glob("*.tex"):
        text = path.read_text(errors="ignore")
        if re.search(r"\\ms\{[^}]*\}\{0\.0000\}", text):
            issues.append(f"{path.relative_to(ROOT)} displays zero std in an \\ms cell")


def _check_checksum_manifest(issues: list[str]) -> None:
    """Verify the sha256 manifest covers exactly the shipped paper/figure/asset files."""
    checksum_manifest = ROOT / "artifacts/manifests/paper_outputs.sha256"
    if not checksum_manifest.exists():
        return
    listed: list[str] = []
    for line in checksum_manifest.read_text(errors="ignore").splitlines():
        if not line.strip():
            continue
        # Manifest format: "<sha256>  <relative path>" (split on first whitespace run).
        parts = line.split(None, 1)
        if len(parts) != 2:
            issues.append(f"bad checksum manifest line: {line!r}")
            continue
        rel = parts[1].strip()
        listed.append(rel)
        if not (ROOT / rel).exists():
            issues.append(f"checksum manifest lists missing output: {rel}")
    expected_paths: list[str] = []
    for rel_root in ["paper", "paper_outputs", "assets"]:
        root_dir = ROOT / rel_root
        if root_dir.exists():
            expected_paths.extend(str(p.relative_to(ROOT)) for p in root_dir.rglob("*") if p.is_file())
    expected = sorted(set(expected_paths))
    if sorted(listed) != expected:
        missing = sorted(set(expected) - set(listed))
        extra = sorted(set(listed) - set(expected))
        # Duplicates alone previously made the comparison fail without
        # appending any issue, letting a malformed manifest pass the audit.
        duplicates = sorted({rel for rel in listed if listed.count(rel) > 1})
        if missing:
            issues.append(f"checksum manifest missing outputs: {missing}")
        if extra:
            issues.append(f"checksum manifest has extra outputs: {extra}")
        if duplicates:
            issues.append(f"checksum manifest lists duplicate outputs: {duplicates}")


def main() -> None:
    """Run every release-audit check; print findings and exit 1 on failure."""
    issues: list[str] = []

    _check_required_files(issues)
    _check_forbidden_artifacts(issues)
    _check_forbidden_tokens(issues)
    _check_readme(issues)
    _check_checkpoint_manifest(issues)
    _check_table_stds(issues)
    _check_checksum_manifest(issues)

    if issues:
        print("Release audit failed:")
        for issue in issues:
            print(f"- {issue}")
        raise SystemExit(1)
    print("Release audit passed.")
|
|
|
|
# Script entry point.
if __name__ == "__main__":
    main()
|
|