File size: 5,499 Bytes
80ef3b2
84b67b3
80ef3b2
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
84b67b3
 
 
0847cd0
84b67b3
80ef3b2
 
84b67b3
 
 
d3bc17d
 
 
 
 
84b67b3
80ef3b2
 
 
d3bbb53
 
80ef3b2
 
 
 
 
 
 
 
 
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
 
d3bbb53
 
 
 
 
80ef3b2
 
 
 
 
 
 
84b67b3
d3bc17d
84b67b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
80ef3b2
 
 
 
 
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
84b67b3
0847cd0
 
 
 
84b67b3
80ef3b2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python3
"""Audit the public WildFIRE-FM release before upload."""

from __future__ import annotations

import json
import re
from pathlib import Path


# Parent of the directory that contains this script (resolved to an absolute
# path). Every relative path used by the audit is joined onto this directory.
# NOTE(review): presumably the root of the release repository - confirm.
ROOT = Path(__file__).resolve().parents[1]

# Paths, relative to ROOT, that must exist; main() reports each absent entry
# as "missing required file".
REQUIRED = [
    "README.md",
    "LICENSE",
    "requirements.txt",
    "data_sources/DATA_SOURCES.md",
    "models/wildfire_fm/README.md",
    "models/wildfire_fm/modeling_unet.py",
    "models/wildfire_fm/checkpoint_manifest.json",
    "paper/wildfire_fm_evaluation_contracts.pdf",
    "paper_outputs/figures/overview_wildfire.pdf",
    "paper_outputs/figures/matching.pdf",
    "paper_outputs/figures/fig_task_contract_tiles.pdf",
    "paper_outputs/figures/fig_primary_rank_change_map.pdf",
    "paper_outputs/figures/fig_selection_regret_scatter.pdf",
    "paper_outputs/figures/fig_rank_heatmap1.pdf",
    "assets/wildfire_fm_model_card.svg",
    "assets/release_contents.svg",
    "assets/selection_regret_final.png",
    "assets/supporting_rank_map_final.png",
    "assets/primary_rank_change_final.png",
    "artifacts/manifests/paper_outputs.sha256",
    "scripts/check_paper_output_hashes.py",
]

# Any regular file under ROOT whose suffix or exact name appears here is
# reported by main() as a forbidden manuscript/source artifact.
FORBIDDEN_FILE_SUFFIXES = {".tex", ".bib", ".tikz"}
FORBIDDEN_FILE_NAMES = {"manuscript_final.pdf"}

# Substrings that must not occur in any scanned text file. Matching is a plain
# case-sensitive substring test, so short tokens such as "N/A" match anywhere
# in a file, including inside longer words or paths.
# NOTE(review): these look like private cluster paths and placeholder markers -
# confirm the intent of each entry with the release owner.
FORBIDDEN_TEXT = [
    "/home/yx21e",
    "/blue/",
    "/orange/",
    "fsu-compsci",
    "TBD",
    "N/A",
    "Pangu24",
]

# File suffixes that iter_text_files() collects for the forbidden-text scan.
TEXT_SUFFIXES = {".md", ".py", ".sh", ".tex", ".csv", ".json", ".yml", ".yaml", ".txt"}
# File names that iter_text_files() leaves out of the scan, wherever they live.
# NOTE(review): presumably these scripts legitimately contain forbidden tokens
# (audit_release.py looks like this script itself) - confirm.
SKIP_FOR_FORBIDDEN = {"audit_release.py", "build_selection_regret_rq2_figure.py"}


def iter_text_files() -> list[Path]:
    """Return the sorted text files under ROOT that the token scan should read.

    A path is kept when it is a regular file whose suffix is in TEXT_SUFFIXES.
    Anything with a ``.git`` or ``__pycache__`` path component is dropped, as
    is any path whose name is listed in SKIP_FOR_FORBIDDEN.
    """
    ignored_components = (".git", "__pycache__")

    def is_scannable(candidate: Path) -> bool:
        # Cheap name-based filters run first so the filesystem is only
        # consulted (is_file) for paths that survive them.
        if any(component in ignored_components for component in candidate.parts):
            return False
        if candidate.name in SKIP_FOR_FORBIDDEN:
            return False
        return candidate.is_file() and candidate.suffix in TEXT_SUFFIXES

    return sorted(candidate for candidate in ROOT.rglob("*") if is_scannable(candidate))


def _check_release_layout() -> list[str]:
    """Report required files that are absent and forbidden artifacts that are present."""
    problems = [f"missing required file: {rel}" for rel in REQUIRED if not (ROOT / rel).exists()]
    for path in ROOT.rglob("*"):
        if ".git" in path.parts or "__pycache__" in path.parts:
            continue
        if path.is_file() and (path.suffix in FORBIDDEN_FILE_SUFFIXES or path.name in FORBIDDEN_FILE_NAMES):
            problems.append(f"forbidden manuscript/source artifact present: {path.relative_to(ROOT)}")
    return problems


def _check_forbidden_text() -> list[str]:
    """Report every forbidden token found in the files from iter_text_files()."""
    problems: list[str] = []
    for path in iter_text_files():
        # Undecodable bytes are dropped rather than raised; the scan only
        # looks for the plain substrings in FORBIDDEN_TEXT.
        text = path.read_text(errors="ignore")
        problems.extend(
            f"{path.relative_to(ROOT)} contains forbidden token {token!r}"
            for token in FORBIDDEN_TEXT
            if token in text
        )
    return problems


def _check_readme() -> list[str]:
    """Report model-card phrases that the top-level README does not contain."""
    try:
        readme = (ROOT / "README.md").read_text(errors="ignore")
    except FileNotFoundError:
        # README.md is listed in REQUIRED, so its absence is already reported
        # by _check_release_layout(); do not abort the audit with a traceback.
        return []
    return [
        f"README missing expected model-card phrase: {phrase}"
        for phrase in ["WildFIRE-FM", "Quick Load", "Data Sources", "Evaluation Snapshot"]
        if phrase not in readme
    ]


def _check_checkpoint_manifest() -> list[str]:
    """Validate the checkpoint manifest: entry count, filenames, no source_path, sha256 format."""
    manifest_path = ROOT / "models/wildfire_fm/checkpoint_manifest.json"
    if not manifest_path.exists():
        # Absence is reported through REQUIRED.
        return []
    try:
        data = json.loads(manifest_path.read_text())
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        return [f"checkpoint manifest is not valid JSON: {exc}"]
    if not isinstance(data, dict):
        return ["checkpoint manifest should be a JSON object"]
    checkpoints = data.get("checkpoints", [])
    if not isinstance(checkpoints, list):
        return ["checkpoint manifest 'checkpoints' should be a list"]

    problems: list[str] = []
    if len(checkpoints) != 5:
        problems.append("checkpoint manifest should list five seeded checkpoints")
    for item in checkpoints:
        if not isinstance(item, dict):
            problems.append(f"unexpected checkpoint entry in manifest: {item!r}")
            continue
        rel = item.get("filename", "")
        # A non-string filename (e.g. JSON null) is reported, not dereferenced.
        if not isinstance(rel, str) or not rel.startswith("models/wildfire_fm/checkpoints/seed_"):
            problems.append(f"unexpected checkpoint filename in manifest: {rel}")
        if "source_path" in item:
            problems.append("checkpoint manifest exposes source_path")
        if not re.fullmatch(r"[0-9a-f]{64}", str(item.get("sha256", ""))):
            problems.append(f"bad sha256 in checkpoint manifest: {item}")
    return problems


def _check_table_std_cells() -> list[str]:
    """Report LaTeX tables whose ``\\ms`` cells show a standard deviation of 0.0000."""
    # NOTE(review): ".tex" is also in FORBIDDEN_FILE_SUFFIXES, so any table
    # matched here is additionally reported as a forbidden artifact - confirm
    # whether this check is still needed for the public release.
    problems: list[str] = []
    for path in (ROOT / "paper_outputs/tables").glob("*.tex"):
        text = path.read_text(errors="ignore")
        if re.search(r"\\ms\{[^}]*\}\{0\.0000\}", text):
            problems.append(f"{path.relative_to(ROOT)} displays zero std in an \\ms cell")
    return problems


def _check_checksum_manifest() -> list[str]:
    """Compare the paths in the checksum manifest with the files actually shipped."""
    checksum_manifest = ROOT / "artifacts/manifests/paper_outputs.sha256"
    if not checksum_manifest.exists():
        # Absence is reported through REQUIRED.
        return []

    problems: list[str] = []
    listed: set[str] = set()
    repeated: set[str] = set()
    for line in checksum_manifest.read_text(errors="ignore").splitlines():
        if not line.strip():
            continue
        # Each non-blank line is "<digest> <path>"; split on the first
        # whitespace run only, so the path part may itself contain spaces.
        parts = line.split(None, 1)
        if len(parts) != 2:
            problems.append(f"bad checksum manifest line: {line!r}")
            continue
        rel = parts[1].strip()
        if rel in listed:
            repeated.add(rel)
        listed.add(rel)
        if not (ROOT / rel).exists():
            problems.append(f"checksum manifest lists missing output: {rel}")

    expected: set[str] = set()
    for rel_root in ["paper", "paper_outputs", "assets"]:
        root_dir = ROOT / rel_root
        if root_dir.exists():
            expected.update(str(p.relative_to(ROOT)) for p in root_dir.rglob("*") if p.is_file())

    missing = sorted(expected - listed)
    extra = sorted(listed - expected)
    if missing:
        problems.append(f"checksum manifest missing outputs: {missing}")
    if extra:
        problems.append(f"checksum manifest has extra outputs: {extra}")
    if repeated:
        # A path listed twice makes the manifest differ from the file set
        # without being either missing or extra, so report it explicitly.
        problems.append(f"checksum manifest lists outputs more than once: {sorted(repeated)}")
    return problems


def main() -> None:
    """Run every release check and print the outcome.

    All checks run even when earlier ones fail, so one invocation lists every
    problem. Issues are printed in check order: file layout, forbidden text,
    README phrases, checkpoint manifest, table cells, checksum manifest.

    Raises:
        SystemExit: with exit status 1 when at least one issue was found.
    """
    checks = (
        _check_release_layout,
        _check_forbidden_text,
        _check_readme,
        _check_checkpoint_manifest,
        _check_table_std_cells,
        _check_checksum_manifest,
    )
    issues: list[str] = []
    for check in checks:
        issues.extend(check())

    if issues:
        print("Release audit failed:")
        for issue in issues:
            print(f"- {issue}")
        raise SystemExit(1)
    print("Release audit passed.")


# Run the audit only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()