Spaces:
Running
Running
| """Plot eval/results/summary.json into PNG images for the README. | |
| Run after `eval.eval`:: | |
| python -m eval.plot_results --in eval/results/summary.json --out-dir eval/results | |
| Generates: | |
| * `bar_dismiss_on_malicious.png` — the headline plot. | |
| * `bar_macro_f1.png` — macro F1 by model. | |
| * `confusion_<model>.png` — one heatmap per evaluated model. | |
| We use matplotlib only; no seaborn dependency. This keeps the Hugging | |
| Face Space slim and lets the plotter run on CPU only. | |
| """ | |
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import os | |
| import sys | |
| _HERE = os.path.dirname(os.path.abspath(__file__)) | |
| sys.path.insert(0, os.path.dirname(_HERE)) | |
| from eval.metrics import ALL_ACTIONS # noqa: E402 | |
| def _try_matplotlib(): | |
| try: | |
| import matplotlib | |
| matplotlib.use("Agg") | |
| import matplotlib.pyplot as plt | |
| return plt | |
| except ImportError: | |
| return None | |
| def main() -> None: | |
| parser = argparse.ArgumentParser() | |
| parser.add_argument("--in", dest="inp", default="eval/results/summary.json") | |
| parser.add_argument("--out-dir", default="eval/results") | |
| args = parser.parse_args() | |
| plt = _try_matplotlib() | |
| if plt is None: | |
| sys.exit("matplotlib is required to render plots: `pip install matplotlib`") | |
| inp = os.path.join(os.path.dirname(_HERE), args.inp) | |
| out_dir = os.path.join(os.path.dirname(_HERE), args.out_dir) | |
| os.makedirs(out_dir, exist_ok=True) | |
| with open(inp, "r") as f: | |
| summaries = json.load(f) | |
| labels = [s["label"] for s in summaries] | |
| miss = [s["dismiss_on_malicious"] for s in summaries] | |
| f1s = [s["macro_f1"] for s in summaries] | |
| fig, ax = plt.subplots(figsize=(7, 4)) | |
| ax.bar(labels, miss) | |
| ax.set_ylabel("dismiss-on-malicious rate (lower is better)") | |
| ax.set_title("Missed-malicious rate by model") | |
| plt.xticks(rotation=20, ha="right") | |
| fig.tight_layout() | |
| fig.savefig(os.path.join(out_dir, "bar_dismiss_on_malicious.png"), dpi=150) | |
| plt.close(fig) | |
| fig, ax = plt.subplots(figsize=(7, 4)) | |
| ax.bar(labels, f1s) | |
| ax.set_ylabel("macro F1 (higher is better)") | |
| ax.set_title("Macro F1 by model") | |
| plt.xticks(rotation=20, ha="right") | |
| fig.tight_layout() | |
| fig.savefig(os.path.join(out_dir, "bar_macro_f1.png"), dpi=150) | |
| plt.close(fig) | |
| for s in summaries: | |
| cm = s["confusion_matrix"] | |
| rows = [[cm.get(gt, {}).get(p, 0) for p in ALL_ACTIONS] for gt in ALL_ACTIONS] | |
| fig, ax = plt.subplots(figsize=(5.5, 4.5)) | |
| im = ax.imshow(rows, cmap="Blues") | |
| ax.set_xticks(range(len(ALL_ACTIONS)), ALL_ACTIONS, rotation=25, ha="right") | |
| ax.set_yticks(range(len(ALL_ACTIONS)), ALL_ACTIONS) | |
| ax.set_xlabel("predicted") | |
| ax.set_ylabel("ground truth") | |
| ax.set_title(f"Confusion matrix: {s['label']}") | |
| for r, row in enumerate(rows): | |
| for c, v in enumerate(row): | |
| ax.text(c, r, str(v), ha="center", va="center", fontsize=8, | |
| color="white" if v > max(max(rr) for rr in rows) / 2 else "black") | |
| fig.colorbar(im, ax=ax, fraction=0.046, pad=0.04) | |
| fig.tight_layout() | |
| fig.savefig(os.path.join(out_dir, f"confusion_{s['label']}.png"), dpi=150) | |
| plt.close(fig) | |
| print(f"Wrote plots to {out_dir}") | |
| if __name__ == "__main__": | |
| main() | |