#!/usr/bin/env python3
from __future__ import annotations

import json
from collections import defaultdict
from pathlib import Path
from typing import Any


def _load_runs(run_dir: Path) -> list[tuple[str, dict[str, Any]]]:
    """Return ``(filename, payload)`` for each usable run file in *run_dir*.

    A file is usable when it matches ``*.json``, parses as a JSON object and
    contains a ``"benchmark"`` key. Files are visited in sorted path order.
    """
    runs: list[tuple[str, dict[str, Any]]] = []
    for path in sorted(run_dir.glob("*.json")):
        try:
            payload = json.loads(path.read_text(encoding="utf-8"))
        except (OSError, ValueError):
            # Best-effort scan: unreadable files, bad encodings and malformed
            # JSON (JSONDecodeError/UnicodeDecodeError are ValueErrors) are
            # skipped rather than aborting the whole report.
            continue
        if isinstance(payload, dict) and "benchmark" in payload:
            runs.append((path.name, payload))
    return runs


def _coerce_score(payload: dict[str, Any]) -> float | None:
    """Return the run's score as a float, or ``None`` if it is not numeric.

    A missing ``"score"`` key counts as ``0.0``; a present but unconvertible
    value (e.g. ``null`` or ``"n/a"``) yields ``None``.
    """
    try:
        return float(payload.get("score", 0.0))
    except (TypeError, ValueError, OverflowError):
        return None


def build_cycle1a_report(run_dir: Path) -> dict[str, Any]:
    """Aggregate the JSON run files in *run_dir* into a summary report.

    Non-failed runs are grouped by ``payload["benchmark"]`` and by a variant
    name taken from the filename: the stem is split on ``"_"`` and the fields
    between the first and the last are re-joined. Filenames with fewer than
    three fields are not aggregated.
    NOTE(review): presumably files are named
    ``<benchmark>_<variant>_<suffix>.json`` -- confirm against the writer.

    Args:
        run_dir: Directory scanned (non-recursively) for ``*.json`` files.

    Returns:
        A dict with the keys:

        * ``"n_runs"``: number of usable run files.
        * ``"n_failed"``: number of runs whose ``"status"`` is ``"failed"``.
        * ``"panel_status"``: ``"blocked"`` when there is at least one run and
          every run failed, ``"partial"`` when some runs failed, otherwise
          ``"ready"`` (this includes the case of no runs at all).
        * ``"by_benchmark"``: plain nested dict
          ``{benchmark: {variant: {"mean_score": float, "n_scores": int}}}``.

        Runs whose score cannot be converted to a float are counted in
        ``"n_runs"`` but left out of ``"by_benchmark"``.
    """
    runs = _load_runs(run_dir)
    n_failed = sum(1 for _, payload in runs if payload.get("status") == "failed")

    # Nested defaultdicts are used only while grouping; the report itself is
    # built from plain dicts so that lookups on it never insert keys.
    grouped: defaultdict[str, defaultdict[str, list[float]]] = defaultdict(
        lambda: defaultdict(list)
    )
    for filename, payload in runs:
        if payload.get("status") == "failed":
            continue
        parts = filename.removesuffix(".json").split("_")
        if len(parts) < 3:
            continue
        score = _coerce_score(payload)
        if score is None:
            # Checked before touching `grouped` so that no empty score list
            # (and hence no division by zero below) can be created.
            continue
        variant = "_".join(parts[1:-1])
        grouped[payload["benchmark"]][variant].append(score)

    by_benchmark: dict[str, dict[str, dict[str, float]]] = {
        benchmark: {
            variant: {
                "mean_score": sum(scores) / len(scores),
                "n_scores": len(scores),
            }
            for variant, scores in variants.items()
        }
        for benchmark, variants in grouped.items()
    }

    if runs and n_failed == len(runs):
        panel_status = "blocked"
    elif n_failed > 0:
        panel_status = "partial"
    else:
        panel_status = "ready"

    return {
        "n_runs": len(runs),
        "n_failed": n_failed,
        "panel_status": panel_status,
        "by_benchmark": by_benchmark,
    }