Spaces:
Sleeping
Sleeping
File size: 2,188 Bytes
91e7690 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 | from __future__ import annotations
import argparse
import json
import os
import re
import subprocess
import sys
from pathlib import Path
ROOT = Path(__file__).resolve().parents[1]
AGENT = ROOT / "high_grade_agent.py"
def parse_scores(text: str) -> dict[str, float]:
out: dict[str, float] = {}
for line in text.splitlines():
m = re.search(r"task_(\d)\s*:\s*([0-9.]+)", line)
if m:
out[f"task_{m.group(1)}"] = float(m.group(2))
m2 = re.search(r"mean\s*:\s*([0-9.]+)", line)
if m2:
out["mean"] = float(m2.group(1))
return out
def run_once(seed: int, env: dict[str, str]) -> dict[str, float]:
env2 = dict(env)
env2["SEED"] = str(seed)
p = subprocess.run([sys.executable, str(AGENT)], cwd=str(ROOT), env=env2, capture_output=True, text=True)
if p.returncode != 0:
raise RuntimeError(p.stderr or p.stdout)
return parse_scores(p.stdout)
def main() -> None:
parser = argparse.ArgumentParser(description="Deeper multi-seed evaluator for high-grade agent")
parser.add_argument("--seed-start", type=int, default=42)
parser.add_argument("--runs", type=int, default=5)
args = parser.parse_args()
env = os.environ.copy()
env.setdefault("RL_POLICY_PATH", str(ROOT / "outputs" / "rl_policy.json"))
env.setdefault("AGENT_MEMORY_PATH", str(ROOT / "outputs" / "agent_memory.json"))
env.setdefault("ENV_URL", "http://localhost:7860")
rows: list[dict[str, float]] = []
for i in range(args.runs):
seed = args.seed_start + i
score = run_once(seed, env)
score["seed"] = float(seed)
rows.append(score)
agg: dict[str, float] = {}
keys = ["task_1", "task_2", "task_3", "mean"]
for k in keys:
vals = [r.get(k, 0.0) for r in rows]
agg[f"{k}_avg"] = sum(vals) / max(1, len(vals))
payload = {"runs": rows, "aggregate": agg}
out_file = ROOT / "outputs" / "deep_eval_summary.json"
out_file.parent.mkdir(parents=True, exist_ok=True)
out_file.write_text(json.dumps(payload, indent=2))
print(json.dumps(payload, indent=2))
print(f"saved: {out_file}")
if __name__ == "__main__":
main()
|