Spaces:
Sleeping
Sleeping
File size: 3,477 Bytes
"""
Baseline script — establishes pre-training metrics.

Run this BEFORE training to get the "before" numbers for your README.

Usage:
    python scripts/baseline.py --episodes 50
"""
import sys
import os
import json
from pathlib import Path
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from env.codecourt_env import CodeCourtEnv
from agents.setter import SetterAgent
from agents.solver import SolverAgent
def update_root_manifest(baseline_payload: dict) -> None:
    """Store ``baseline_payload`` under the ``"baseline"`` key of the manifest.

    The manifest lives at ``./outputs/artifact_manifest.json`` (relative to
    the current working directory). Any other top-level keys already present
    in the manifest are preserved; an existing ``"baseline"`` entry is
    overwritten.

    Args:
        baseline_payload: JSON-serialisable mapping to record.

    Notes:
        An unreadable manifest (invalid JSON, undecodable bytes, or a JSON
        value that is not an object) is treated as empty and rewritten from
        scratch rather than aborting the run.
    """
    manifest_path = Path("./outputs/artifact_manifest.json")
    manifest = {}
    if manifest_path.exists():
        try:
            loaded = json.loads(manifest_path.read_text(encoding="utf-8"))
        except (json.JSONDecodeError, UnicodeDecodeError):
            # Best effort: a corrupt manifest must not block saving results.
            loaded = None
        # Valid JSON that is not an object (e.g. a list) cannot take a
        # "baseline" key, so fall back to a fresh manifest in that case too.
        if isinstance(loaded, dict):
            manifest = loaded
    manifest["baseline"] = baseline_payload
    # Do not rely on the caller having created the output directory.
    manifest_path.parent.mkdir(parents=True, exist_ok=True)
    manifest_path.write_text(json.dumps(manifest, indent=2), encoding="utf-8")
def run_baseline(n_episodes: int = 50, use_brute_force: bool = True):
    """Run the pre-training baseline and persist its metrics.

    Each episode resets a ``CodeCourtEnv`` (created with
    ``difficulty_progression=False``), asks a ``SetterAgent`` built with
    ``use_reference=True`` and a ``SolverAgent`` built with
    ``use_brute_force=use_brute_force`` for code on the current problem, and
    steps the environment once with both submissions.

    Side effects:
        - Prints progress and the summary to stdout.
        - Writes ``./outputs/baseline_results.json`` containing the summary
          and the per-episode records.
        - Records the summary in the artifact manifest via
          ``update_root_manifest``.

    Args:
        n_episodes: Number of episodes to run; must be positive.
        use_brute_force: Passed to ``SolverAgent``; also selects the
            ``"solver_mode"`` label (``"brute_force"`` or ``"reference"``).

    Returns:
        The summary dict: episode count, solver mode, win/invalid rates and
        the average solver pass rate and reward.

    Raises:
        ValueError: If ``n_episodes`` is not positive. Averages over zero
            episodes are undefined, so fail before doing any work instead of
            hitting a ``ZeroDivisionError`` when the summary is computed.
    """
    if n_episodes <= 0:
        raise ValueError(
            f"n_episodes must be a positive integer, got {n_episodes!r}"
        )

    env = CodeCourtEnv(difficulty_progression=False)
    setter = SetterAgent(use_reference=True)
    solver = SolverAgent(use_brute_force=use_brute_force)
    results = []
    print(f"\nRunning baseline ({n_episodes} episodes, brute_force={use_brute_force})...")
    print("-" * 60)
    for ep in range(n_episodes):
        obs = env.reset()
        # NOTE(review): reads the environment's private state to obtain the
        # full problem; the observation returned by reset() is only used for
        # its "archetype" field here.
        full_problem = env._current_state.problem
        setter_code = setter.generate_solution(full_problem)
        solver_code = solver.solve(full_problem)
        # The third element (done flag) is not needed: one step per episode.
        setter_info, solver_info, _done, info = env.step(setter_code, solver_code)
        results.append({
            "episode": ep,
            "archetype": obs["archetype"],
            "outcome": info["outcome"],
            "setter_reward": setter_info["reward"],
            "solver_reward": solver_info["reward"],
            "solver_pass_rate": info["solver_pass_rate"],
        })

    # Summary stats
    outcomes = [r["outcome"] for r in results]
    solver_pass_rates = [r["solver_pass_rate"] for r in results]
    solver_rewards = [r["solver_reward"] for r in results]
    summary = {
        "total_episodes": n_episodes,
        "solver_mode": "brute_force" if use_brute_force else "reference",
        "solver_win_rate": outcomes.count("solver_wins") / n_episodes,
        "setter_win_rate": outcomes.count("setter_wins") / n_episodes,
        "invalid_rate": outcomes.count("invalid") / n_episodes,
        "avg_solver_pass_rate": sum(solver_pass_rates) / len(solver_pass_rates),
        "avg_solver_reward": sum(solver_rewards) / len(solver_rewards),
    }
    print("\nBaseline Results:")
    print(json.dumps(summary, indent=2))

    # Save
    os.makedirs("./outputs", exist_ok=True)
    with open("./outputs/baseline_results.json", "w", encoding="utf-8") as f:
        json.dump({"summary": summary, "episodes": results}, f, indent=2)
    update_root_manifest({
        "path": "./outputs/baseline_results.json",
        "summary": summary,
    })
    print("\n✓ Baseline saved to ./outputs/baseline_results.json")
    return summary
if __name__ == "__main__":
    # Command-line entry point: parse the options, then run the baseline once.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--episodes", default=50, type=int)
    parser.add_argument(
        "--optimal",
        action="store_true",
        help="Use optimal solver (sanity check)",
    )
    cli = parser.parse_args()

    # --optimal switches the solver away from its brute-force mode.
    brute_force = not cli.optimal
    run_baseline(cli.episodes, use_brute_force=brute_force)
|