File size: 2,255 Bytes
fcb838d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
"""
Legacy episode runner for CodeCourt.
This keeps the old simulation-only loop available for quick smoke tests.
"""

import argparse
import json
import os
import sys

# Put the parent of this file's directory at the front of the module search
# path before the project imports that follow.
# NOTE(review): presumably that parent is the repository root holding the
# `agents` and `env` packages -- confirm against the repo layout.
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from agents.setter import SetterAgent
from agents.solver import SolverAgent
from env.codecourt_env import CodeCourtEnv


def parse_args(argv=None):
    """Parse the command-line options of the legacy mock runner.

    Args:
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) makes argparse read
            ``sys.argv[1:]``, which matches the previous zero-argument call.

    Returns:
        argparse.Namespace with ``episodes`` (int, default 200),
        ``save_every`` (int, default 50) and ``output_dir`` (str, default
        ``"./outputs"``).

    Raises:
        SystemExit: Raised by argparse on unknown options or values that
            cannot be converted to the declared type.
    """
    parser = argparse.ArgumentParser(description="Legacy CodeCourt mock runner")
    parser.add_argument(
        "--episodes",
        type=int,
        default=200,
        help="number of episodes to run (default: %(default)s)",
    )
    parser.add_argument(
        "--save-every",
        type=int,
        default=50,
        help="write a history checkpoint every N episodes (default: %(default)s)",
    )
    parser.add_argument(
        "--output-dir",
        type=str,
        default="./outputs",
        help="directory that receives the JSON history files (default: %(default)s)",
    )
    return parser.parse_args(argv)


def _write_history(path, history):
    """Write *history* (a list of per-episode dicts) to *path* as indented JSON."""
    with open(path, "w", encoding="utf-8") as f:
        json.dump(history, f, indent=2)


def main():
    """Run the legacy episode loop and persist the per-episode history.

    For every episode the environment is reset, the setter and the solver
    each produce code for the current problem, and the environment scores
    both in a single ``step`` call. One record per episode is appended to an
    in-memory history, which is written to
    ``<output_dir>/history_ep<N>.json`` every ``--save-every`` episodes and
    to ``<output_dir>/training_history.json`` once the loop finishes.

    A ``--save-every`` value of 0 disables the periodic checkpoints; only the
    final history file is written in that case.
    """
    args = parse_args()
    os.makedirs(args.output_dir, exist_ok=True)

    setter = SetterAgent(use_reference=True)
    solver = SolverAgent(use_reference=False, use_brute_force=False)
    env = CodeCourtEnv(difficulty_progression=True)

    history = []
    for ep in range(args.episodes):
        obs = env.reset()
        # NOTE(review): this reads a private attribute of the environment to
        # get the full problem rather than using the observation returned by
        # reset(); confirm whether a public accessor exists.
        full_problem = env._current_state.problem

        setter_code = setter.generate_solution(full_problem)
        solver_code = solver.solve(full_problem)
        # step() returns four values; the third one is not used here.
        setter_info, solver_info, _, info = env.step(setter_code, solver_code)

        history.append(
            {
                "episode": ep,
                "archetype": obs["archetype"],
                "task_id": obs["task_id"],
                "difficulty": obs["difficulty"],
                "setter_reward": setter_info["reward"],
                "solver_reward": solver_info["reward"],
                "outcome": info["outcome"],
                "setter_elo": info["elo"]["setter_elo"],
                "solver_elo": info["elo"]["solver_elo"],
                "solver_pass_rate": info["solver_pass_rate"],
            }
        )

        # An interval of 0 would make the modulo raise ZeroDivisionError, so
        # it is treated as "no periodic checkpoints" instead of crashing.
        if args.save_every and (ep + 1) % args.save_every == 0:
            ckpt_path = os.path.join(args.output_dir, f"history_ep{ep+1}.json")
            _write_history(ckpt_path, history)

    final_path = os.path.join(args.output_dir, "training_history.json")
    _write_history(final_path, history)

    print(f"Saved legacy mock history to {final_path}")


# Run the episode loop only when this file is executed as a script; importing
# the module does not start a run.
if __name__ == "__main__":
    main()