| """ |
| scripts/run_env.py — Run a single episode of the Self-Improving Agent Environment. |
| |
| Usage |
| ----- |
| python scripts/run_env.py [--task TASK_ID] [--seed SEED] [--verbose] |
| |
| Examples |
| -------- |
| python scripts/run_env.py |
| python scripts/run_env.py --task hard_coding_001 --seed 7 --verbose |
| """ |
|
|
| import argparse |
| import json |
| import logging |
| import sys |
| import os |
|
|
| |
# Put the parent of this script's directory at the front of the module search
# path so the `env` and `agent` imports below resolve when the script is run
# directly (presumably that parent is the repository root -- confirm).
sys.path.insert(
    0,
    os.path.join(os.path.dirname(__file__), ".."),
)
|
|
| from env.environment import SelfImprovingAgentEnv |
| from agent.baseline_agent import BaselineAgent |
|
|
|
|
def main(argv=None) -> None:
    """Run one episode with the baseline agent and print a report.

    Parses the command-line options, configures logging, builds the
    environment and the agent from the chosen seed/task, runs a single
    episode, then prints the episode summary followed by one line per
    entry in ``agent.history``.

    Args:
        argv: Optional list of argument strings to parse instead of
            ``sys.argv[1:]``. ``None`` (the default) keeps the normal
            command-line behaviour, so existing ``main()`` callers are
            unaffected.

    Raises:
        SystemExit: Raised by argparse for ``--help`` or invalid arguments.
    """
    parser = argparse.ArgumentParser(description="Run a single environment episode.")
    parser.add_argument("--task", type=str, default=None, help="Task ID to run (default: random)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
    parser.add_argument("--verbose", action="store_true", help="Enable DEBUG logging")
    # parse_args(None) falls back to sys.argv[1:], so the CLI is unchanged.
    args = parser.parse_args(argv)

    logging.basicConfig(
        level=logging.DEBUG if args.verbose else logging.INFO,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    # The same seed is handed to both the environment and the agent.
    env = SelfImprovingAgentEnv(seed=args.seed, task_id=args.task)
    agent = BaselineAgent(seed=args.seed)

    print("\n" + "=" * 60)
    print(" Self-Improving Tool-Optimizing Agent Environment")
    print("=" * 60)

    summary = agent.run_episode(env, task_id=args.task)

    # NOTE(review): the leading glyph in the two section headers below looks
    # like a mis-decoded emoji; the original code point cannot be recovered
    # from this view, so the text is left as found -- restore from the
    # upstream file.
    print("\n๐ Episode Summary")
    print("-" * 40)
    for key, value in summary.items():
        print(f" {key:20s}: {value}")

    print("\n๐ Step-by-step history")
    print("-" * 40)
    for step, record in enumerate(agent.history, 1):
        atype = record["action"].get("action_type", "?")
        reward = record["reward"]
        info_keys = list(record["info"].keys())
        print(f" Step {step:2d} | {atype:20s} | reward={reward:+.4f} | info keys={info_keys}")

    # The check mark had been mis-decoded into a character that split this
    # literal across two lines; it is restored as a single string here.
    print("\n✅ Done. Log written to logs/env_log.jsonl")
|
|
|
|
# Run the episode only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
|