""" scripts/run_env.py — Run a single episode of the Self-Improving Agent Environment. Usage ----- python scripts/run_env.py [--task TASK_ID] [--seed SEED] [--verbose] Examples -------- python scripts/run_env.py python scripts/run_env.py --task hard_coding_001 --seed 7 --verbose """ import argparse import json import logging import sys import os # Make project root importable sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..")) from env.environment import SelfImprovingAgentEnv from agent.baseline_agent import BaselineAgent def main(): parser = argparse.ArgumentParser(description="Run a single environment episode.") parser.add_argument("--task", type=str, default=None, help="Task ID to run (default: random)") parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)") parser.add_argument("--verbose", action="store_true", help="Enable DEBUG logging") args = parser.parse_args() level = logging.DEBUG if args.verbose else logging.INFO logging.basicConfig( level=level, format="%(asctime)s [%(levelname)s] %(name)s: %(message)s", ) env = SelfImprovingAgentEnv(seed=args.seed, task_id=args.task) agent = BaselineAgent(seed=args.seed) print("\n" + "=" * 60) print(" Self-Improving Tool-Optimizing Agent Environment") print("=" * 60) summary = agent.run_episode(env, task_id=args.task) print("\nšŸ“‹ Episode Summary") print("-" * 40) for k, v in summary.items(): print(f" {k:20s}: {v}") print("\nšŸ”„ Step-by-step history") print("-" * 40) for i, record in enumerate(agent.history, 1): action = record["action"] atype = action.get("action_type", "?") reward = record["reward"] info_keys = list(record["info"].keys()) print(f" Step {i:2d} | {atype:20s} | reward={reward:+.4f} | info keys={info_keys}") print("\nāœ… Done. Log written to logs/env_log.jsonl") if __name__ == "__main__": main()