SelfEvo / scripts /run_env.py
Akhil-8605's picture
Publishing to Hugging Face
62f4978
"""
scripts/run_env.py — Run a single episode of the Self-Improving Agent Environment.
Usage
-----
python scripts/run_env.py [--task TASK_ID] [--seed SEED] [--verbose]
Examples
--------
python scripts/run_env.py
python scripts/run_env.py --task hard_coding_001 --seed 7 --verbose
"""
import argparse
import json
import logging
import sys
import os
# Make project root importable
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from env.environment import SelfImprovingAgentEnv
from agent.baseline_agent import BaselineAgent
def main() -> None:
    """Run one episode with the baseline agent and print a report to stdout.

    Command-line options (parsed from ``sys.argv``):

    * ``--task``: task ID passed to both the environment constructor and
      ``agent.run_episode``. Defaults to ``None``, which the help text
      describes as "random".
    * ``--seed``: integer seed passed to both the environment and the agent.
      Defaults to 42.
    * ``--verbose``: configure logging at DEBUG instead of INFO.

    After the episode finishes, the mapping returned by ``run_episode`` is
    printed key by key, followed by one line per entry of ``agent.history``.
    Each history record is read through its ``"action"``, ``"reward"`` and
    ``"info"`` keys.
    """
    parser = argparse.ArgumentParser(description="Run a single environment episode.")
    parser.add_argument("--task", type=str, default=None, help="Task ID to run (default: random)")
    parser.add_argument("--seed", type=int, default=42, help="Random seed (default: 42)")
    parser.add_argument("--verbose", action="store_true", help="Enable DEBUG logging")
    args = parser.parse_args()

    level = logging.DEBUG if args.verbose else logging.INFO
    logging.basicConfig(
        level=level,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    )

    # The same seed goes to both objects so a run is reproducible end to end.
    env = SelfImprovingAgentEnv(seed=args.seed, task_id=args.task)
    agent = BaselineAgent(seed=args.seed)

    print("\n" + "=" * 60)
    print(" Self-Improving Tool-Optimizing Agent Environment")
    print("=" * 60)

    summary = agent.run_episode(env, task_id=args.task)

    # The section markers below are emoji written as escape sequences: the
    # literal characters had been corrupted by a wrong-encoding round trip,
    # and escapes keep this source file pure ASCII so that cannot recur.
    print("\n\U0001F4CB Episode Summary")  # U+1F4CB CLIPBOARD
    print("-" * 40)
    for k, v in summary.items():
        print(f" {k:20s}: {v}")

    print("\n\U0001F504 Step-by-step history")  # U+1F504 circular arrows
    print("-" * 40)
    for i, record in enumerate(agent.history, 1):
        action = record["action"]
        # "?" is shown when the action carries no "action_type" entry.
        atype = action.get("action_type", "?")
        reward = record["reward"]
        info_keys = list(record["info"].keys())
        print(f" Step {i:2d} | {atype:20s} | reward={reward:+.4f} | info keys={info_keys}")

    print("\n\u2705 Done. Log written to logs/env_log.jsonl")  # U+2705 CHECK MARK
# Run the episode only when this file is executed as a script, so that
# importing the module has no side effects beyond its definitions.
if __name__ == "__main__":
    main()