| """ |
| train.py — Autonomy Calibration Training & Trajectory Collection |
| This script runs agents through the environment, collects rewards, |
| and prepares datasets for Hugging Face integration. |
| """ |
| import os |
| import time |
| import json |
| import random |
| import requests |
| from typing import List, Dict, Any |
| from huggingface_hub import HfApi, login |
|
|
| |
# Base URL of the environment server's HTTP API.
API_BASE = "http://localhost:8000/api"

# Task identifiers an episode can be reset with.
TASKS = [
    "email_triage",
    "devops_incident",
    "financial_request",
]

# How many episodes a single training cycle runs.
NUM_EPISODES = 10

# Hugging Face repository id that receives the uploaded results file.
HF_REPO_NAME = "autonomy-calibration-results"
|
|
class TrainingAgent:
    """Baseline agent: a keyword heuristic with a random fallback.

    Meant as a simple starting point that can be extended to use LLMs or
    neural networks.
    """

    def __init__(self, name: str = "baseline_agent"):
        """Store the agent's name."""
        self.name = name

    def select_action(self, observation: Dict[str, Any]) -> str:
        """Choose one of the actions offered by *observation*.

        Args:
            observation: Mapping that may carry ``"available_actions"``
                (the candidate action names) and ``"prompt"`` (task text).

        Returns:
            ``""`` when no actions are offered. When the prompt mentions
            "fraud" or "suspicious" (case-insensitive), the first action
            containing ``"reject"`` or ``"flag"``. Otherwise, or when no
            such action exists, a uniformly random available action.
        """
        avail = observation.get("available_actions", [])
        if not avail:
            return ""

        # The key can be present but null; treat that like a missing prompt
        # instead of failing on ``None.lower()``.
        prompt = (observation.get("prompt") or "").lower()
        if "fraud" in prompt or "suspicious" in prompt:
            for action in avail:
                if "reject" in action or "flag" in action:
                    return action

        return random.choice(avail)
|
|
# File the collected trajectories are written to and uploaded from.
_TRAJECTORY_FILE = "training_trajectories.json"


def _run_episode(agent, episode_index, task_name):
    """Play one episode of *task_name*; return its trajectory, or None if reset failed."""
    res = requests.post(f"{API_BASE}/reset", json={"task": task_name})
    if res.status_code != 200:
        print(f"β Reset failed: {res.text}")
        return None

    obs = res.json()
    done = False
    episode_reward = 0.0
    steps = []

    while not done:
        action_type = agent.select_action(obs)
        step_res = requests.post(f"{API_BASE}/step", json={"type": action_type})

        if step_res.status_code != 200:
            print(f"β Step failed: {step_res.text}")
            # A failed step ends the episode; steps taken so far are still recorded.
            break

        data = step_res.json()
        reward = data["reward"]["value"]
        episode_reward += reward

        steps.append({
            "observation": obs["prompt"],
            "action": action_type,
            "reward": reward,
            "breakdown": data["reward"]["breakdown"],
        })

        obs = data["observation"]
        done = data["done"]

    print(f"π Episode Finished. Total Reward: {episode_reward:.2f}")
    return {
        "episode": episode_index,
        "task": task_name,
        "total_reward": episode_reward,
        "steps": steps,
    }


def _save_trajectories(trajectories):
    """Write *trajectories* to the trajectory file as indented JSON."""
    with open(_TRAJECTORY_FILE, "w", encoding="utf-8") as f:
        json.dump(trajectories, f, indent=2)
    print(f"πΎ Trajectories saved to {_TRAJECTORY_FILE}")


def _push_to_hub():
    """Upload the trajectory file to the Hugging Face repo when HF_TOKEN is set."""
    token = os.getenv("HF_TOKEN")
    if not token:
        return
    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=_TRAJECTORY_FILE,
            path_in_repo=f"results_{int(time.time())}.json",
            repo_id=HF_REPO_NAME,
            repo_type="dataset",
            token=token,
        )
        print(f"π€ Results successfully pushed to Hugging Face Dataset: {HF_REPO_NAME}")
    except Exception as e:
        # Best-effort: a failed upload is reported but must not fail the run.
        print(f"β οΈ Hugging Face push failed: {e}")


def run_training_cycle():
    """Run NUM_EPISODES episodes, save their trajectories, and optionally upload them.

    For each episode a task is drawn at random from TASKS, the environment is
    reset over HTTP, and a TrainingAgent acts until the server reports the
    episode as done or a step request fails. Episodes whose reset fails are
    skipped. The trajectories are written to ``training_trajectories.json``
    and, when the ``HF_TOKEN`` environment variable is set, uploaded to the
    HF_REPO_NAME dataset repository.

    Raises:
        requests.exceptions.RequestException: if the environment server
            cannot be reached. Episodes completed before the failure are
            saved to disk first; nothing is uploaded in that case.
    """
    print("π Starting Autonomy Training Cycle...")
    agent = TrainingAgent()
    trajectories = []
    finished = False

    try:
        for i in range(NUM_EPISODES):
            task_name = random.choice(TASKS)
            print(f"--- Episode {i+1}/{NUM_EPISODES} | Task: {task_name} ---")

            trajectory = _run_episode(agent, i, task_name)
            if trajectory is not None:
                trajectories.append(trajectory)
        finished = True
    finally:
        # If an exception escapes mid-run (e.g. the server goes away), still
        # persist the episodes that completed before it propagates. A normal
        # run always writes the file, even when it is empty.
        if finished or trajectories:
            _save_trajectories(trajectories)

    _push_to_hub()
|
|
if __name__ == "__main__":
    # Script entry point: run one training cycle against the environment server.
    try:
        run_training_cycle()
    except requests.exceptions.ConnectionError:
        print("β Error: Could not connect to the environment server.")
        print("π‘ Make sure uvicorn is running: uvicorn main:app --port 8000")
        # Exit non-zero so shells and CI pipelines can tell the run failed.
        raise SystemExit(1) from None
|
|