Spaces:

JOY0021
/

autonomy-calibration-benchmark

Paused

File size: 4,016 Bytes

ef737d3

"""
train.py — Autonomy Calibration Training & Trajectory Collection
This script runs an agent through the environment server, records per-step
rewards, saves the trajectories to JSON, and optionally uploads them to a
Hugging Face dataset repo.
"""
import os
import time
import json
import random
import requests
from typing import List, Dict, Any
from huggingface_hub import HfApi, login

# --- CONFIGURATION ---
# Base URL of the environment server; "/reset" and "/step" are appended to it.
API_BASE = "http://localhost:8000/api"
# Task names an episode can be reset with; one is drawn at random per episode.
TASKS = ["email_triage", "devops_incident", "financial_request"]
# Number of episodes collected in one training cycle.
NUM_EPISODES = 10
# Hugging Face dataset repo that results are uploaded to.
# Change to your HF username/repo.
HF_REPO_NAME = "autonomy-calibration-results"

class TrainingAgent:
    """A simple heuristic agent that can be extended to use LLMs or neural networks.

    The baseline policy is: when the prompt mentions a risk keyword, pick the
    first available action that looks cautious; otherwise explore by picking a
    random available action.
    """

    # Substrings (lower-case) that mark a prompt as risky.
    _RISK_KEYWORDS = ("fraud", "suspicious")
    # Substrings (lower-case) that mark an action as the cautious choice.
    _CAUTIOUS_KEYWORDS = ("reject", "flag")

    def __init__(self, name: str = "baseline_agent"):
        """Create an agent identified by *name*."""
        self.name = name

    def select_action(self, observation: Dict[str, Any]) -> str:
        """Choose the next action for *observation*.

        Args:
            observation: Mapping read for two keys: ``"available_actions"``
                (a list of action names) and ``"prompt"`` (free text). Either
                key may be missing or ``None``.

        Returns:
            One of the available actions, or ``""`` when there are none.
        """
        avail = observation.get("available_actions") or []
        if not avail:
            return ""

        # `or ""` covers a prompt that is present but None, which .get's
        # default alone would let through and crash on .lower().
        prompt = (observation.get("prompt") or "").lower()
        if any(keyword in prompt for keyword in self._RISK_KEYWORDS):
            for action in avail:
                # Compare case-insensitively, matching how the prompt is
                # handled, so names like "FLAG_FOR_REVIEW" are recognised.
                lowered = action.lower()
                if any(keyword in lowered for keyword in self._CAUTIOUS_KEYWORDS):
                    return action

        # No risk signal (or no cautious action offered): explore randomly.
        return random.choice(avail)

def _post_to_env(endpoint: str, payload: Dict[str, Any], label: str,
                 request_timeout: float):
    """POST *payload* as JSON to ``{API_BASE}/{endpoint}``.

    Args:
        endpoint: Path segment appended to ``API_BASE`` ("reset" or "step").
        payload: JSON-serialisable request body.
        label: Human-readable call name used in the failure message.
        request_timeout: Seconds to wait before giving up on the server.

    Returns:
        The response object on HTTP 200; ``None`` after printing the reason
        when the call timed out or returned another status. Connection errors
        are not caught here and propagate to the caller.
    """
    try:
        res = requests.post(
            f"{API_BASE}/{endpoint}", json=payload, timeout=request_timeout
        )
    except requests.exceptions.Timeout as exc:
        print(f"❌ {label} failed: {exc}")
        return None
    if res.status_code != 200:
        print(f"❌ {label} failed: {res.text}")
        return None
    return res


def _run_episode(agent: "TrainingAgent", obs: Dict[str, Any],
                 request_timeout: float) -> tuple:
    """Step the environment from *obs* until it reports done or a step fails.

    Returns:
        ``(steps, episode_reward)``: the recorded per-step dicts and the sum
        of their rewards. A failed step ends the episode early and keeps the
        steps recorded so far.
    """
    steps: List[Dict[str, Any]] = []
    episode_reward = 0.0
    done = False

    while not done:
        action_type = agent.select_action(obs)
        step_res = _post_to_env(
            "step", {"type": action_type}, "Step", request_timeout
        )
        # Compare with None explicitly: a Response's truthiness reflects its
        # HTTP status, not whether an object was returned.
        if step_res is None:
            break

        data = step_res.json()
        reward = data["reward"]["value"]
        episode_reward += reward

        steps.append({
            # .get mirrors the agent, which also tolerates a missing prompt.
            "observation": obs.get("prompt", ""),
            "action": action_type,
            "reward": reward,
            "breakdown": data["reward"]["breakdown"],
        })

        obs = data["observation"]
        done = data["done"]

    return steps, episode_reward


def _push_to_hub(local_path: str, token: str) -> None:
    """Upload *local_path* to the ``HF_REPO_NAME`` dataset repo (best effort).

    The remote file name carries the current Unix timestamp. Any failure is
    reported on stdout and swallowed, because the upload is optional.
    """
    try:
        api = HfApi()
        api.upload_file(
            path_or_fileobj=local_path,
            path_in_repo=f"results_{int(time.time())}.json",
            repo_id=HF_REPO_NAME,
            repo_type="dataset",
            token=token,
        )
        print(f"📤 Results successfully pushed to Hugging Face Dataset: {HF_REPO_NAME}")
    except Exception as e:
        print(f"⚠️ Hugging Face push failed: {e}")


def run_training_cycle(request_timeout: float = 30.0) -> None:
    """Collect ``NUM_EPISODES`` trajectories and persist them.

    For each episode a task is drawn at random from ``TASKS``, the environment
    is reset, and a ``TrainingAgent`` is stepped until the episode ends. The
    trajectories are written to ``training_trajectories.json`` and, when the
    ``HF_TOKEN`` environment variable is set, uploaded to Hugging Face.

    Args:
        request_timeout: Seconds to wait for each environment HTTP call. A
            call that times out is reported like any other failed reset/step
            instead of blocking the run forever.

    Raises:
        requests.exceptions.ConnectionError: If the environment server cannot
            be reached.
    """
    print("🚀 Starting Autonomy Training Cycle...")
    agent = TrainingAgent()
    trajectories: List[Dict[str, Any]] = []
    output_path = "training_trajectories.json"

    for i in range(NUM_EPISODES):
        task_name = random.choice(TASKS)
        print(f"--- Episode {i+1}/{NUM_EPISODES} | Task: {task_name} ---")

        # 1. Reset -- a failed reset skips the episode entirely.
        res = _post_to_env("reset", {"task": task_name}, "Reset", request_timeout)
        if res is None:
            continue

        # 2. Step loop
        steps, episode_reward = _run_episode(agent, res.json(), request_timeout)

        print(f"🏁 Episode Finished. Total Reward: {episode_reward:.2f}")
        trajectories.append({
            "episode": i,
            "task": task_name,
            "total_reward": episode_reward,
            "steps": steps,
        })

    # 3. Save locally
    with open(output_path, "w", encoding="utf-8") as f:
        json.dump(trajectories, f, indent=2)
    print(f"💾 Trajectories saved to {output_path}")

    # 4. Integrate with Hugging Face (optional, only when a token is set)
    hf_token = os.getenv("HF_TOKEN")
    if hf_token:
        _push_to_hub(output_path, hf_token)

if __name__ == "__main__":
    # The environment server (uvicorn) must already be running; this script
    # does not start it, it only reports when the server is unreachable.
    try:
        run_training_cycle()
    except requests.exceptions.ConnectionError:
        print("❌ Error: Could not connect to the environment server.")
        print("💡 Make sure uvicorn is running: uvicorn main:app --port 8000")
        # Exit non-zero so shells and CI can tell the run failed.
        raise SystemExit(1) from None