"""
AegisGym Simulation Script
Runs multiple audit episodes using the LLM for inference (CPU-friendly).
This generates the logs and metrics to analyze the system's performance.
"""
import os
import torch
import json
from transformers import AutoTokenizer, AutoModelForCausalLM
from client_env import get_sync_client
from train import parse_action, SYSTEM_PROMPT

# Model identifier passed to the tokenizer and model `from_pretrained` loaders
# in run_simulation().
MODEL_NAME = "Qwen/Qwen2.5-0.5B-Instruct"
# URL handed to get_sync_client() to obtain the environment client.
ENV_URL = "https://armaan020-aegisgym.hf.space"

def _build_audit_message(tier, obs_dict):
    """Format the user message for one audit from the tier and observation."""
    # Missing observation fields fall back to empty containers so the prompt
    # is always well-formed.
    return (
        "Audit the following transaction.\n\n"
        f"Tier: {tier.upper()}\n"
        f"Transactions: {obs_dict.get('transactions', [])}\n"
        f"Context: {obs_dict.get('retrieved_regs', [])}\n"
        f"Account: {obs_dict.get('account_metadata', {})}"
    )


def run_simulation(num_episodes=5):
    """Run inference-only audit episodes against the environment.

    Loads ``MODEL_NAME`` on CPU, connects to the environment at ``ENV_URL``,
    and for each episode: resets the environment, builds a chat prompt from
    the observation, generates a completion, parses it into an action with
    ``parse_action``, and submits that action with a single ``env.step`` call.
    Progress, the generated text, and the reward are printed to stdout.

    Args:
        num_episodes: Number of episodes to run. With zero (or a negative
            value) no episode is executed and the average is reported as 0.0.

    Returns:
        A list with one dict per episode, holding the keys ``"episode"``
        (1-based index), ``"tier"``, ``"action"`` and ``"reward"``.
    """
    print("=== Starting AegisGym Simulation (Inference Only) ===")
    print(f"Model: {MODEL_NAME}")
    print(f"Env:   {ENV_URL}\n")

    print("Loading model on CPU...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
    model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype="auto", device_map="cpu")
    print("Model loaded.\n")

    env = get_sync_client(ENV_URL)

    total_reward = 0.0
    results = []

    for i in range(num_episodes):
        print(f"--- Episode {i+1} ---")
        result = env.reset()
        obs_dict = result.get("observation", {})

        state = env.state()
        tier = state.get("current_tier", "easy")

        user_msg = _build_audit_message(tier, obs_dict)
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user",   "content": user_msg},
        ]
        prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

        print(f"[Audit Prompt]:\n{user_msg}")

        # Inference: gradients are not needed, so disable autograd tracking.
        inputs = tokenizer(prompt, return_tensors="pt")
        with torch.no_grad():
            outputs = model.generate(**inputs, max_new_tokens=128)

        # Slice off the prompt tokens so only the newly generated text is decoded.
        prompt_length = inputs.input_ids.shape[1]
        completion = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)
        print(f"[Model Reasoning]:\n{completion}")

        action = parse_action(completion)
        print(f"[Action]: {action.action_type} on {action.target_id}")

        step_result = env.step(action.model_dump())
        reward = step_result.get("reward", 0.0)
        if reward is None:
            # NOTE(review): the .get default only covers a missing key; a
            # present-but-null reward would break the running sum below.
            reward = 0.0
        done = step_result.get("done", False)

        print(f"[Reward]: {reward} | [Done]: {done}\n")
        total_reward += reward
        results.append({
            "episode": i+1,
            "tier": tier,
            "action": action.action_type,
            "reward": reward
        })

    # Divide by the number of episodes actually run; this avoids a
    # ZeroDivisionError when num_episodes is 0 or negative.
    average_reward = total_reward / len(results) if results else 0.0

    print("=== Simulation Complete ===")
    print(f"Average Reward: {average_reward}")

    # Hand the per-episode metrics back to the caller instead of discarding them.
    return results

# Script entry point: run a three-episode simulation when executed directly.
if __name__ == "__main__":
    run_simulation(num_episodes=3)