import random
from pathlib import Path
from commitguard_env.environment import CommitGuardEnvironment
from commitguard_env.models import CommitGuardAction

def _random_action():
    """Build one randomly chosen CommitGuardAction.

    The action type is picked with ``random.choice`` from
    "request_context", "analyze" and "verdict". A verdict additionally
    draws a random ``is_vulnerable`` flag; every other field is a fixed
    placeholder value.
    """
    action_type = random.choice(["request_context", "analyze", "verdict"])

    if action_type == "request_context":
        return CommitGuardAction(action_type="request_context", file_path="random_file.c")
    if action_type == "analyze":
        return CommitGuardAction(action_type="analyze", reasoning="Thinking...")
    return CommitGuardAction(
        action_type="verdict",
        is_vulnerable=random.choice([True, False]),
        vuln_type="CWE-119",
        exploit_sketch="Random exploit attempt",
    )


def run_random_episodes(n: int = 100, *, max_steps: int = 11, data_path=None) -> None:
    """Run ``n`` episodes with a random policy and print reward statistics.

    Each episode resets the environment and then repeatedly sends a random
    action until the environment reports ``done`` or ``max_steps`` actions
    have been taken. The summed reward and the step count of every episode
    are collected and summarized on stdout.

    Args:
        n: Number of episodes to run. For ``n <= 0`` nothing is run: a
            notice is printed and the function returns before the
            environment is created (previously this raised
            ``ZeroDivisionError`` while computing the averages).
        max_steps: Upper bound on actions per episode. The default of 11
            matches the original safety break, which fired once the step
            counter exceeded 10.
        data_path: Path handed to ``CommitGuardEnvironment``. Defaults to
            ``data/devign_filtered.jsonl``.
    """
    if n <= 0:
        # No episodes means empty statistics: averages would divide by
        # zero and max()/min() would fail on an empty list.
        print(f"No episodes to run (n={n}); skipping statistics.")
        return

    if data_path is None:
        data_path = Path("data/devign_filtered.jsonl")
    env = CommitGuardEnvironment(data_path=data_path)

    rewards = []
    episode_lengths = []

    for _ in range(n):
        env.reset()
        done = False
        total_reward = 0
        steps = 0

        # The step cap is a safety net in case the environment never
        # signals termination under a random policy.
        while not done and steps < max_steps:
            _, reward, done = env.step(_random_action())
            total_reward += reward
            steps += 1

        rewards.append(total_reward)
        episode_lengths.append(steps)

    print(f"Finished {n} episodes.")
    print(f"Average reward: {sum(rewards)/n:.4f}")
    print(f"Max reward: {max(rewards):.4f}")
    print(f"Min reward: {min(rewards):.4f}")
    print(f"Average episode length: {sum(episode_lengths)/n:.2f}")
    print(f"Max episode length: {max(episode_lengths)}")

    # Check distribution: a single distinct total across all episodes
    # suggests the reward signal is degenerate.
    unique_rewards = set(rewards)
    print(f"Unique rewards: {len(unique_rewards)}")
    if len(unique_rewards) > 1:
        print("Reward distribution looks healthy (not all zeros).")
    else:
        print("Warning: Only one reward value found.")

if __name__ == "__main__":
    # Executed as a script: run the default batch of 100 random episodes.
    run_random_episodes(n=100)