# commitguard / smoke_test_episodes.py
# Nitishkumar-ai's picture
# Upload folder using huggingface_hub
# e4f3d12 verified
import random
from pathlib import Path
from commitguard_env.environment import CommitGuardEnvironment
from commitguard_env.models import CommitGuardAction
def run_random_episodes(n=100):
    """Run ``n`` episodes driven by random actions and print reward statistics.

    Each episode resets the environment and then repeatedly submits one of
    three randomly chosen action types ("request_context", "analyze" or
    "verdict") until the environment reports ``done`` or the safety cap is
    hit.  ``env.step`` is unpacked as ``(observation, reward, done)``.

    After all episodes finish, the average/max/min total reward, the
    average/max episode length and the number of distinct total rewards are
    printed to stdout.

    Args:
        n: Number of episodes to run.  When ``n`` is zero or negative no
            episode is run, the environment is not created, and a short
            notice is printed instead of statistics.

    Returns:
        None.  Results are reported via ``print`` only.
    """
    # Guard first: with no episodes, the averages below would divide by zero
    # and max()/min() would raise on the empty reward list.
    if n <= 0:
        print("No episodes requested; nothing to report.")
        return

    env = CommitGuardEnvironment(data_path=Path("data/devign_filtered.jsonl"))
    rewards = []
    episode_lengths = []

    for _ in range(n):
        env.reset()
        done = False
        total_reward = 0
        steps = 0

        while not done:
            # Randomly choose an action
            action_type = random.choice(["request_context", "analyze", "verdict"])
            if action_type == "request_context":
                action = CommitGuardAction(
                    action_type="request_context",
                    file_path="random_file.c",
                )
            elif action_type == "analyze":
                action = CommitGuardAction(
                    action_type="analyze",
                    reasoning="Thinking...",
                )
            else:
                action = CommitGuardAction(
                    action_type="verdict",
                    is_vulnerable=random.choice([True, False]),
                    vuln_type="CWE-119",
                    exploit_sketch="Random exploit attempt",
                )

            _obs, reward, done = env.step(action)
            total_reward += reward
            steps += 1

            # Safety break: the check runs after the increment, so an episode
            # that never reports done is cut off after its 11th step.
            if steps > 10:
                break

        rewards.append(total_reward)
        episode_lengths.append(steps)

    print(f"Finished {n} episodes.")
    print(f"Average reward: {sum(rewards)/n:.4f}")
    print(f"Max reward: {max(rewards):.4f}")
    print(f"Min reward: {min(rewards):.4f}")
    print(f"Average episode length: {sum(episode_lengths)/n:.2f}")
    print(f"Max episode length: {max(episode_lengths)}")

    # Check distribution: identical totals across every episode suggest the
    # reward signal is not responding to the actions taken.
    unique_rewards = set(rewards)
    print(f"Unique rewards: {len(unique_rewards)}")
    if len(unique_rewards) > 1:
        print("Reward distribution looks healthy (not all zeros).")
    else:
        print("Warning: Only one reward value found.")
# Script entry point: run the default 100-episode smoke test when executed
# directly (not when imported as a module).
if __name__ == "__main__":
    run_random_episodes(n=100)