Spaces:
Sleeping
Sleeping
File size: 1,339 Bytes
"""Shared grading helper used by all per-task grader modules."""
import sys
from pathlib import Path
# The project root is the directory one level above this module's folder.
# Put it at the front of sys.path (once) so the `environment` package
# imported below can be resolved.
_project_root = str(Path(__file__).parent.parent)
if _project_root not in sys.path:
    sys.path.insert(0, _project_root)
from environment.api_triage_env import APITriageEnv
from environment.incident_generator import get_incident_by_type
def run_agent_on_incident(incident_type: str, max_steps: int = 10) -> float:
    """Replay a scripted inspect → fix → resolve episode for one incident type.

    An ``APITriageEnv`` is created, pinned to the incident returned by
    ``get_incident_by_type(incident_type)``, and driven through three
    actions: ``"inspect_logs"``, the incident's own ``"fix_action"``, and
    ``"resolve"``.

    Args:
        incident_type: Identifier passed to ``get_incident_by_type``.
        max_steps: Step budget forwarded to ``APITriageEnv``.

    Returns:
        A score strictly between 0 and 1:
        * ``0.95`` if the episode ends with ``info["resolution"] == "success"``;
        * ``0.05`` if no incident is found, or the episode ends with any
          other resolution;
        * ``0.1`` if all scripted actions run without the episode ending.
    """
    env = APITriageEnv(max_steps=max_steps)

    # Pin the environment to the requested incident instead of letting it
    # choose one (bypasses curriculum randomness).
    env.incident = get_incident_by_type(incident_type)
    if env.incident is None:
        return 0.05

    # Episode bookkeeping is initialised by hand; presumably this mirrors
    # what the environment's own reset does — confirm against APITriageEnv.
    env.fix_applied, env.done = False, False
    env.step_counter, env.total_reward = 0, 0.0

    scripted_actions = ("inspect_logs", env.incident["fix_action"], "resolve")
    for step_action in scripted_actions:
        _state, _reward, finished, details = env.step(step_action)
        if not finished:
            continue
        # Episode is over: only a "success" resolution earns the high score.
        return 0.95 if details.get("resolution") == "success" else 0.05

    # Every scripted action was consumed without the episode finishing.
    return 0.1
|