Spaces:

XcodeAddy
/

incident-triage-env

Running

App Files Community

XcodeAddy committed on Apr 12

Commit

af2ccc5

1 Parent(s): 18aa055

Keep reset and schema rewards inside unit interval

Browse files

Files changed (5) hide show

environment.py +7 -6
inference.py +8 -7
models.py +4 -4
tests/test_env.py +3 -3
tests/test_inference.py +4 -4

environment.py CHANGED Viewed

@@ -39,6 +39,7 @@ TASK_SPECS = {
     },
 }
 DEFAULT_RESET_SEED = 42
 def validate_ticket_dataset(tickets: list[dict]) -> None:
@@ -77,9 +78,9 @@ class IncidentEnv:
         self.episode_id = ""
         self.step_count = 0
         self.max_steps = 1
-        self.total_reward = 0.0
         self.done = False
-        self.last_reward = 0.0
         self.last_action_summary = None
     def reset(
@@ -92,14 +93,14 @@ class IncidentEnv:
         self.current_ticket = self._select_ticket(normalized_task, ticket_id, seed)
         self.episode_id = str(uuid.uuid4())
         self.step_count = 0
-        self.total_reward = 0.0
         self.done = False
-        self.last_reward = 0.0
         self.last_action_summary = None
         return StepResult(
             observation=self._build_observation(),
-            reward=IncidentReward(value=0.0, reason="Episode initialized."),
             done=False,
             info={
                 "episode_id": self.episode_id,
@@ -138,7 +139,7 @@ class IncidentEnv:
         self.step_count += 1
         self.last_reward = reward_value
-        self.total_reward += reward_value
         self.done = self.step_count >= self.max_steps
         self.last_action_summary = f"Submitted {selected_field}={agent_answer}"

     },
 }
 DEFAULT_RESET_SEED = 42
+INITIAL_REWARD = 0.01
 def validate_ticket_dataset(tickets: list[dict]) -> None:
         self.episode_id = ""
         self.step_count = 0
         self.max_steps = 1
+        self.total_reward = INITIAL_REWARD
         self.done = False
+        self.last_reward = INITIAL_REWARD
         self.last_action_summary = None
     def reset(
         self.current_ticket = self._select_ticket(normalized_task, ticket_id, seed)
         self.episode_id = str(uuid.uuid4())
         self.step_count = 0
+        self.total_reward = INITIAL_REWARD
         self.done = False
+        self.last_reward = INITIAL_REWARD
         self.last_action_summary = None
         return StepResult(
             observation=self._build_observation(),
+            reward=IncidentReward(value=INITIAL_REWARD, reason="Episode initialized and awaiting first action."),
             done=False,
             info={
                 "episode_id": self.episode_id,
         self.step_count += 1
         self.last_reward = reward_value
+        self.total_reward = reward_value
         self.done = self.step_count >= self.max_steps
         self.last_action_summary = f"Submitted {selected_field}={agent_answer}"

inference.py CHANGED Viewed

@@ -27,6 +27,7 @@ BENCHMARK = "incident-triage-env"
 MAX_TOKENS = 300
 TEMPERATURE = 0.0
 OUTPUT_PATH = Path(os.environ.get("OUTPUT_PATH") or "/tmp/outputs/baseline_scores.json")
 SYSTEM_PROMPT = """You are an expert SRE triaging production incidents.
 You will receive an incident alert, structured context, and the expected output field.
@@ -356,8 +357,8 @@ def get_action(model_client: Optional[OpenAI], observation: Dict[str, Any]) -> D
 def reward_value(step_data: Dict[str, Any]) -> float:
     reward = step_data.get("reward", {})
     if isinstance(reward, dict):
-        return float(reward.get("value", 0.0))
-    return float(reward or 0.0)
 def active_model_name(model_client: Optional[OpenAI]) -> str:
@@ -379,7 +380,7 @@ def run_episode(
 ) -> Dict[str, Any]:
     rewards: List[float] = []
     steps_taken = 0
-    score = 0.0
     success = False
     episode_result: Dict[str, Any]
@@ -417,18 +418,18 @@ def run_episode(
             "agent_answer": step_data.get("info", {}).get("agent_answer"),
         }
     except Exception as exc:
-        log_step(step=max(steps_taken, 1), action="error", reward=0.0, done=True, error=str(exc))
-        score = 0.0
         success = False
         episode_result = {
             "incident_id": ticket["incident_id"],
             "task_type": ticket["task_type"],
-            "score": 0.0,
             "success": False,
             "error": str(exc),
         }
     finally:
-        log_end(success=success, steps=max(steps_taken, 1), score=score, rewards=rewards or [0.0])
     return episode_result

 MAX_TOKENS = 300
 TEMPERATURE = 0.0
 OUTPUT_PATH = Path(os.environ.get("OUTPUT_PATH") or "/tmp/outputs/baseline_scores.json")
+MIN_EPISODE_SCORE = 0.01
 SYSTEM_PROMPT = """You are an expert SRE triaging production incidents.
 You will receive an incident alert, structured context, and the expected output field.
 def reward_value(step_data: Dict[str, Any]) -> float:
     reward = step_data.get("reward", {})
     if isinstance(reward, dict):
+        return float(reward.get("value", MIN_EPISODE_SCORE))
+    return float(reward or MIN_EPISODE_SCORE)
 def active_model_name(model_client: Optional[OpenAI]) -> str:
 ) -> Dict[str, Any]:
     rewards: List[float] = []
     steps_taken = 0
+    score = MIN_EPISODE_SCORE
     success = False
     episode_result: Dict[str, Any]
             "agent_answer": step_data.get("info", {}).get("agent_answer"),
         }
     except Exception as exc:
+        log_step(step=max(steps_taken, 1), action="error", reward=MIN_EPISODE_SCORE, done=True, error=str(exc))
+        score = MIN_EPISODE_SCORE
         success = False
         episode_result = {
             "incident_id": ticket["incident_id"],
             "task_type": ticket["task_type"],
+            "score": MIN_EPISODE_SCORE,
             "success": False,
             "error": str(exc),
         }
     finally:
+        log_end(success=success, steps=max(steps_taken, 1), score=score, rewards=rewards or [MIN_EPISODE_SCORE])
     return episode_result

models.py CHANGED Viewed

@@ -47,7 +47,7 @@ class IncidentObservation(BaseModel):
     step_count: int = 0
     max_steps: int = 1
     last_action_summary: Optional[str] = None
-    last_reward: float = 0.0
     episode_status: str = "awaiting_action"
@@ -82,7 +82,7 @@ class IncidentAction(BaseModel):
 class IncidentReward(BaseModel):
-    value: float = Field(..., ge=0.0, le=1.0)
     reason: str
@@ -98,13 +98,13 @@ class IncidentState(BaseModel):
     session_id: Optional[str] = None
     step_count: int
     max_steps: int
-    total_reward: float = 0.0
     done: bool
     incident_id: str
     task_type: TaskType
     difficulty: str
     status: str
-    last_reward: float = 0.0
 class ResetRequest(BaseModel):

     step_count: int = 0
     max_steps: int = 1
     last_action_summary: Optional[str] = None
+    last_reward: float = Field(default=0.01, gt=0.0, lt=1.0)
     episode_status: str = "awaiting_action"
 class IncidentReward(BaseModel):
+    value: float = Field(..., gt=0.0, lt=1.0)
     reason: str
     session_id: Optional[str] = None
     step_count: int
     max_steps: int
+    total_reward: float = Field(default=0.01, gt=0.0, lt=1.0)
     done: bool
     incident_id: str
     task_type: TaskType
     difficulty: str
     status: str
+    last_reward: float = Field(default=0.01, gt=0.0, lt=1.0)
 class ResetRequest(BaseModel):

tests/test_env.py CHANGED Viewed

@@ -82,7 +82,7 @@ class IncidentEnvApiTests(unittest.TestCase):
         body = response.json()
         self.assertEqual(body["observation"]["incident_id"], "INC-014")
         self.assertEqual(body["observation"]["task_type"], "task3")
-        self.assertEqual(body["reward"]["value"], 0.0)
         self.assertFalse(body["done"])
         self.assertIn("session_id", body["info"])
         self.assertEqual(body["info"]["state"]["status"], "awaiting_action")
@@ -201,13 +201,13 @@ class IncidentEnvApiTests(unittest.TestCase):
             session_id="done-session",
             step_count=1,
             max_steps=1,
-            total_reward=1.0,
             done=True,
             incident_id="INC-001",
             task_type=TaskType.TASK1,
             difficulty="easy",
             status="completed",
-            last_reward=1.0,
         )
         with TestClient(app) as client:

         body = response.json()
         self.assertEqual(body["observation"]["incident_id"], "INC-014")
         self.assertEqual(body["observation"]["task_type"], "task3")
+        self.assertEqual(body["reward"]["value"], 0.01)
         self.assertFalse(body["done"])
         self.assertIn("session_id", body["info"])
         self.assertEqual(body["info"]["state"]["status"], "awaiting_action")
             session_id="done-session",
             step_count=1,
             max_steps=1,
+            total_reward=0.99,
             done=True,
             incident_id="INC-001",
             task_type=TaskType.TASK1,
             difficulty="easy",
             status="completed",
+            last_reward=0.99,
         )
         with TestClient(app) as client:

tests/test_inference.py CHANGED Viewed

@@ -19,7 +19,7 @@ class InferenceOutputTests(unittest.TestCase):
     def test_write_results_writes_summary_to_configured_path(self) -> None:
         results = [
-            {"incident_id": "INC-001", "task_type": "task1", "score": 1.0, "success": True},
             {"incident_id": "INC-002", "task_type": "task2", "score": 0.5, "success": False},
         ]
@@ -30,13 +30,13 @@ class InferenceOutputTests(unittest.TestCase):
             self.assertTrue(output_path.exists())
             payload = json.loads(output_path.read_text())
             self.assertEqual(payload["episodes"], 2)
-            self.assertAlmostEqual(payload["average_score"], 0.75)
-            self.assertEqual(payload["by_task"]["task1"]["average_score"], 1.0)
             self.assertEqual(payload["by_task"]["task2"]["average_score"], 0.5)
     def test_write_results_tolerates_unwritable_path(self) -> None:
         results = [
-            {"incident_id": "INC-001", "task_type": "task1", "score": 1.0, "success": True},
         ]
         with tempfile.TemporaryDirectory() as temp_dir:

     def test_write_results_writes_summary_to_configured_path(self) -> None:
         results = [
+            {"incident_id": "INC-001", "task_type": "task1", "score": 0.99, "success": True},
             {"incident_id": "INC-002", "task_type": "task2", "score": 0.5, "success": False},
         ]
             self.assertTrue(output_path.exists())
             payload = json.loads(output_path.read_text())
             self.assertEqual(payload["episodes"], 2)
+            self.assertAlmostEqual(payload["average_score"], 0.745)
+            self.assertEqual(payload["by_task"]["task1"]["average_score"], 0.99)
             self.assertEqual(payload["by_task"]["task2"]["average_score"], 0.5)
     def test_write_results_tolerates_unwritable_path(self) -> None:
         results = [
+            {"incident_id": "INC-001", "task_type": "task1", "score": 0.99, "success": True},
         ]
         with tempfile.TemporaryDirectory() as temp_dir: