Spaces:

alok098
/

network-rca-env

Sleeping

App Files Files Community

alok098 commited on Apr 9

Commit

4936ba3

1 Parent(s): 0c1a94e

Fix Phase 2: grader scores in (0,1), task IDs, and smoke tests

Browse files

Files changed (6) hide show

app.py +2 -1
data/tasks.json +3 -0
inference.py +3 -3
openenv.yaml +3 -3
smoke_test.py +16 -4
tasks.py +6 -4

app.py CHANGED Viewed

@@ -95,7 +95,7 @@ def grader(req: GraderRequest):
         max_steps=env.task_data.get("max_steps", 20),
     )
     return {
-        "score": score,
         "feedback": feedback,
         "breakdown": breakdown,
         "task_id": env.task_data.get("id", f"{env.difficulty}-0"),
@@ -120,6 +120,7 @@ def tasks():
                 "required_evidence": task.get("required_evidence", []),
                 "supports_actions": ["investigate", "correlate", "query_metrics", "check_logs", "conclude"],
                 "has_grader": True,
             })
     return {"tasks": task_list, "action_schema": action_schema}

         max_steps=env.task_data.get("max_steps", 20),
     )
     return {
+        "score": float(score),
         "feedback": feedback,
         "breakdown": breakdown,
         "task_id": env.task_data.get("id", f"{env.difficulty}-0"),
                 "required_evidence": task.get("required_evidence", []),
                 "supports_actions": ["investigate", "correlate", "query_metrics", "check_logs", "conclude"],
                 "has_grader": True,
+                "grader": {"enabled": True},
             })
     return {"tasks": task_list, "action_schema": action_schema}

data/tasks.json CHANGED Viewed

@@ -1,6 +1,7 @@
 {
   "easy": [
     {
       "alarms": [
         {
           "id": "INC-HMIS-Shahdara-J1.1K-PE-T4-NR-269506_1770963126.613",
@@ -152,6 +153,7 @@
   ],
   "medium": [
     {
       "alarms": [
         {
           "id": "INC-Baroda_House_MX-204-cfabf4bc_1771263950.9",
@@ -496,6 +498,7 @@
   ],
   "hard": [
     {
       "alarms": [
         {
           "id": "INC-GTL-C8201-T1-R2-P-3db6a007_1772517515.197",

 {
   "easy": [
     {
+      "id": "easy-0",
       "alarms": [
         {
           "id": "INC-HMIS-Shahdara-J1.1K-PE-T4-NR-269506_1770963126.613",
   ],
   "medium": [
     {
+      "id": "medium-0",
       "alarms": [
         {
           "id": "INC-Baroda_House_MX-204-cfabf4bc_1771263950.9",
   ],
   "hard": [
     {
+      "id": "hard-0",
       "alarms": [
         {
           "id": "INC-GTL-C8201-T1-R2-P-3db6a007_1772517515.197",

inference.py CHANGED Viewed

@@ -88,17 +88,17 @@ def run_episode(task_name: str = "network-rca", benchmark: str = "openenv") -> N
             step_idx = idx
             reward_value = float(score)
-            rewards.append(f"{reward_value:.2f}")
             error_value = _safe_error(info.get("last_action_error"))
             print(
                 f"[STEP] step={step_idx} action={action.action_type} "
-                f"reward={reward_value:.2f} done={_bool_str(step_idx == len(task_ids))} error={error_value}"
             )
         success = True
     except Exception as exc:
         print(
             f"[STEP] step={step_idx + 1} action=error "
-            f"reward=0.00 done=false error={_safe_error(exc)}"
         )
     finally:
         if hasattr(env, "close"):

             step_idx = idx
             reward_value = float(score)
+            rewards.append(f"{reward_value:.4f}")
             error_value = _safe_error(info.get("last_action_error"))
             print(
                 f"[STEP] step={step_idx} action={action.action_type} "
+                f"reward={reward_value:.4f} done={_bool_str(step_idx == len(task_ids))} error={error_value}"
             )
         success = True
     except Exception as exc:
         print(
             f"[STEP] step={step_idx + 1} action=error "
+            f"reward=0.0100 done=false error={_safe_error(exc)}"
         )
     finally:
         if hasattr(env, "close"):

openenv.yaml CHANGED Viewed

@@ -15,21 +15,21 @@ tasks:
     description: Diagnose a single node-down incident from alarms, topology, metrics, and logs.
     difficulty: easy
     max_steps: 10
-    reward_range: [0.0, 1.0]
     has_grader: true
   - id: medium-0
     name: OSPF neighbor cascade (medium)
     description: Correlate multiple OSPF neighbor alarms and identify the root cause with evidence.
     difficulty: medium
     max_steps: 10
-    reward_range: [0.0, 1.0]
     has_grader: true
   - id: hard-0
     name: Multi-link hard RCA (hard)
     description: Resolve a complex link-down scenario requiring metrics and logs on multiple devices.
     difficulty: hard
     max_steps: 10
-    reward_range: [0.0, 1.0]
     has_grader: true
 endpoints:

     description: Diagnose a single node-down incident from alarms, topology, metrics, and logs.
     difficulty: easy
     max_steps: 10
+    reward_range: [0.01, 0.99]
     has_grader: true
   - id: medium-0
     name: OSPF neighbor cascade (medium)
     description: Correlate multiple OSPF neighbor alarms and identify the root cause with evidence.
     difficulty: medium
     max_steps: 10
+    reward_range: [0.01, 0.99]
     has_grader: true
   - id: hard-0
     name: Multi-link hard RCA (hard)
     description: Resolve a complex link-down scenario requiring metrics and logs on multiple devices.
     difficulty: hard
     max_steps: 10
+    reward_range: [0.01, 0.99]
     has_grader: true
 endpoints:

smoke_test.py CHANGED Viewed

@@ -7,14 +7,16 @@ import urllib.request
 BASE_URL = "http://127.0.0.1:7860"
-def _request(method: str, path: str, body: dict | None = None) -> dict:
     data = None
     headers = {}
     if body is not None:
         data = json.dumps(body).encode("utf-8")
         headers["Content-Type"] = "application/json"
     req = urllib.request.Request(f"{BASE_URL}{path}", data=data, headers=headers, method=method)
-    with urllib.request.urlopen(req, timeout=20) as resp:
         payload = resp.read().decode("utf-8")
         return json.loads(payload) if payload else {}
@@ -39,7 +41,10 @@ def main() -> int:
         checks.append(("/state", isinstance(state.get("step_count"), int), "step_count missing"))
         tasks = _request("GET", "/tasks")
-        checks.append(("/tasks", len(tasks.get("tasks", [])) >= 3, "tasks < 3"))
         grader = _request("POST", "/grader", {"conclusion": "power outage"})
         score = grader.get("score")
@@ -47,7 +52,14 @@ def main() -> int:
         score_ok = isinstance(score, (int, float)) and 0.0 < float(score) < 1.0
         checks.append(("/grader", score_ok, "grader score must be strictly between 0 and 1"))
-        baseline = _request("GET", "/baseline")
         has_results = isinstance(baseline.get("results"), dict) and len(baseline["results"]) > 0
         checks.append(("/baseline", has_results, "baseline results missing"))
     except urllib.error.URLError as e:

 BASE_URL = "http://127.0.0.1:7860"
+def _request(
+    method: str, path: str, body: dict | None = None, *, timeout_s: float = 20.0
+) -> dict:
     data = None
     headers = {}
     if body is not None:
         data = json.dumps(body).encode("utf-8")
         headers["Content-Type"] = "application/json"
     req = urllib.request.Request(f"{BASE_URL}{path}", data=data, headers=headers, method=method)
+    with urllib.request.urlopen(req, timeout=timeout_s) as resp:
         payload = resp.read().decode("utf-8")
         return json.loads(payload) if payload else {}
         checks.append(("/state", isinstance(state.get("step_count"), int), "step_count missing"))
         tasks = _request("GET", "/tasks")
+        task_rows = tasks.get("tasks", [])
+        checks.append(("/tasks", len(task_rows) >= 3, "tasks < 3"))
+        graded = sum(1 for t in task_rows if t.get("has_grader") or (t.get("grader") or {}).get("enabled"))
+        checks.append(("/tasks", graded >= 3, f"tasks with graders < 3 (got {graded})"))
         grader = _request("POST", "/grader", {"conclusion": "power outage"})
         score = grader.get("score")
         score_ok = isinstance(score, (int, float)) and 0.0 < float(score) < 1.0
         checks.append(("/grader", score_ok, "grader score must be strictly between 0 and 1"))
+        for tid in ("easy-0", "medium-0", "hard-0"):
+            _request("POST", "/reset", {"difficulty": "easy", "task_id": tid})
+            g = _request("POST", "/grader", {"conclusion": "probe"})
+            sc = g.get("score")
+            ok = isinstance(sc, (int, float)) and 0.0 < float(sc) < 1.0
+            checks.append((f"/grader[{tid}]", ok, f"score out of (0,1): {sc!r}"))
+        baseline = _request("GET", "/baseline", timeout_s=180.0)
         has_results = isinstance(baseline.get("results"), dict) and len(baseline["results"]) > 0
         checks.append(("/baseline", has_results, "baseline results missing"))
     except urllib.error.URLError as e:

tasks.py CHANGED Viewed

@@ -12,7 +12,8 @@ with open(_TASKS_FILE, 'r') as f:
     _TASKS = json.load(f)
 # Phase-2 validators require grader scores strictly inside (0, 1), not 0.0 or 1.0.
-_GRADER_EPS = 1e-4
 def _clamp_grader_score_open_interval(score: float) -> float:
@@ -109,10 +110,11 @@ def grade_episode(
     final_score = max(0.0, min(1.0, final_score))
     final_score = _clamp_grader_score_open_interval(final_score)
     breakdown = {
-        "root_cause_score": round(root_score, 4),
-        "evidence_score": round(evidence_score, 4),
-        "efficiency_score": round(efficiency_score, 4),
         "weights": weights,
         "missing_evidence": missing_evidence,
     }

     _TASKS = json.load(f)
 # Phase-2 validators require grader scores strictly inside (0, 1), not 0.0 or 1.0.
+# Use a margin large enough to survive float/json round-trips and strict boundary checks.
+_GRADER_EPS = 0.01
 def _clamp_grader_score_open_interval(score: float) -> float:
     final_score = max(0.0, min(1.0, final_score))
     final_score = _clamp_grader_score_open_interval(final_score)
+    # Some validators scan all floats in the grader payload; keep components off 0.0/1.0 too.
     breakdown = {
+        "root_cause_score": round(_clamp_grader_score_open_interval(root_score), 4),
+        "evidence_score": round(_clamp_grader_score_open_interval(evidence_score), 4),
+        "efficiency_score": round(_clamp_grader_score_open_interval(efficiency_score), 4),
         "weights": weights,
         "missing_evidence": missing_evidence,
     }