XcodeAddy committed on
Commit
ca37eed
·
1 Parent(s): 5e5d652

Fix submission review issues

Browse files
Files changed (7) hide show
  1. CHANGELOG_AND_RUNBOOK.md +5 -5
  2. README.md +2 -2
  3. app.py +51 -23
  4. inference.py +9 -4
  5. pyproject.toml +4 -0
  6. tests/test_env.py +16 -1
  7. ui/assets/app.js +66 -26
CHANGELOG_AND_RUNBOOK.md CHANGED
@@ -5,7 +5,7 @@ This file explains what changed in the project and how to run or test each part.
5
  Project path:
6
 
7
  ```bash
8
- cd /Users/adityagaba/Downloads/incident-triage-env
9
  ```
10
 
11
  ## 1. What changed
@@ -126,7 +126,7 @@ These logs appear in the same terminal where `uvicorn` is running.
126
  Use port `8000` locally if port `7860` is busy.
127
 
128
  ```bash
129
- cd /Users/adityagaba/Downloads/incident-triage-env
130
  source .venv/bin/activate
131
  .venv/bin/python -m uvicorn app:app --host 127.0.0.1 --port 8000
132
  ```
@@ -372,7 +372,7 @@ Expected:
372
  ## 6. Run automated tests
373
 
374
  ```bash
375
- cd /Users/adityagaba/Downloads/incident-triage-env
376
  .venv/bin/python -m unittest discover -s tests -v
377
  ```
378
 
@@ -385,7 +385,7 @@ OK
385
  ## 7. Run OpenEnv local validation
386
 
387
  ```bash
388
- cd /Users/adityagaba/Downloads/incident-triage-env
389
  .venv/bin/openenv validate . --json
390
  ```
391
 
@@ -400,7 +400,7 @@ Expected:
400
  If the local app is running on port `8000`:
401
 
402
  ```bash
403
- cd /Users/adityagaba/Downloads/incident-triage-env
404
  ENV_URL=http://127.0.0.1:8000 .venv/bin/python inference.py
405
  ```
406
 
 
5
  Project path:
6
 
7
  ```bash
8
+ cd <repo-root>
9
  ```
10
 
11
  ## 1. What changed
 
126
  Use port `8000` locally if port `7860` is busy.
127
 
128
  ```bash
129
+ cd <repo-root>
130
  source .venv/bin/activate
131
  .venv/bin/python -m uvicorn app:app --host 127.0.0.1 --port 8000
132
  ```
 
372
  ## 6. Run automated tests
373
 
374
  ```bash
375
+ cd <repo-root>
376
  .venv/bin/python -m unittest discover -s tests -v
377
  ```
378
 
 
385
  ## 7. Run OpenEnv local validation
386
 
387
  ```bash
388
+ cd <repo-root>
389
  .venv/bin/openenv validate . --json
390
  ```
391
 
 
400
  If the local app is running on port `8000`:
401
 
402
  ```bash
403
+ cd <repo-root>
404
  ENV_URL=http://127.0.0.1:8000 .venv/bin/python inference.py
405
  ```
406
 
README.md CHANGED
@@ -95,7 +95,7 @@ The project also serves a browser-facing UI from the same FastAPI app:
95
 
96
  ## Models
97
 
98
- The core models are defined in [models.py](/Users/adityagaba/Downloads/incident-triage-env/models.py):
99
 
100
  - `IncidentObservation`
101
  - `IncidentAction`
@@ -112,7 +112,7 @@ Validation rules:
112
 
113
  ## Reward Logic
114
 
115
- Rewarding is deterministic and implemented in [graders.py](/Users/adityagaba/Downloads/incident-triage-env/graders.py).
116
 
117
  - `task1`: `1.0` exact, `0.5` adjacent severity, `0.0` far miss
118
  - `task2`: `1.0` exact, `0.5` related domain, `0.25` `UNKNOWN`, `0.0` wrong
 
95
 
96
  ## Models
97
 
98
+ The core models are defined in [models.py](./models.py):
99
 
100
  - `IncidentObservation`
101
  - `IncidentAction`
 
112
 
113
  ## Reward Logic
114
 
115
+ Rewarding is deterministic and implemented in [graders.py](./graders.py).
116
 
117
  - `task1`: `1.0` exact, `0.5` adjacent severity, `0.0` far miss
118
  - `task2`: `1.0` exact, `0.5` related domain, `0.25` `UNKNOWN`, `0.0` wrong
app.py CHANGED
@@ -2,6 +2,7 @@ import uuid
2
  from collections import Counter
3
  from pathlib import Path
4
  import sys
 
5
  from typing import Any
6
 
7
  from fastapi import FastAPI, HTTPException
@@ -25,7 +26,10 @@ UI_DIR = Path(__file__).parent / "ui"
25
  ASSETS_DIR = UI_DIR / "assets"
26
 
27
  # Session store: session_id -> IncidentEnv instance
 
28
  sessions: dict[str, IncidentEnv] = {}
 
 
29
  task_counts = Counter(ticket["task_type"] for ticket in TICKETS)
30
 
31
  app.mount("/assets", StaticFiles(directory=ASSETS_DIR), name="assets")
@@ -36,6 +40,14 @@ def log_event(event: str, **fields: Any) -> None:
36
  print(f"[{event}] {details}", file=sys.stderr, flush=True)
37
 
38
 
 
 
 
 
 
 
 
 
39
  @app.get("/", include_in_schema=False)
40
  def home_page():
41
  return FileResponse(UI_DIR / "index.html")
@@ -140,7 +152,10 @@ def reset(reset_request: ResetRequest | None = None):
140
  error=str(e),
141
  )
142
  raise HTTPException(status_code=400, detail=str(e))
143
- sessions[session_id] = env
 
 
 
144
  result.info["session_id"] = session_id
145
  result.info["state"] = env.state(session_id=session_id).model_dump()
146
  log_event(
@@ -155,17 +170,25 @@ def reset(reset_request: ResetRequest | None = None):
155
 
156
  @app.post("/step", response_model=StepResult)
157
  def step(action: IncidentAction, session_id: str):
158
- env = sessions.get(session_id)
159
- if not env:
160
- log_event("STEP_ERROR", session_id=session_id, error="session_not_found")
161
- raise HTTPException(status_code=404, detail="Session not found. Call /reset first.")
162
- try:
163
- result = env.step(action)
164
- except (RuntimeError, ValueError) as e:
165
- log_event("STEP_ERROR", session_id=session_id, incident_id=action.incident_id, error=str(e))
166
- raise HTTPException(status_code=400, detail=str(e))
167
- result.info["session_id"] = session_id
168
- result.info["state"] = env.state(session_id=session_id).model_dump()
 
 
 
 
 
 
 
 
169
  log_event(
170
  "STEP",
171
  session_id=session_id,
@@ -180,17 +203,22 @@ def step(action: IncidentAction, session_id: str):
180
 
181
  @app.get("/state", response_model=IncidentState)
182
  def state(session_id: str):
183
- env = sessions.get(session_id)
184
- if not env:
185
- log_event("STATE_ERROR", session_id=session_id, error="no_active_session")
186
- raise HTTPException(status_code=404, detail="No active session.")
187
- try:
188
- current_state = env.state(session_id=session_id)
189
- log_event("STATE", session_id=session_id, incident_id=current_state.incident_id, done=str(current_state.done).lower())
190
- return current_state
191
- except RuntimeError as e:
192
- log_event("STATE_ERROR", session_id=session_id, error=str(e))
193
- raise HTTPException(status_code=404, detail=str(e))
 
 
 
 
 
194
 
195
 
196
  @app.get("/grader")
 
2
  from collections import Counter
3
  from pathlib import Path
4
  import sys
5
+ from threading import RLock
6
  from typing import Any
7
 
8
  from fastapi import FastAPI, HTTPException
 
26
  ASSETS_DIR = UI_DIR / "assets"
27
 
28
  # Session store: session_id -> IncidentEnv instance
29
+ MAX_SESSIONS = 500
30
  sessions: dict[str, IncidentEnv] = {}
31
+ completed_states: dict[str, IncidentState] = {}
32
+ session_lock = RLock()
33
  task_counts = Counter(ticket["task_type"] for ticket in TICKETS)
34
 
35
  app.mount("/assets", StaticFiles(directory=ASSETS_DIR), name="assets")
 
40
  print(f"[{event}] {details}", file=sys.stderr, flush=True)
41
 
42
 
43
+ def evict_oldest(mapping: dict[str, Any], max_size: int) -> None:
44
+ while len(mapping) >= max_size:
45
+ oldest_key = next(iter(mapping), None)
46
+ if oldest_key is None:
47
+ return
48
+ mapping.pop(oldest_key, None)
49
+
50
+
51
  @app.get("/", include_in_schema=False)
52
  def home_page():
53
  return FileResponse(UI_DIR / "index.html")
 
152
  error=str(e),
153
  )
154
  raise HTTPException(status_code=400, detail=str(e))
155
+ with session_lock:
156
+ evict_oldest(sessions, MAX_SESSIONS)
157
+ evict_oldest(completed_states, MAX_SESSIONS)
158
+ sessions[session_id] = env
159
  result.info["session_id"] = session_id
160
  result.info["state"] = env.state(session_id=session_id).model_dump()
161
  log_event(
 
170
 
171
  @app.post("/step", response_model=StepResult)
172
  def step(action: IncidentAction, session_id: str):
173
+ with session_lock:
174
+ env = sessions.get(session_id)
175
+ if not env:
176
+ if session_id in completed_states:
177
+ log_event("STEP_ERROR", session_id=session_id, error="episode_already_completed")
178
+ raise HTTPException(status_code=400, detail="Episode already completed. Call reset() to start a new one.")
179
+ log_event("STEP_ERROR", session_id=session_id, error="session_not_found")
180
+ raise HTTPException(status_code=404, detail="Session not found. Call /reset first.")
181
+ try:
182
+ result = env.step(action)
183
+ except (RuntimeError, ValueError) as e:
184
+ log_event("STEP_ERROR", session_id=session_id, incident_id=action.incident_id, error=str(e))
185
+ raise HTTPException(status_code=400, detail=str(e))
186
+ result.info["session_id"] = session_id
187
+ current_state = env.state(session_id=session_id)
188
+ result.info["state"] = current_state.model_dump()
189
+ if result.done:
190
+ completed_states[session_id] = current_state
191
+ sessions.pop(session_id, None)
192
  log_event(
193
  "STEP",
194
  session_id=session_id,
 
203
 
204
  @app.get("/state", response_model=IncidentState)
205
  def state(session_id: str):
206
+ with session_lock:
207
+ env = sessions.get(session_id)
208
+ if not env:
209
+ completed_state = completed_states.get(session_id)
210
+ if completed_state:
211
+ log_event("STATE", session_id=session_id, incident_id=completed_state.incident_id, done=str(completed_state.done).lower())
212
+ return completed_state
213
+ log_event("STATE_ERROR", session_id=session_id, error="no_active_session")
214
+ raise HTTPException(status_code=404, detail="No active session.")
215
+ try:
216
+ current_state = env.state(session_id=session_id)
217
+ log_event("STATE", session_id=session_id, incident_id=current_state.incident_id, done=str(current_state.done).lower())
218
+ return current_state
219
+ except RuntimeError as e:
220
+ log_event("STATE_ERROR", session_id=session_id, error=str(e))
221
+ raise HTTPException(status_code=404, detail=str(e))
222
 
223
 
224
  @app.get("/grader")
inference.py CHANGED
@@ -325,6 +325,7 @@ def run_episode(
325
  steps_taken = 0
326
  score = 0.0
327
  success = False
 
328
 
329
  log_start(task=ticket["incident_id"], env=BENCHMARK, model=active_model_name(model_client))
330
 
@@ -349,9 +350,8 @@ def run_episode(
349
  done=bool(step_data.get("done", True)),
350
  error=None,
351
  )
352
- log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
353
 
354
- return {
355
  "incident_id": ticket["incident_id"],
356
  "task_type": ticket["task_type"],
357
  "difficulty": observation.get("difficulty"),
@@ -362,14 +362,19 @@ def run_episode(
362
  }
363
  except Exception as exc:
364
  log_step(step=max(steps_taken, 1), action="error", reward=0.0, done=True, error=str(exc))
365
- log_end(success=False, steps=steps_taken, score=0.0, rewards=rewards)
366
- return {
 
367
  "incident_id": ticket["incident_id"],
368
  "task_type": ticket["task_type"],
369
  "score": 0.0,
370
  "success": False,
371
  "error": str(exc),
372
  }
 
 
 
 
373
 
374
 
375
  def write_results(results: List[Dict[str, Any]]) -> None:
 
325
  steps_taken = 0
326
  score = 0.0
327
  success = False
328
+ episode_result: Dict[str, Any]
329
 
330
  log_start(task=ticket["incident_id"], env=BENCHMARK, model=active_model_name(model_client))
331
 
 
350
  done=bool(step_data.get("done", True)),
351
  error=None,
352
  )
 
353
 
354
+ episode_result = {
355
  "incident_id": ticket["incident_id"],
356
  "task_type": ticket["task_type"],
357
  "difficulty": observation.get("difficulty"),
 
362
  }
363
  except Exception as exc:
364
  log_step(step=max(steps_taken, 1), action="error", reward=0.0, done=True, error=str(exc))
365
+ score = 0.0
366
+ success = False
367
+ episode_result = {
368
  "incident_id": ticket["incident_id"],
369
  "task_type": ticket["task_type"],
370
  "score": 0.0,
371
  "success": False,
372
  "error": str(exc),
373
  }
374
+ finally:
375
+ log_end(success=success, steps=max(steps_taken, 1), score=score, rewards=rewards or [0.0])
376
+
377
+ return episode_result
378
 
379
 
380
  def write_results(results: List[Dict[str, Any]]) -> None:
pyproject.toml CHANGED
@@ -37,3 +37,7 @@ py-modules = [
37
  "inference",
38
  "models",
39
  ]
 
 
 
 
 
37
  "inference",
38
  "models",
39
  ]
40
+
41
+ [tool.setuptools.packages.find]
42
+ where = ["."]
43
+ include = ["server*"]
tests/test_env.py CHANGED
@@ -2,16 +2,18 @@ import unittest
2
 
3
  from fastapi.testclient import TestClient
4
 
5
- from app import app, sessions
6
 
7
 
8
  class IncidentEnvApiTests(unittest.TestCase):
9
  def setUp(self) -> None:
10
  sessions.clear()
 
11
  self.client = TestClient(app)
12
 
13
  def tearDown(self) -> None:
14
  sessions.clear()
 
15
 
16
  def test_health_schema_and_mcp_helper_endpoints(self) -> None:
17
  health_response = self.client.get("/health")
@@ -101,6 +103,19 @@ class IncidentEnvApiTests(unittest.TestCase):
101
  self.assertTrue(state_body["done"])
102
  self.assertEqual(state_body["status"], "completed")
103
  self.assertEqual(state_body["last_reward"], 1.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
104
 
105
  def test_step_rejects_action_for_wrong_task_type(self) -> None:
106
  reset_response = self.client.post(
 
2
 
3
  from fastapi.testclient import TestClient
4
 
5
+ from app import app, completed_states, sessions
6
 
7
 
8
  class IncidentEnvApiTests(unittest.TestCase):
9
  def setUp(self) -> None:
10
  sessions.clear()
11
+ completed_states.clear()
12
  self.client = TestClient(app)
13
 
14
  def tearDown(self) -> None:
15
  sessions.clear()
16
+ completed_states.clear()
17
 
18
  def test_health_schema_and_mcp_helper_endpoints(self) -> None:
19
  health_response = self.client.get("/health")
 
103
  self.assertTrue(state_body["done"])
104
  self.assertEqual(state_body["status"], "completed")
105
  self.assertEqual(state_body["last_reward"], 1.0)
106
+ self.assertNotIn(session_id, sessions)
107
+ self.assertIn(session_id, completed_states)
108
+
109
+ repeated_step_response = self.client.post(
110
+ f"/step?session_id={session_id}",
111
+ json={
112
+ "incident_id": "INC-014",
113
+ "task_type": "task3",
114
+ "action": "FAILOVER",
115
+ },
116
+ )
117
+ self.assertEqual(repeated_step_response.status_code, 400)
118
+ self.assertIn("already completed", repeated_step_response.json()["detail"])
119
 
120
  def test_step_rejects_action_for_wrong_task_type(self) -> None:
121
  reset_response = self.client.post(
ui/assets/app.js CHANGED
@@ -13,6 +13,13 @@ function safeText(value) {
13
  return value == null ? "--" : String(value);
14
  }
15
 
 
 
 
 
 
 
 
16
  function setHealthPill(status) {
17
  const pills = document.querySelectorAll("[data-health-pill]");
18
  pills.forEach((pill) => {
@@ -23,22 +30,36 @@ function setHealthPill(status) {
23
 
24
  function renderTaskCards(target, tasks) {
25
  if (!target) return;
26
- target.innerHTML = "";
27
  Object.entries(tasks).forEach(([taskId, task]) => {
28
  const article = document.createElement("article");
29
  article.className = "task-card";
30
- article.innerHTML = `
31
- <span class="badge difficulty-${task.difficulty}">${task.difficulty}</span>
32
- <h3>${task.name}</h3>
33
- <p>Expected field: <strong>${task.expected_field || task.output_field}</strong></p>
34
- <div class="task-meta">
35
- <span class="badge">${taskId}</span>
36
- <span class="badge">${task.ticket_count || 0} incidents</span>
37
- </div>
38
- <div class="task-values">
39
- ${(task.allowed_values || task.labels || []).map((value) => `<span class="badge">${value}</span>`).join("")}
40
- </div>
41
- `;
 
 
 
 
 
 
 
 
 
 
 
 
 
 
42
  target.appendChild(article);
43
  });
44
  }
@@ -69,15 +90,21 @@ async function initStatus() {
69
  renderTaskCards(document.querySelector("[data-task-grid]"), metadata.tasks);
70
 
71
  const schemaGrid = document.querySelector("[data-schema-grid]");
72
- schemaGrid.innerHTML = Object.keys(schema)
73
- .map((name) => `<span class="badge">${name}</span>`)
74
- .join("");
 
75
 
76
  document.querySelector("[data-grader-summary]").textContent = grader.scoring;
77
  const graderList = document.querySelector("[data-grader-list]");
78
- graderList.innerHTML = Object.entries(grader.tasks)
79
- .map(([task, rule]) => `<li><strong>${task}</strong>: ${rule}</li>`)
80
- .join("");
 
 
 
 
 
81
  }
82
 
83
  function buildActionPayload(observation, selectedValue) {
@@ -160,9 +187,13 @@ async function initPlayground() {
160
  try {
161
  const ticketData = await fetchJson("/tickets");
162
  validTickets = ticketData.tickets || [];
163
- ticketOptions.innerHTML = validTickets
164
- .map((ticket) => `<option value="${ticket.incident_id}" label="${ticket.task_type} / ${ticket.task_name}"></option>`)
165
- .join("");
 
 
 
 
166
  ticketHelper.textContent = `Valid ticket range: ${validTickets[0]?.incident_id || "--"} to ${validTickets.at(-1)?.incident_id || "--"}.`;
167
  } catch (error) {
168
  ticketHelper.textContent = `Could not load ticket list: ${error.message}`;
@@ -217,9 +248,13 @@ async function initPlayground() {
217
  expectedFieldInput.value = observation.expected_field;
218
  actionValueSelect.disabled = false;
219
  stepButton.disabled = false;
220
- actionValueSelect.innerHTML = observation.allowed_values
221
- .map((value) => `<option value="${value}">${value}</option>`)
222
- .join("");
 
 
 
 
223
 
224
  setOutput(observationOutput, result);
225
  setOutput(resultOutput, "No step submitted yet.");
@@ -282,7 +317,12 @@ async function bootstrap() {
282
  const pageShell = document.querySelector(".page-shell");
283
  const banner = document.createElement("div");
284
  banner.className = "floating-panel";
285
- banner.innerHTML = `<strong>UI data load failed.</strong><p class="status-helper">${error.message}</p>`;
 
 
 
 
 
286
  pageShell?.prepend(banner);
287
  }
288
  }
 
13
  return value == null ? "--" : String(value);
14
  }
15
 
16
+ function createBadge(text, extraClass = "") {
17
+ const badge = document.createElement("span");
18
+ badge.className = extraClass ? `badge ${extraClass}` : "badge";
19
+ badge.textContent = safeText(text);
20
+ return badge;
21
+ }
22
+
23
  function setHealthPill(status) {
24
  const pills = document.querySelectorAll("[data-health-pill]");
25
  pills.forEach((pill) => {
 
30
 
31
  function renderTaskCards(target, tasks) {
32
  if (!target) return;
33
+ target.replaceChildren();
34
  Object.entries(tasks).forEach(([taskId, task]) => {
35
  const article = document.createElement("article");
36
  article.className = "task-card";
37
+
38
+ const difficultyClass = `difficulty-${safeText(task.difficulty).toLowerCase().replace(/[^a-z0-9_-]/g, "")}`;
39
+ const difficulty = createBadge(task.difficulty, difficultyClass);
40
+ const title = document.createElement("h3");
41
+ title.textContent = safeText(task.name);
42
+
43
+ const expectedField = document.createElement("p");
44
+ expectedField.append("Expected field: ");
45
+ const expectedFieldValue = document.createElement("strong");
46
+ expectedFieldValue.textContent = safeText(task.expected_field || task.output_field);
47
+ expectedField.appendChild(expectedFieldValue);
48
+
49
+ const taskMeta = document.createElement("div");
50
+ taskMeta.className = "task-meta";
51
+ taskMeta.append(
52
+ createBadge(taskId),
53
+ createBadge(`${task.ticket_count || 0} incidents`),
54
+ );
55
+
56
+ const taskValues = document.createElement("div");
57
+ taskValues.className = "task-values";
58
+ (task.allowed_values || task.labels || []).forEach((value) => {
59
+ taskValues.appendChild(createBadge(value));
60
+ });
61
+
62
+ article.append(difficulty, title, expectedField, taskMeta, taskValues);
63
  target.appendChild(article);
64
  });
65
  }
 
90
  renderTaskCards(document.querySelector("[data-task-grid]"), metadata.tasks);
91
 
92
  const schemaGrid = document.querySelector("[data-schema-grid]");
93
+ schemaGrid.replaceChildren();
94
+ Object.keys(schema).forEach((name) => {
95
+ schemaGrid.appendChild(createBadge(name));
96
+ });
97
 
98
  document.querySelector("[data-grader-summary]").textContent = grader.scoring;
99
  const graderList = document.querySelector("[data-grader-list]");
100
+ graderList.replaceChildren();
101
+ Object.entries(grader.tasks).forEach(([task, rule]) => {
102
+ const item = document.createElement("li");
103
+ const taskName = document.createElement("strong");
104
+ taskName.textContent = task;
105
+ item.append(taskName, `: ${safeText(rule)}`);
106
+ graderList.appendChild(item);
107
+ });
108
  }
109
 
110
  function buildActionPayload(observation, selectedValue) {
 
187
  try {
188
  const ticketData = await fetchJson("/tickets");
189
  validTickets = ticketData.tickets || [];
190
+ ticketOptions.replaceChildren();
191
+ validTickets.forEach((ticket) => {
192
+ const option = document.createElement("option");
193
+ option.value = safeText(ticket.incident_id);
194
+ option.label = `${safeText(ticket.task_type)} / ${safeText(ticket.task_name)}`;
195
+ ticketOptions.appendChild(option);
196
+ });
197
  ticketHelper.textContent = `Valid ticket range: ${validTickets[0]?.incident_id || "--"} to ${validTickets.at(-1)?.incident_id || "--"}.`;
198
  } catch (error) {
199
  ticketHelper.textContent = `Could not load ticket list: ${error.message}`;
 
248
  expectedFieldInput.value = observation.expected_field;
249
  actionValueSelect.disabled = false;
250
  stepButton.disabled = false;
251
+ actionValueSelect.replaceChildren();
252
+ observation.allowed_values.forEach((value) => {
253
+ const option = document.createElement("option");
254
+ option.value = safeText(value);
255
+ option.textContent = safeText(value);
256
+ actionValueSelect.appendChild(option);
257
+ });
258
 
259
  setOutput(observationOutput, result);
260
  setOutput(resultOutput, "No step submitted yet.");
 
317
  const pageShell = document.querySelector(".page-shell");
318
  const banner = document.createElement("div");
319
  banner.className = "floating-panel";
320
+ const title = document.createElement("strong");
321
+ title.textContent = "UI data load failed.";
322
+ const detail = document.createElement("p");
323
+ detail.className = "status-helper";
324
+ detail.textContent = error.message;
325
+ banner.append(title, detail);
326
  pageShell?.prepend(banner);
327
  }
328
  }