Mist-ic committed on
Commit
b971f92
·
1 Parent(s): 33fb36b

Fix Phase 2: add server readiness check and wrap all httpx calls in try/except

Browse files
Files changed (1) hide show
  1. inference.py +44 -19
inference.py CHANGED
@@ -236,6 +236,21 @@ def parse_action(response_text: str) -> Dict[str, Any]:
236
  # ---------------------------------------------------------------------------
237
 
238
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
239
  def run_episode(
240
  client: OpenAI,
241
  task_id: str,
@@ -245,6 +260,9 @@ def run_episode(
245
 
246
  base = ENV_URL.rstrip("/")
247
 
 
 
 
248
  # Reset environment
249
  reset_resp = httpx.post(
250
  f"{base}/reset",
@@ -306,14 +324,15 @@ def run_episode(
306
 
307
  print(f" Step {step_num}: {act_type}({act_params})", flush=True)
308
 
309
- step_resp = httpx.post(
310
- f"{base}/step",
311
- json={"action": {"action_type": act_type, "params": act_params}},
312
- timeout=30.0,
313
- )
314
  try:
 
 
 
 
 
315
  resp_data = step_resp.json()
316
- except Exception:
 
317
  resp_data = {}
318
 
319
  obs = resp_data.get("observation", resp_data)
@@ -337,19 +356,25 @@ def run_episode(
337
  tried_actions[act_type].append(entry)
338
 
339
  # Grade the episode
340
- final_state = httpx.get(f"{base}/state", timeout=10.0).json()
341
- grade = httpx.post(
342
- f"{base}/grader",
343
- json={
344
- "final_slo_score": final_state.get("global_slo_score", 0.0),
345
- "steps_taken": final_state.get("step_count", 0),
346
- "max_steps": max_steps,
347
- "actions_taken": obs.get("actions_taken", []),
348
- "terminated": final_state.get("terminated", True),
349
- "termination_reason": final_state.get("termination_reason"),
350
- },
351
- timeout=10.0,
352
- ).json()
 
 
 
 
 
 
353
 
354
  score = grade.get("score", 0.0)
355
  outcome = final_state.get("termination_reason", "timeout")
 
236
  # ---------------------------------------------------------------------------
237
 
238
 
239
def _wait_for_server(base: str, max_wait: int = 90) -> None:
    """Poll ``{base}/health`` until it returns HTTP 200 or *max_wait* elapses.

    Args:
        base: Server base URL; ``/health`` is appended to it as-is.
        max_wait: Maximum number of seconds to keep polling.

    Raises:
        RuntimeError: If no probe returned HTTP 200 before the deadline. The
            message includes the last observed failure, when there was one.
    """
    import time

    import httpx

    poll_interval = 3.0
    # Monotonic clock: the deadline must not move when the wall clock is
    # adjusted (NTP sync, manual change) while we are waiting.
    deadline = time.monotonic() + max_wait
    last_failure = None

    while time.monotonic() < deadline:
        try:
            r = httpx.get(f"{base}/health", timeout=5.0)
            if r.status_code == 200:
                return
            last_failure = f"HTTP {r.status_code}"
        except Exception as exc:
            # Best-effort probe: connection errors are expected while the
            # server is still starting, so remember the reason and retry.
            last_failure = repr(exc)

        # Never sleep past the deadline.
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        time.sleep(min(poll_interval, remaining))

    detail = f" (last failure: {last_failure})" if last_failure else ""
    raise RuntimeError(f"Server at {base} not ready after {max_wait}s{detail}")
252
+
253
+
254
  def run_episode(
255
  client: OpenAI,
256
  task_id: str,
 
260
 
261
  base = ENV_URL.rstrip("/")
262
 
263
+ # Wait for server to be ready (handles startup race condition)
264
+ _wait_for_server(base)
265
+
266
  # Reset environment
267
  reset_resp = httpx.post(
268
  f"{base}/reset",
 
324
 
325
  print(f" Step {step_num}: {act_type}({act_params})", flush=True)
326
 
 
 
 
 
 
327
  try:
328
+ step_resp = httpx.post(
329
+ f"{base}/step",
330
+ json={"action": {"action_type": act_type, "params": act_params}},
331
+ timeout=30.0,
332
+ )
333
  resp_data = step_resp.json()
334
+ except Exception as e:
335
+ print(f" [step error] {e}", flush=True)
336
  resp_data = {}
337
 
338
  obs = resp_data.get("observation", resp_data)
 
356
  tried_actions[act_type].append(entry)
357
 
358
  # Grade the episode
359
+ try:
360
+ final_state = httpx.get(f"{base}/state", timeout=10.0).json()
361
+ except Exception:
362
+ final_state = {}
363
+ try:
364
+ grade = httpx.post(
365
+ f"{base}/grader",
366
+ json={
367
+ "final_slo_score": final_state.get("global_slo_score", 0.0),
368
+ "steps_taken": final_state.get("step_count", 0),
369
+ "max_steps": max_steps,
370
+ "actions_taken": obs.get("actions_taken", []),
371
+ "terminated": final_state.get("terminated", True),
372
+ "termination_reason": final_state.get("termination_reason"),
373
+ },
374
+ timeout=10.0,
375
+ ).json()
376
+ except Exception:
377
+ grade = {}
378
 
379
  score = grade.get("score", 0.0)
380
  outcome = final_state.get("termination_reason", "timeout")