| """ |
| SepsisPilot β Inference Script |
| Meta PyTorch OpenEnv Hackathon 2026 |
| |
| STDOUT FORMAT (exact spec β do not modify): |
| [START] task=<task_name> env=<benchmark> model=<model_name> |
| [STEP] step=<n> action=<action_str> reward=<0.00> done=<true|false> error=<msg|null> |
| [END] success=<true|false> steps=<n> score=<score> rewards=<r1,r2,...,rn> |
| |
| Environment variables: |
| HF_TOKEN β HuggingFace / API key (used as OpenAI API key) |
| API_KEY β fallback API key if HF_TOKEN not set |
| API_BASE_URL β LLM endpoint (default: https://router.huggingface.co/v1) |
| MODEL_NAME β model identifier (default: Qwen/Qwen2.5-72B-Instruct) |
| LOCAL_IMAGE_NAME β Docker image name if using from_docker_image() |
| ENV_BASE_URL β SepsisPilot server URL (default: http://localhost:7860) |
| |
| Usage: |
| python inference.py |
| python inference.py --task mild_sepsis |
| python inference.py --episodes 3 --seed 42 |
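
Example output (illustrative values only, not real results):
    [START] task=mild_sepsis env=sepsis_pilot model=Qwen/Qwen2.5-72B-Instruct
    [STEP] step=1 action=broad_ab_low_vaso reward=0.12 done=false error=null
    [END] success=true steps=24 score=0.81 rewards=0.12,0.10,...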
| """ |
|
|
| from __future__ import annotations |
|
|
| import argparse |
| import json |
| import os |
| import sys |
| import time |
| from typing import Any, Dict, List, Optional |
|
|
| import requests |
| from openai import OpenAI |
|
|
| |
| |
| |
| |
|
|
| API_KEY = os.getenv("HF_TOKEN") or os.getenv("API_KEY") or "" |
| API_BASE_URL = os.getenv("API_BASE_URL") or "https://router.huggingface.co/v1" |
| MODEL_NAME = os.getenv("MODEL_NAME") or "Qwen/Qwen2.5-72B-Instruct" |
| LOCAL_IMAGE_NAME = os.getenv("LOCAL_IMAGE_NAME", "") |
| ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://localhost:7860") |
|
|
| BENCHMARK = "sepsis_pilot" |
| TASKS = ["mild_sepsis", "septic_shock", "severe_mods"] |
| MAX_STEPS_MAP = {"mild_sepsis": 24, "septic_shock": 48, "severe_mods": 72} |
|
|
| |
| MAX_RUNTIME_SECONDS = 18 * 60 |
| LLM_CALL_DELAY = 3 |
|
|
| |
ACTION_NAMES = {
    0: "no_treatment",
    1: "broad_antibiotics",
    2: "narrow_antibiotics",
    3: "low_vasopressor",
    4: "high_vasopressor",
    5: "broad_ab_low_vaso",
    6: "broad_ab_high_vaso",
    7: "narrow_ab_low_vaso",
    8: "narrow_ab_high_vaso",
}
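# Actions 1-2 apply an antibiotic class alone, 3-4 a vasopressor dose alone,
# and 5-8 combine one antibiotic with one vasopressor; 0 applies no treatment.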


# --- LLM client ----------------------------------------------------------------


def build_llm_client() -> OpenAI:
    """Build an OpenAI-compatible client pointed at API_BASE_URL."""
    return OpenAI(
        api_key=API_KEY or "dummy",
        base_url=API_BASE_URL,
        timeout=10.0,
        max_retries=0,
    )


# --- Environment HTTP client ----------------------------------------------------


def env_reset(task: str, seed: int) -> Dict[str, Any]:
    """POST /reset to start a new episode; returns the initial state."""
    resp = requests.post(
        f"{ENV_BASE_URL}/reset",
        json={"task": task, "seed": seed},
        timeout=15,
    )
    resp.raise_for_status()
    return resp.json()


def env_step(action: int) -> Dict[str, Any]:
    """POST /step with an action integer; returns state, reward, and done."""
    resp = requests.post(
        f"{ENV_BASE_URL}/step",
        json={"action": action},
        timeout=15,
    )
    resp.raise_for_status()
    return resp.json()


def env_grade() -> Dict[str, Any]:
    """GET /grade for the final score once the episode is over."""
    resp = requests.get(f"{ENV_BASE_URL}/grade", timeout=15)
    resp.raise_for_status()
    return resp.json()
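# Response shapes assumed by this script (inferred from the keys read below;
# the actual SepsisPilot server schema may differ):
#   /reset -> {"task": ..., "step": 0, "max_steps": ..., "vitals": {...}}
#   /step  -> {"state": {...}, "reward": <float>, "done": <bool>}
#   /grade -> {"score": <float>, "passed": <bool>, "reason": ..., "metrics": {...}}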


# --- Heuristic fallback policy ---------------------------------------------------


def heuristic_action(state: Dict[str, Any], task: str, step: int) -> int:
    """Rule-based policy mirroring the per-task rules in SYSTEM_PROMPT."""
    v = state["vitals"]
    map_val = v["map_mmhg"]
    lactate = v["lactate"]
    creatinine = v["creatinine"]
    wbc = v["wbc"]
    temp = v["temperature"]
    hr = v["heart_rate"]

    if task == "mild_sepsis":
        # Broad antibiotics + low vasopressor until fully stable, then
        # de-escalate to broad antibiotics alone.
        fully_stable = (
            map_val >= 70 and lactate <= 2.0
            and wbc <= 12.0 and temp <= 38.0 and hr <= 100
        )
        return 1 if fully_stable else 5

    elif task == "septic_shock":
        # Narrow antibiotics throughout; escalate vasopressors on low MAP,
        # but avoid the high dose when creatinine suggests kidney injury.
        fully_stable = map_val >= 72 and lactate <= 2.0 and wbc <= 12.0
        if fully_stable:
            return 2
        if map_val < 58:
            return 8 if creatinine < 2.2 else 7
        return 7

    elif task == "severe_mods":
        # Fixed opening (broad+high, then narrow+high), then hold narrow+low
        # unless MAP collapses below 50.
        if step == 1:
            return 6
        if step == 2:
            return 8
        return 8 if map_val < 50 else 7

    return 5
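# Example (illustrative vitals): a mild_sepsis patient with MAP 60 and
# lactate 3.0 is not fully stable, so the heuristic returns action 5
# (broad antibiotics + low vasopressor).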


# --- LLM policy -----------------------------------------------------------------


SYSTEM_PROMPT = """\
You are an ICU physician treating a sepsis patient in a simulation.
Choose exactly ONE action integer (0-8) based on patient vitals.

ACTIONS:
0=no_treatment 1=broad_ab 2=narrow_ab 3=low_vaso 4=high_vaso
5=broad_ab+low_vaso 6=broad_ab+high_vaso 7=narrow_ab+low_vaso 8=narrow_ab+high_vaso

RULES BY TASK:
- mild_sepsis (gram-negative): always action 5 until stable, then 1. Never narrow AB.
- septic_shock (gram-positive): always narrow AB (2,7,8). Never broad. Use vaso if MAP<65.
- severe_mods (mixed): step1=6, step2=8, then 7 unless MAP<50 then 8.

Respond ONLY with JSON: {"action": <0-8>, "reasoning": "<one sentence>"}
"""


def build_state_prompt(state: Dict[str, Any], step: int) -> str:
    """Render one compact observation line plus the JSON reply instruction."""
    v = state["vitals"]
    return (
        f"TASK={state.get('task', '')} STEP={step}/{state['max_steps']} "
        f"MAP={v['map_mmhg']:.1f}({'CRIT' if v['map_mmhg'] < 65 else 'OK'}) "
        f"Lactate={v['lactate']:.2f}({'HIGH' if v['lactate'] > 2 else 'OK'}) "
        f"WBC={v['wbc']:.1f} Creatinine={v['creatinine']:.2f} "
        f"SOFA={v['sofa_score']:.1f} Resistance={v['resistance']:.3f}\n"
        f'Reply ONLY with JSON: {{"action": N, "reasoning": "..."}}'
    )
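# A rendered prompt looks roughly like (illustrative values):
#   TASK=septic_shock STEP=3/48 MAP=58.2(CRIT) Lactate=3.10(HIGH) WBC=14.2 Creatinine=1.80 SOFA=6.0 Resistance=0.120
#   Reply ONLY with JSON: {"action": N, "reasoning": "..."}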


def llm_action(
    client: OpenAI,
    state: Dict[str, Any],
    task: str,
    step: int,
    history: list,
    script_start: float,
) -> int:
    """Try an LLM call. Return the heuristic action if anything goes wrong or time runs out."""
    if time.time() - script_start > MAX_RUNTIME_SECONDS:
        sys.stderr.write("[RUNTIME GUARD] switching to heuristic-only\n")
        return heuristic_action(state, task, step)

    prompt = build_state_prompt(state, step)
    history.append({"role": "user", "content": prompt})

    try:
        time.sleep(LLM_CALL_DELAY)
        response = client.chat.completions.create(
            model=MODEL_NAME,
            # Keep only the most recent turns to bound the context size.
            messages=[{"role": "system", "content": SYSTEM_PROMPT}] + history[-6:],
            max_tokens=80,
            temperature=0.1,
        )
        raw = (response.choices[0].message.content or "").strip()
        # Strip markdown code fences the model may wrap around the JSON.
        clean = raw.replace("```json", "").replace("```", "").strip()
        parsed = json.loads(clean)
        action = int(parsed["action"])

        if not (0 <= action <= 8):
            raise ValueError(f"action {action} out of range")

        history.append({"role": "assistant", "content": raw})
        sys.stderr.write(f"[LLM] step={step} action={action}\n")
        return action

    except Exception as exc:
        sys.stderr.write(f"[LLM FALLBACK] step={step} {exc}\n")
        return heuristic_action(state, task, step)


# --- Episode loop ----------------------------------------------------------------


def run_episode(
    client: OpenAI,
    task: str,
    episode: int,
    seed: int,
    script_start: float,
) -> float:
    """Run one episode and return its score, emitting the [START]/[STEP]/[END] lines."""
    print(f"[START] task={task} env={BENCHMARK} model={MODEL_NAME}", flush=True)

    state = env_reset(task, seed)
    history: list = []
    rewards: List[float] = []
    step = 0
    done = False
    last_error = "null"

    while not done:
        # Step counter for the prompt: the server's step index plus one.
        current_step = state.get("step", step) + 1
        action_int = llm_action(client, state, task, current_step, history, script_start)
        action_str = ACTION_NAMES.get(action_int, str(action_int))

        try:
            result = env_step(action_int)
            step = result["state"]["step"]
            reward = result["reward"]
            done = result["done"]
            state = result["state"]
            last_error = "null"
        except Exception as e:
            # Report the failure on this step's log line and end the episode.
            last_error = str(e).replace("\n", " ")
            reward = 0.0
            done = True

        rewards.append(reward)
        done_str = "true" if done else "false"

        print(
            f"[STEP] step={step} action={action_str} "
            f"reward={reward:.2f} done={done_str} error={last_error}",
            flush=True,
        )
    # Episode over: ask the server for the final grade.
    final_score = 0.0
    success = False
    try:
        grade_result = env_grade()
        final_score = grade_result["score"]
        success = grade_result.get("passed", final_score >= 0.5)
        sys.stderr.write(
            f"[GRADE] task={task} ep={episode} score={final_score:.4f} "
            f"| {grade_result.get('reason', '')}\n"
            f"        {grade_result.get('metrics', {})}\n\n"
        )
    except Exception as e:
        sys.stderr.write(f"[GRADE ERROR] {e}\n")

    rewards_str = ",".join(f"{r:.2f}" for r in rewards)
    success_str = "true" if success else "false"

    print(
        f"[END] success={success_str} steps={step} "
        f"score={final_score:.2f} rewards={rewards_str}",
        flush=True,
    )

    return final_score


# --- Entry point ----------------------------------------------------------------


def main():
    parser = argparse.ArgumentParser(description="SepsisPilot Inference - OpenEnv Hackathon 2026")
    parser.add_argument("--episodes", type=int, default=1)
    parser.add_argument("--seed", type=int, default=42)
    parser.add_argument("--task", type=str, default=None, choices=TASKS,
                        help="Run one task only: mild_sepsis | septic_shock | severe_mods")
    args = parser.parse_args()

    if not API_KEY:
        sys.stderr.write("[WARN] HF_TOKEN/API_KEY not set; LLM calls will fail and the heuristic will run.\n")

    client = build_llm_client()
    script_start = time.time()

    sys.stderr.write(
        f"[CONFIG] API_BASE_URL={API_BASE_URL} MODEL={MODEL_NAME} "
        f"HF_TOKEN={'set' if API_KEY else 'NOT SET'} "
        f"LOCAL_IMAGE={LOCAL_IMAGE_NAME or 'not set'}\n\n"
    )

    tasks_to_run = [args.task] if args.task else TASKS
    all_scores: Dict[str, list] = {}

    for task in tasks_to_run:
        all_scores[task] = []
        for ep in range(1, args.episodes + 1):
            # Vary the seed per episode so repeated episodes are not identical.
            score = run_episode(client, task, ep, args.seed + ep, script_start)
            all_scores[task].append(score)

    elapsed = time.time() - script_start
    sys.stderr.write(f"\n=== Summary (runtime: {elapsed:.1f}s / {MAX_RUNTIME_SECONDS}s max) ===\n")
    for task, scores in all_scores.items():
        avg = sum(scores) / len(scores) if scores else 0.0
        sys.stderr.write(f"  {task}: avg_score={avg:.4f} over {len(scores)} episode(s)\n")


if __name__ == "__main__":
    main()