Spaces:
Running
Running
ananya173147 committed on
Commit ·
6d63ca6
1
Parent(s): 877b3d1
Fix stateful UI session, enrich FHIR cache, fix Dockerfile
Browse files
- Dockerfile +34 -65
- data/fhir_cache.json +0 -0
- server/app.py +44 -0
- ui/index.html +20 -7
Dockerfile
CHANGED
|
@@ -1,81 +1,50 @@
|
|
| 1 |
-
#
|
| 2 |
-
# All rights reserved.
|
| 3 |
#
|
| 4 |
-
#
|
| 5 |
-
#
|
| 6 |
-
|
| 7 |
-
#
|
| 8 |
-
#
|
| 9 |
-
# - In-repo environments (with local OpenEnv sources)
|
| 10 |
-
# - Standalone environments (with openenv from PyPI/Git)
|
| 11 |
-
# The build script (openenv build) handles context detection and sets appropriate build args.
|
| 12 |
|
| 13 |
-
|
| 14 |
-
FROM ${BASE_IMAGE} AS builder
|
| 15 |
|
| 16 |
-
|
| 17 |
|
| 18 |
-
#
|
| 19 |
-
RUN apt-get update && \
|
| 20 |
-
|
| 21 |
rm -rf /var/lib/apt/lists/*
|
| 22 |
|
| 23 |
-
#
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
# Copy environment code (always at root of build context)
|
| 28 |
-
COPY . /app/env
|
| 29 |
-
|
| 30 |
-
# For in-repo builds, openenv is already vendored in the build context
|
| 31 |
-
# For standalone builds, openenv will be installed via pyproject.toml
|
| 32 |
-
WORKDIR /app/env
|
| 33 |
-
|
| 34 |
-
# Ensure uv is available (for local builds where base image lacks it)
|
| 35 |
-
RUN if ! command -v uv >/dev/null 2>&1; then \
|
| 36 |
-
curl -LsSf https://astral.sh/uv/install.sh | sh && \
|
| 37 |
-
mv /root/.local/bin/uv /usr/local/bin/uv && \
|
| 38 |
-
mv /root/.local/bin/uvx /usr/local/bin/uvx; \
|
| 39 |
-
fi
|
| 40 |
-
|
| 41 |
-
# Install dependencies using uv sync
|
| 42 |
-
# If uv.lock exists, use it; otherwise resolve on the fly
|
| 43 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 44 |
-
if [ -f uv.lock ]; then \
|
| 45 |
-
uv sync --frozen --no-install-project --no-editable; \
|
| 46 |
-
else \
|
| 47 |
-
uv sync --no-install-project --no-editable; \
|
| 48 |
-
fi
|
| 49 |
-
|
| 50 |
-
RUN --mount=type=cache,target=/root/.cache/uv \
|
| 51 |
-
if [ -f uv.lock ]; then \
|
| 52 |
-
uv sync --frozen --no-editable; \
|
| 53 |
-
else \
|
| 54 |
-
uv sync --no-editable; \
|
| 55 |
-
fi
|
| 56 |
-
|
| 57 |
-
# Final runtime stage
|
| 58 |
-
FROM ${BASE_IMAGE}
|
| 59 |
|
| 60 |
WORKDIR /app
|
| 61 |
|
| 62 |
-
#
|
| 63 |
-
COPY
|
| 64 |
|
| 65 |
-
#
|
| 66 |
-
COPY
|
| 67 |
|
| 68 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 69 |
ENV PATH="/app/.venv/bin:$PATH"
|
|
|
|
|
|
|
| 70 |
|
| 71 |
-
#
|
| 72 |
-
|
| 73 |
|
| 74 |
-
|
| 75 |
-
HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
|
| 76 |
CMD curl -f http://localhost:8000/health || exit 1
|
| 77 |
|
| 78 |
-
# Run the
|
| 79 |
-
#
|
| 80 |
-
|
| 81 |
-
|
|
|
|
| 1 |
+
# Dockerfile for Northflank CI/CD.
#
# Runs the OpenEnv environment server — exposes /reset, /step, /state,
# /schema, /ws, and the UI at /. Training connects separately via ENV_URL.
#
# No GPU required: the env server only does FHIR lookups against the
# baked-in cache and runs the FastAPI server.

FROM python:3.11-slim

# Build-time only: keeps apt non-interactive during `RUN` steps without
# leaking the setting into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# ── System dependencies ────────────────────────────────────────────────────
# curl is also used by the HEALTHCHECK below, so it must stay in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl git build-essential && \
    rm -rf /var/lib/apt/lists/*

# ── uv ────────────────────────────────────────────────────────────────────
# The installer drops binaries in /root/.local/bin; move them onto the
# default PATH so later RUN steps can call `uv` directly.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uv /usr/local/bin/uv && \
    mv /root/.local/bin/uvx /usr/local/bin/uvx

WORKDIR /app

# ── Source code ───────────────────────────────────────────────────────────
COPY medagentbench_env/ ./medagentbench_env/

# ── MedAgentBench eval module (refsol graders) ────────────────────────────
COPY medagentbenchv2/medagentbench_v2/src/ ./medagentbenchv2/medagentbench_v2/src/

# ── Python package + deps (env server only, no train extras) ──────────────
RUN uv venv --python 3.11 /app/.venv && \
    . /app/.venv/bin/activate && \
    uv pip install -e "medagentbench_env"

# ── Runtime environment ───────────────────────────────────────────────────
ENV PATH="/app/.venv/bin:$PATH"
# PYTHONPATH is not defined by the base image, so set it outright instead of
# appending to an undefined variable (which would leave a trailing empty entry).
ENV PYTHONPATH="/app"
ENV ENABLE_WEB_INTERFACE=true

# ── Expose env server port ────────────────────────────────────────────────
EXPOSE 8000

HEALTHCHECK --interval=30s --timeout=3s --start-period=15s --retries=3 \
    CMD curl -f http://localhost:8000/health || exit 1

# Run the OpenEnv environment server.
# Training connects to this service via the ENV_URL env var.
CMD ["uvicorn", "medagentbench_env.server.app:app", \
     "--host", "0.0.0.0", "--port", "8000"]
|
data/fhir_cache.json
CHANGED
|
The diff for this file is too large to render.
See raw diff
|
|
|
server/app.py
CHANGED
|
@@ -20,6 +20,7 @@ Usage:
|
|
| 20 |
|
| 21 |
import json
|
| 22 |
from pathlib import Path
|
|
|
|
| 23 |
|
| 24 |
try:
|
| 25 |
from openenv.core.env_server.http_server import create_app
|
|
@@ -36,6 +37,13 @@ from starlette.requests import Request
|
|
| 36 |
from medagentbench_env.models import MedAgentBenchAction, MedAgentBenchObservation
|
| 37 |
from .medagentbench_env_environment import MedAgentBenchEnvironment
|
| 38 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
_ROOT = Path(__file__).parent.parent
|
| 40 |
_UI_HTML = (_ROOT / "ui" / "index.html").read_text()
|
| 41 |
|
|
@@ -81,6 +89,42 @@ async def get_tasks():
|
|
| 81 |
])
|
| 82 |
|
| 83 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 84 |
@app.get("/api/baseline-results")
|
| 85 |
async def get_baseline_results():
|
| 86 |
"""Return pre-computed baseline evaluation results."""
|
|
|
|
| 20 |
|
| 21 |
import json
|
| 22 |
from pathlib import Path
|
| 23 |
+
from typing import Optional
|
| 24 |
|
| 25 |
try:
|
| 26 |
from openenv.core.env_server.http_server import create_app
|
|
|
|
| 37 |
from medagentbench_env.models import MedAgentBenchAction, MedAgentBenchObservation
|
| 38 |
from .medagentbench_env_environment import MedAgentBenchEnvironment
|
| 39 |
|
| 40 |
+
# ---------------------------------------------------------------------------
|
| 41 |
+
# Stateful UI session — one persistent environment instance shared across
|
| 42 |
+
# /api/reset and /api/step so step_count and task context survive between calls.
|
| 43 |
+
# (The built-in /reset and /step from OpenEnv create a fresh env per request.)
|
| 44 |
+
# ---------------------------------------------------------------------------
|
| 45 |
+
_ui_env: Optional[MedAgentBenchEnvironment] = None
|
| 46 |
+
|
| 47 |
_ROOT = Path(__file__).parent.parent
|
| 48 |
_UI_HTML = (_ROOT / "ui" / "index.html").read_text()
|
| 49 |
|
|
|
|
| 89 |
])
|
| 90 |
|
| 91 |
|
| 92 |
+
@app.post("/api/reset")
async def api_reset(request: Request):
    """Stateful reset for the UI — replaces the shared environment instance.

    Request body (optional JSON object): ``{"task_index": <value>}``.
    A missing, empty, or unparseable body falls back to ``task_index = 0``.

    Returns:
        JSONResponse with ``observation`` (the observation dump without
        ``reward``/``done``/``metadata``), ``reward`` and ``done``.

    Raises:
        HTTPException: 422 when the body is valid JSON but not an object.
    """
    global _ui_env
    try:
        body = await request.json()
    except Exception:
        # Deliberate best-effort: the UI may POST with no body at all, in
        # which case the default task is used.
        body = {}
    if not isinstance(body, dict):
        # e.g. `[]` or `"x"` parse fine but have no `.get`; reject explicitly
        # instead of failing with an AttributeError (HTTP 500).
        raise HTTPException(status_code=422, detail="Request body must be a JSON object.")
    task_index = body.get("task_index", 0)

    # Reset a local instance first and publish it only on success, so a
    # failing reset never leaves an un-reset environment for /api/step.
    env = MedAgentBenchEnvironment()
    obs = env.reset(task_index=task_index)
    _ui_env = env

    obs_dict = obs.model_dump(exclude={"reward", "done", "metadata"})
    return JSONResponse({"observation": obs_dict, "reward": obs.reward, "done": obs.done})
|
| 106 |
+
|
| 107 |
+
|
| 108 |
+
@app.post("/api/step")
async def api_step(request: Request):
    """Stateful step for the UI — advances the environment created by /api/reset.

    Request body (JSON object): ``{"action": {...}}`` where ``action`` is
    validated as a ``MedAgentBenchAction``. A missing or unparseable body is
    treated as an empty object, so the action is validated from ``{}``.

    Returns:
        JSONResponse with ``observation`` (the observation dump without
        ``reward``/``done``/``metadata``), ``reward`` and ``done``.

    Raises:
        HTTPException: 400 when no session exists yet; 422 when the body is
            not a JSON object or the action fails validation.
    """
    if _ui_env is None:
        raise HTTPException(status_code=400, detail="No active session. Call /api/reset first.")
    try:
        body = await request.json()
    except Exception:
        # Best-effort parse; validation below reports what is missing.
        body = {}
    if not isinstance(body, dict):
        # Report the real problem rather than an AttributeError message.
        raise HTTPException(status_code=422, detail="Request body must be a JSON object.")
    try:
        action = MedAgentBenchAction.model_validate(body.get("action", {}))
    except Exception as e:
        # Keep the original exception chained for server-side debugging.
        raise HTTPException(status_code=422, detail=str(e)) from e
    obs = _ui_env.step(action)
    obs_dict = obs.model_dump(exclude={"reward", "done", "metadata"})
    return JSONResponse({"observation": obs_dict, "reward": obs.reward, "done": obs.done})
|
| 126 |
+
|
| 127 |
+
|
| 128 |
@app.get("/api/baseline-results")
|
| 129 |
async def get_baseline_results():
|
| 130 |
"""Return pre-computed baseline evaluation results."""
|
ui/index.html
CHANGED
|
@@ -640,7 +640,7 @@ async function startSession() {
|
|
| 640 |
|
| 641 |
// Call /reset
|
| 642 |
try {
|
| 643 |
-
const r = await fetch('/reset', {
|
| 644 |
method: 'POST',
|
| 645 |
headers: {'Content-Type':'application/json'},
|
| 646 |
body: JSON.stringify({task_index: selectedTask.index})
|
|
@@ -761,7 +761,7 @@ async function sendAction() {
|
|
| 761 |
document.getElementById('send-btn').disabled = true;
|
| 762 |
|
| 763 |
try {
|
| 764 |
-
const r = await fetch('/step', {
|
| 765 |
method: 'POST',
|
| 766 |
headers: {'Content-Type':'application/json'},
|
| 767 |
body: JSON.stringify({
|
|
@@ -807,7 +807,7 @@ function buildQuickButtons() {
|
|
| 807 |
|
| 808 |
// Task-specific GET shortcuts
|
| 809 |
if (type === 'task10') {
|
| 810 |
-
gets.splice(2, 0, { label: '🩸 A1C (4548-4)', path: `Observation?patient=${mrn}&code=
|
| 811 |
}
|
| 812 |
if (type === 'task3') {
|
| 813 |
gets.splice(2, 0, { label: '💓 Vital Signs', path: `Observation?patient=${mrn}&category=vital-signs&_sort=-date`, resource: 'Observation' });
|
|
@@ -918,19 +918,32 @@ function appendAgentAction(type, url, body, answer, raw) {
|
|
| 918 |
|
| 919 |
function appendFhirResponse(text) {
|
| 920 |
const id = `resp-${traceSteps.length}`;
|
| 921 |
-
let parsed = null, summary = '';
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 922 |
try {
|
| 923 |
-
parsed = JSON.parse(
|
| 924 |
const total = parsed?.total ?? parsed?.entry?.length;
|
| 925 |
const rtype = parsed?.resourceType;
|
| 926 |
if (rtype === 'Bundle') {
|
| 927 |
-
|
|
|
|
| 928 |
} else if (rtype) {
|
| 929 |
summary = `${rtype}`;
|
| 930 |
}
|
| 931 |
} catch {}
|
| 932 |
|
| 933 |
-
const prettyText = parsed ? JSON.stringify(parsed, null, 2) :
|
| 934 |
const shortText = prettyText.length > 2000 ? prettyText.substring(0, 2000) + '\n… (truncated)' : prettyText;
|
| 935 |
|
| 936 |
const div = document.createElement('div');
|
|
|
|
| 640 |
|
| 641 |
// Call /reset
|
| 642 |
try {
|
| 643 |
+
const r = await fetch('/api/reset', {
|
| 644 |
method: 'POST',
|
| 645 |
headers: {'Content-Type':'application/json'},
|
| 646 |
body: JSON.stringify({task_index: selectedTask.index})
|
|
|
|
| 761 |
document.getElementById('send-btn').disabled = true;
|
| 762 |
|
| 763 |
try {
|
| 764 |
+
const r = await fetch('/api/step', {
|
| 765 |
method: 'POST',
|
| 766 |
headers: {'Content-Type':'application/json'},
|
| 767 |
body: JSON.stringify({
|
|
|
|
| 807 |
|
| 808 |
// Task-specific GET shortcuts
|
| 809 |
if (type === 'task10') {
|
| 810 |
+
gets.splice(2, 0, { label: '🩸 A1C (A1C/4548-4)', path: `Observation?patient=${mrn}&code=A1C&_count=5000`, resource: 'Observation' });
|
| 811 |
}
|
| 812 |
if (type === 'task3') {
|
| 813 |
gets.splice(2, 0, { label: '💓 Vital Signs', path: `Observation?patient=${mrn}&category=vital-signs&_sort=-date`, resource: 'Observation' });
|
|
|
|
| 918 |
|
| 919 |
function appendFhirResponse(text) {
|
| 920 |
const id = `resp-${traceSteps.length}`;
|
| 921 |
+
let parsed = null, summary = '', displayText = text;
|
| 922 |
+
|
| 923 |
+
// The env wraps FHIR JSON as: "Here is the response from the GET request:\n{JSON}. Please call FINISH..."
|
| 924 |
+
// Extract the embedded JSON so we can parse and pretty-print it.
|
| 925 |
+
const prefix = 'Here is the response from the GET request:\n';
|
| 926 |
+
const suffix = '. Please call FINISH';
|
| 927 |
+
const prefixIdx = text.indexOf(prefix);
|
| 928 |
+
if (prefixIdx !== -1) {
|
| 929 |
+
const afterPrefix = text.substring(prefixIdx + prefix.length);
|
| 930 |
+
const suffixIdx = afterPrefix.lastIndexOf(suffix);
|
| 931 |
+
displayText = suffixIdx !== -1 ? afterPrefix.substring(0, suffixIdx) : afterPrefix;
|
| 932 |
+
}
|
| 933 |
+
|
| 934 |
try {
|
| 935 |
+
parsed = JSON.parse(displayText);
|
| 936 |
const total = parsed?.total ?? parsed?.entry?.length;
|
| 937 |
const rtype = parsed?.resourceType;
|
| 938 |
if (rtype === 'Bundle') {
|
| 939 |
+
const count = parsed.entry?.length ?? 0;
|
| 940 |
+
summary = `Bundle · ${count} ${count === 1 ? 'entry' : 'entries'}${total !== undefined ? ` (total ${total})` : ''}`;
|
| 941 |
} else if (rtype) {
|
| 942 |
summary = `${rtype}`;
|
| 943 |
}
|
| 944 |
} catch {}
|
| 945 |
|
| 946 |
+
const prettyText = parsed ? JSON.stringify(parsed, null, 2) : displayText;
|
| 947 |
const shortText = prettyText.length > 2000 ? prettyText.substring(0, 2000) + '\n… (truncated)' : prettyText;
|
| 948 |
|
| 949 |
const div = document.createElement('div');
|