ananya173147 committed on
Commit
6d63ca6
·
1 Parent(s): 877b3d1

Fix stateful UI session, enrich FHIR cache, fix Dockerfile

Browse files
Files changed (4) hide show
  1. Dockerfile +34 -65
  2. data/fhir_cache.json +0 -0
  3. server/app.py +44 -0
  4. ui/index.html +20 -7
Dockerfile CHANGED
@@ -1,81 +1,50 @@
1
- # Copyright (c) Meta Platforms, Inc. and affiliates.
2
- # All rights reserved.
3
  #
4
- # This source code is licensed under the BSD-style license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- # Multi-stage build using openenv-base
8
- # This Dockerfile is flexible and works for both:
9
- # - In-repo environments (with local OpenEnv sources)
10
- # - Standalone environments (with openenv from PyPI/Git)
11
- # The build script (openenv build) handles context detection and sets appropriate build args.
12
 
13
- ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
14
- FROM ${BASE_IMAGE} AS builder
15
 
16
- WORKDIR /app
17
 
18
- # Ensure git is available (required for installing dependencies from VCS)
19
- RUN apt-get update && \
20
- apt-get install -y --no-install-recommends git && \
21
  rm -rf /var/lib/apt/lists/*
22
 
23
- # Build argument to control whether we're building standalone or in-repo
24
- ARG BUILD_MODE=in-repo
25
- ARG ENV_NAME=medagentbench_env
26
-
27
- # Copy environment code (always at root of build context)
28
- COPY . /app/env
29
-
30
- # For in-repo builds, openenv is already vendored in the build context
31
- # For standalone builds, openenv will be installed via pyproject.toml
32
- WORKDIR /app/env
33
-
34
- # Ensure uv is available (for local builds where base image lacks it)
35
- RUN if ! command -v uv >/dev/null 2>&1; then \
36
- curl -LsSf https://astral.sh/uv/install.sh | sh && \
37
- mv /root/.local/bin/uv /usr/local/bin/uv && \
38
- mv /root/.local/bin/uvx /usr/local/bin/uvx; \
39
- fi
40
-
41
- # Install dependencies using uv sync
42
- # If uv.lock exists, use it; otherwise resolve on the fly
43
- RUN --mount=type=cache,target=/root/.cache/uv \
44
- if [ -f uv.lock ]; then \
45
- uv sync --frozen --no-install-project --no-editable; \
46
- else \
47
- uv sync --no-install-project --no-editable; \
48
- fi
49
-
50
- RUN --mount=type=cache,target=/root/.cache/uv \
51
- if [ -f uv.lock ]; then \
52
- uv sync --frozen --no-editable; \
53
- else \
54
- uv sync --no-editable; \
55
- fi
56
-
57
- # Final runtime stage
58
- FROM ${BASE_IMAGE}
59
 
60
  WORKDIR /app
61
 
62
- # Copy the virtual environment from builder
63
- COPY --from=builder /app/env/.venv /app/.venv
64
 
65
- # Copy the environment code
66
- COPY --from=builder /app/env /app/env
67
 
68
- # Set PATH to use the virtual environment
 
 
 
 
 
69
  ENV PATH="/app/.venv/bin:$PATH"
 
 
70
 
71
- # Set PYTHONPATH so imports work correctly
72
- ENV PYTHONPATH="/app/env:$PYTHONPATH"
73
 
74
- # Health check
75
- HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \
76
  CMD curl -f http://localhost:8000/health || exit 1
77
 
78
- # Run the FastAPI server
79
- # The module path is constructed to work with the /app/env structure
80
- ENV ENABLE_WEB_INTERFACE=true
81
- CMD ["sh", "-c", "cd /app/env && uvicorn server.app:app --host 0.0.0.0 --port 8000"]
 
1
+ # Dockerfile for Northflank CI/CD.
 
2
  #
3
+ # Runs the OpenEnv environment server, which exposes /reset, /step, /state,
4
+ # /schema, /ws, and the UI at /. Training connects separately via ENV_URL.
5
+ #
6
+ # No GPU required: the env server only does FHIR lookups against the
7
+ # baked-in cache and runs the FastAPI server.
 
 
 
8
 
9
+ FROM python:3.11-slim
 
10
 
11
+ ENV DEBIAN_FRONTEND=noninteractive
12
 
13
+ # ── System dependencies ────────────────────────────────────────────────────
14
+ RUN apt-get update && apt-get install -y --no-install-recommends \
15
+ curl git build-essential && \
16
  rm -rf /var/lib/apt/lists/*
17
 
18
+ # ── uv ────────────────────────────────────────────────────────────────────
19
+ RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
20
+ mv /root/.local/bin/uv /usr/local/bin/uv && \
21
+ mv /root/.local/bin/uvx /usr/local/bin/uvx
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
  WORKDIR /app
24
 
25
+ # ── Source code ───────────────────────────────────────────────────────────
26
+ COPY medagentbench_env/ ./medagentbench_env/
27
 
28
+ # ── MedAgentBench eval module (refsol graders) ────────────────────────────
29
+ COPY medagentbenchv2/medagentbench_v2/src/ ./medagentbenchv2/medagentbench_v2/src/
30
 
31
+ # ── Python package + deps (env server only, no train extras) ──────────────
32
+ RUN uv venv --python 3.11 /app/.venv && \
33
+ . /app/.venv/bin/activate && \
34
+ uv pip install -e "medagentbench_env"
35
+
36
+ # ── Runtime environment ───────────────────────────────────────────────────
37
  ENV PATH="/app/.venv/bin:$PATH"
38
+ ENV PYTHONPATH="/app:$PYTHONPATH"
39
+ ENV ENABLE_WEB_INTERFACE=true
40
 
41
+ # ── Expose env server port ────────────────────────────────────────────────
42
+ EXPOSE 8000
43
 
44
+ HEALTHCHECK --interval=30s --timeout=3s --start-period=15s --retries=3 \
 
45
  CMD curl -f http://localhost:8000/health || exit 1
46
 
47
+ # Run the OpenEnv environment server.
48
+ # Training connects to this service via the ENV_URL env var.
49
+ CMD ["uvicorn", "medagentbench_env.server.app:app", \
50
+ "--host", "0.0.0.0", "--port", "8000"]
data/fhir_cache.json CHANGED
The diff for this file is too large to render. See raw diff
 
server/app.py CHANGED
@@ -20,6 +20,7 @@ Usage:
20
 
21
  import json
22
  from pathlib import Path
 
23
 
24
  try:
25
  from openenv.core.env_server.http_server import create_app
@@ -36,6 +37,13 @@ from starlette.requests import Request
36
  from medagentbench_env.models import MedAgentBenchAction, MedAgentBenchObservation
37
  from .medagentbench_env_environment import MedAgentBenchEnvironment
38
 
 
 
 
 
 
 
 
39
  _ROOT = Path(__file__).parent.parent
40
  _UI_HTML = (_ROOT / "ui" / "index.html").read_text()
41
 
@@ -81,6 +89,42 @@ async def get_tasks():
81
  ])
82
 
83
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
84
  @app.get("/api/baseline-results")
85
  async def get_baseline_results():
86
  """Return pre-computed baseline evaluation results."""
 
20
 
21
  import json
22
  from pathlib import Path
23
+ from typing import Optional
24
 
25
  try:
26
  from openenv.core.env_server.http_server import create_app
 
37
  from medagentbench_env.models import MedAgentBenchAction, MedAgentBenchObservation
38
  from .medagentbench_env_environment import MedAgentBenchEnvironment
39
 
40
+ # ---------------------------------------------------------------------------
41
+ # Stateful UI session — one persistent environment instance shared across
42
+ # /api/reset and /api/step so step_count and task context survive between calls.
43
+ # (The built-in /reset and /step from OpenEnv create a fresh env per request.)
44
+ # ---------------------------------------------------------------------------
45
+ _ui_env: Optional[MedAgentBenchEnvironment] = None
46
+
47
  _ROOT = Path(__file__).parent.parent
48
  _UI_HTML = (_ROOT / "ui" / "index.html").read_text()
49
 
 
89
  ])
90
 
91
 
92
+ @app.post("/api/reset")
93
+ async def api_reset(request: Request):
94
+ """Stateful reset for the UI — creates a persistent env instance."""
95
+ global _ui_env
96
+ body = {}
97
+ try:
98
+ body = await request.json()
99
+ except Exception:
100
+ pass
101
+ task_index = body.get("task_index", 0)
102
+ _ui_env = MedAgentBenchEnvironment()
103
+ obs = _ui_env.reset(task_index=task_index)
104
+ obs_dict = obs.model_dump(exclude={"reward", "done", "metadata"})
105
+ return JSONResponse({"observation": obs_dict, "reward": obs.reward, "done": obs.done})
106
+
107
+
108
+ @app.post("/api/step")
109
+ async def api_step(request: Request):
110
+ """Stateful step for the UI — uses the same env instance across calls."""
111
+ global _ui_env
112
+ if _ui_env is None:
113
+ raise HTTPException(status_code=400, detail="No active session. Call /api/reset first.")
114
+ body = {}
115
+ try:
116
+ body = await request.json()
117
+ except Exception:
118
+ pass
119
+ try:
120
+ action = MedAgentBenchAction.model_validate(body.get("action", {}))
121
+ except Exception as e:
122
+ raise HTTPException(status_code=422, detail=str(e))
123
+ obs = _ui_env.step(action)
124
+ obs_dict = obs.model_dump(exclude={"reward", "done", "metadata"})
125
+ return JSONResponse({"observation": obs_dict, "reward": obs.reward, "done": obs.done})
126
+
127
+
128
  @app.get("/api/baseline-results")
129
  async def get_baseline_results():
130
  """Return pre-computed baseline evaluation results."""
ui/index.html CHANGED
@@ -640,7 +640,7 @@ async function startSession() {
640
 
641
  // Call /reset
642
  try {
643
- const r = await fetch('/reset', {
644
  method: 'POST',
645
  headers: {'Content-Type':'application/json'},
646
  body: JSON.stringify({task_index: selectedTask.index})
@@ -761,7 +761,7 @@ async function sendAction() {
761
  document.getElementById('send-btn').disabled = true;
762
 
763
  try {
764
- const r = await fetch('/step', {
765
  method: 'POST',
766
  headers: {'Content-Type':'application/json'},
767
  body: JSON.stringify({
@@ -807,7 +807,7 @@ function buildQuickButtons() {
807
 
808
  // Task-specific GET shortcuts
809
  if (type === 'task10') {
810
- gets.splice(2, 0, { label: '🩸 A1C (4548-4)', path: `Observation?patient=${mrn}&code=4548-4&_sort=-date`, resource: 'Observation' });
811
  }
812
  if (type === 'task3') {
813
  gets.splice(2, 0, { label: '💓 Vital Signs', path: `Observation?patient=${mrn}&category=vital-signs&_sort=-date`, resource: 'Observation' });
@@ -918,19 +918,32 @@ function appendAgentAction(type, url, body, answer, raw) {
918
 
919
  function appendFhirResponse(text) {
920
  const id = `resp-${traceSteps.length}`;
921
- let parsed = null, summary = '';
 
 
 
 
 
 
 
 
 
 
 
 
922
  try {
923
- parsed = JSON.parse(text);
924
  const total = parsed?.total ?? parsed?.entry?.length;
925
  const rtype = parsed?.resourceType;
926
  if (rtype === 'Bundle') {
927
- summary = `Bundle · ${parsed.entry?.length ?? 0} entries${total !== undefined ? ` (total ${total})` : ''}`;
 
928
  } else if (rtype) {
929
  summary = `${rtype}`;
930
  }
931
  } catch {}
932
 
933
- const prettyText = parsed ? JSON.stringify(parsed, null, 2) : text;
934
  const shortText = prettyText.length > 2000 ? prettyText.substring(0, 2000) + '\n… (truncated)' : prettyText;
935
 
936
  const div = document.createElement('div');
 
640
 
641
  // Call /reset
642
  try {
643
+ const r = await fetch('/api/reset', {
644
  method: 'POST',
645
  headers: {'Content-Type':'application/json'},
646
  body: JSON.stringify({task_index: selectedTask.index})
 
761
  document.getElementById('send-btn').disabled = true;
762
 
763
  try {
764
+ const r = await fetch('/api/step', {
765
  method: 'POST',
766
  headers: {'Content-Type':'application/json'},
767
  body: JSON.stringify({
 
807
 
808
  // Task-specific GET shortcuts
809
  if (type === 'task10') {
810
+ gets.splice(2, 0, { label: '🩸 A1C (A1C/4548-4)', path: `Observation?patient=${mrn}&code=A1C&_count=5000`, resource: 'Observation' });
811
  }
812
  if (type === 'task3') {
813
  gets.splice(2, 0, { label: '💓 Vital Signs', path: `Observation?patient=${mrn}&category=vital-signs&_sort=-date`, resource: 'Observation' });
 
918
 
919
  function appendFhirResponse(text) {
920
  const id = `resp-${traceSteps.length}`;
921
+ let parsed = null, summary = '', displayText = text;
922
+
923
+ // The env wraps FHIR JSON as: "Here is the response from the GET request:\n{JSON}. Please call FINISH..."
924
+ // Extract the embedded JSON so we can parse and pretty-print it.
925
+ const prefix = 'Here is the response from the GET request:\n';
926
+ const suffix = '. Please call FINISH';
927
+ const prefixIdx = text.indexOf(prefix);
928
+ if (prefixIdx !== -1) {
929
+ const afterPrefix = text.substring(prefixIdx + prefix.length);
930
+ const suffixIdx = afterPrefix.lastIndexOf(suffix);
931
+ displayText = suffixIdx !== -1 ? afterPrefix.substring(0, suffixIdx) : afterPrefix;
932
+ }
933
+
934
  try {
935
+ parsed = JSON.parse(displayText);
936
  const total = parsed?.total ?? parsed?.entry?.length;
937
  const rtype = parsed?.resourceType;
938
  if (rtype === 'Bundle') {
939
+ const count = parsed.entry?.length ?? 0;
940
+ summary = `Bundle · ${count} ${count === 1 ? 'entry' : 'entries'}${total !== undefined ? ` (total ${total})` : ''}`;
941
  } else if (rtype) {
942
  summary = `${rtype}`;
943
  }
944
  } catch {}
945
 
946
+ const prettyText = parsed ? JSON.stringify(parsed, null, 2) : displayText;
947
  const shortText = prettyText.length > 2000 ? prettyText.substring(0, 2000) + '\n… (truncated)' : prettyText;
948
 
949
  const div = document.createElement('div');