Spaces:

aim143
/

support-queue-openenv

Sleeping

App Files Files Community

eeshwar143 committed on Apr 7

Commit

3e6da75

1 Parent(s): e4accbb

Harden inference bootstrap and container startup

Browse files

Files changed (2) hide show

inference.py +52 -27
support_queue_env/client.py +88 -5

inference.py CHANGED Viewed

@@ -25,7 +25,6 @@ def log_start(task: str, env: str, model: str) -> None:
     print(f"[START] task={task} env={env} model={model}", flush=True)
 def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
     error_value = "none" if error is None else error.replace("\n", " ")
     print(
@@ -34,7 +33,6 @@ def log_step(step: int, action: str, reward: float, done: bool, error: str | Non
     )
 def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
     print(
         f"[END] success={str(success).lower()} steps={steps} score={score:.4f} rewards={json.dumps([round(r, 4) for r in rewards])}",
@@ -42,7 +40,6 @@ def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> No
     )
 def get_model_message(
     client: OpenAI,
     step: int,
@@ -72,7 +69,6 @@ def get_model_message(
         return "hello"
 def available_tasks() -> list[TaskCard]:
     return [
         TaskCard(
@@ -86,7 +82,6 @@ def available_tasks() -> list[TaskCard]:
     ]
 def heuristic_action(observation: SupportQueueObservation) -> SupportQueueAction:
     text = " ".join(
         [
@@ -193,9 +188,7 @@ def heuristic_action(observation: SupportQueueObservation) -> SupportQueueAction
     )
-async def run_task(client: OpenAI, task: TaskCard) -> dict[str, Any]:
-    env = await SupportQueueEnv.from_docker_image(LOCAL_IMAGE_NAME)
     history: List[str] = []
     rewards: List[float] = []
     steps_taken = 0
@@ -216,7 +209,13 @@ async def run_task(client: OpenAI, task: TaskCard) -> dict[str, Any]:
             _ = get_model_message(client, step, observation, last_reward, history)
             action = heuristic_action(observation)
-            result = await env.step(action)
             reward = result.reward or 0.0
             done = result.done
             error = None
@@ -237,11 +236,10 @@ async def run_task(client: OpenAI, task: TaskCard) -> dict[str, Any]:
         score = min(max(score, 0.0), 1.0)
         success = score >= SUCCESS_SCORE_THRESHOLD
     finally:
-        try:
-            await env.close()
-        except Exception as exc:
-            print(f"[DEBUG] env.close() error (container cleanup): {exc}", flush=True)
         log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
     return {
@@ -254,21 +252,48 @@ async def run_task(client: OpenAI, task: TaskCard) -> dict[str, Any]:
 async def main() -> None:
-    client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN)
-    results = []
-    for task in available_tasks():
-        results.append(await run_task(client, task))
-    aggregate = {
-        "benchmark": BENCHMARK,
-        "model": MODEL_NAME,
-        "average_score": round(sum(item["score"] for item in results) / len(results), 4) if results else 0.0,
-        "tasks": results,
-    }
-    with open("inference_results.json", "w", encoding="utf-8") as handle:
-        json.dump(aggregate, handle, indent=2)
 if __name__ == "__main__":
-    asyncio.run(main())

     print(f"[START] task={task} env={env} model={model}", flush=True)
 def log_step(step: int, action: str, reward: float, done: bool, error: str | None) -> None:
     error_value = "none" if error is None else error.replace("\n", " ")
     print(
     )
 def log_end(success: bool, steps: int, score: float, rewards: list[float]) -> None:
     print(
         f"[END] success={str(success).lower()} steps={steps} score={score:.4f} rewards={json.dumps([round(r, 4) for r in rewards])}",
     )
 def get_model_message(
     client: OpenAI,
     step: int,
         return "hello"
 def available_tasks() -> list[TaskCard]:
     return [
         TaskCard(
     ]
 def heuristic_action(observation: SupportQueueObservation) -> SupportQueueAction:
     text = " ".join(
         [
     )
+async def run_task(client: OpenAI, env: SupportQueueEnv, task: TaskCard) -> dict[str, Any]:
     history: List[str] = []
     rewards: List[float] = []
     steps_taken = 0
             _ = get_model_message(client, step, observation, last_reward, history)
             action = heuristic_action(observation)
+            try:
+                result = await env.step(action)
+            except Exception as exc:
+                action_payload = json.dumps(action.model_dump(), separators=(",", ":"), sort_keys=True)
+                log_step(step=step, action=action_payload, reward=0.0, done=True, error=str(exc))
+                break
             reward = result.reward or 0.0
             done = result.done
             error = None
         score = min(max(score, 0.0), 1.0)
         success = score >= SUCCESS_SCORE_THRESHOLD
+    except Exception as exc:
+        print(f"[DEBUG] Task {task.task_id} failed: {exc}", flush=True)
     finally:
         log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
     return {
 async def main() -> None:
+    """Run every available task against one shared environment and persist the results.
+
+    A single environment is created up front and reused for all tasks. Whatever
+    happens, one result record per task is written to ``inference_results.json``:
+    tasks that never produced a record get a zero-score placeholder plus a
+    matching ``[START]``/``[END]`` log pair.
+    """
+    # "placeholder" presumably keeps client construction from failing when HF_TOKEN is unset.
+    client = OpenAI(base_url=API_BASE_URL, api_key=HF_TOKEN or "placeholder")
+    tasks = available_tasks()
+    results: list[dict[str, Any]] = []
+    env: SupportQueueEnv | None = None
+    try:
+        env = await SupportQueueEnv.from_docker_image(LOCAL_IMAGE_NAME)
+        for task in tasks:
+            results.append(await run_task(client, env, task))
+    except Exception as exc:
+        # env is only still None when from_docker_image() itself raised.
+        if env is None:
+            print(f"[DEBUG] Environment bootstrap failed: {exc}", flush=True)
+        else:
+            print(f"[DEBUG] Task loop aborted: {exc}", flush=True)
+        # Results are appended in task order, so only tasks past len(results)
+        # lack a record; back-filling just those avoids duplicate entries that
+        # would skew average_score.
+        for task in tasks[len(results):]:
+            log_start(task=task.task_id, env=BENCHMARK, model=MODEL_NAME)
+            log_end(success=False, steps=0, score=0.0, rewards=[])
+            results.append(
+                {
+                    "task_id": task.task_id,
+                    "score": 0.0,
+                    "steps": 0,
+                    "rewards": [],
+                    "success": False,
+                }
+            )
+    finally:
+        if env is not None:
+            try:
+                await env.close()
+            except Exception as exc:
+                # Cleanup is best-effort; a failed close must not prevent writing results.
+                print(f"[DEBUG] env.close() error (container cleanup): {exc}", flush=True)
+        aggregate = {
+            "benchmark": BENCHMARK,
+            "model": MODEL_NAME,
+            "average_score": round(sum(item["score"] for item in results) / len(results), 4) if results else 0.0,
+            "tasks": results,
+        }
+        with open("inference_results.json", "w", encoding="utf-8") as handle:
+            json.dump(aggregate, handle, indent=2)
 if __name__ == "__main__":
+    # Last-resort guard: any exception escaping main() is reported as a single
+    # [DEBUG] line and not re-raised, so the script ends without a traceback.
+    try:
+        asyncio.run(main())
+    except Exception as exc:
+        print(f"[DEBUG] Fatal inference error: {exc}", flush=True)

support_queue_env/client.py CHANGED Viewed

@@ -4,13 +4,22 @@ from __future__ import annotations
 import asyncio
 import os
 from typing import Any
 import requests
 from support_queue_env.models import TaskCard, SupportQueueAction, SupportQueueObservation, SupportQueueState
-DEFAULT_ENV_BASE_URL = os.getenv("ENV_BASE_URL", "http://127.0.0.1:8000")
 class _Result:
@@ -21,8 +30,9 @@ class _Result:
 class SupportQueueEnv:
-    def __init__(self, base_url: str) -> None:
         self.base_url = base_url.rstrip("/")
     @classmethod
     def from_base_url(cls, base_url: str) -> "SupportQueueEnv":
@@ -30,8 +40,80 @@ class SupportQueueEnv:
     @classmethod
     async def from_docker_image(cls, image_name: str | None = None) -> "SupportQueueEnv":
-        _ = image_name
-        return cls(base_url=DEFAULT_ENV_BASE_URL)
     def list_tasks(self) -> list[TaskCard]:
         response = requests.get(f"{self.base_url}/tasks", timeout=30)
@@ -67,4 +149,5 @@ class SupportQueueEnv:
         return await asyncio.to_thread(self.state_sync)
     async def close(self) -> None:
-        return None

 import asyncio
 import os
+import socket
+import subprocess
+import time
 from typing import Any
 import requests
 from support_queue_env.models import TaskCard, SupportQueueAction, SupportQueueObservation, SupportQueueState
+# Base URL of an already-running environment, read once at import time.
+# None when ENV_BASE_URL is unset, in which case from_docker_image() starts a container.
+DEFAULT_ENV_BASE_URL = os.getenv("ENV_BASE_URL")
+# Image names probed, in order, after any caller-supplied name when choosing what to run.
+DEFAULT_IMAGE_CANDIDATES = [
+    "support-queue-openenv:latest",
+    "support-queue-openenv",
+    "support_queue_env:latest",
+    "support_queue_env",
+]
 class _Result:
 class SupportQueueEnv:
+    def __init__(self, base_url: str, container_id: str | None = None) -> None:
+        """Create a client for the environment server at ``base_url``.
+
+        Args:
+            base_url: Root URL of the server; trailing slashes are stripped.
+            container_id: Id of the Docker container that ``close()`` removes,
+                or None when there is no container to remove.
+        """
         self.base_url = base_url.rstrip("/")
+        self.container_id = container_id
     @classmethod
     def from_base_url(cls, base_url: str) -> "SupportQueueEnv":
     @classmethod
     async def from_docker_image(cls, image_name: str | None = None) -> "SupportQueueEnv":
+        """Return a client for a configured server, or start a local container.
+
+        When ``ENV_BASE_URL`` was set at import time the client simply points at
+        that URL and ``image_name`` is ignored. Otherwise the blocking Docker
+        startup runs in a worker thread so the event loop is not blocked.
+        """
+        if not DEFAULT_ENV_BASE_URL:
+            spawned = await asyncio.to_thread(cls._spawn_local_container, image_name)
+            return spawned
+        return cls(base_url=DEFAULT_ENV_BASE_URL)
+    @classmethod
+    def _spawn_local_container(cls, image_name: str | None) -> "SupportQueueEnv":
+        """Start a detached container and return a client bound to it.
+
+        Args:
+            image_name: Preferred image; resolved via ``_resolve_image_name``.
+
+        Returns:
+            A client whose ``container_id`` is set, so ``close()`` removes the container.
+
+        Raises:
+            RuntimeError: If a Docker command fails or the server never becomes ready.
+        """
+        chosen_image = cls._resolve_image_name(image_name)
+        port = cls._pick_free_port()
+        # Publish on loopback only: the client connects via 127.0.0.1, so there is
+        # no reason to expose the environment on every host interface.
+        container_id = cls._run(
+            ["docker", "run", "-d", "-p", f"127.0.0.1:{port}:8000", chosen_image]
+        ).strip()
+        base_url = f"http://127.0.0.1:{port}"
+        try:
+            cls._wait_until_ready(base_url)
+        except BaseException:
+            # BaseException (not Exception) so an interrupt during the readiness
+            # wait still removes the container instead of leaving it running.
+            cls._safe_remove_container(container_id)
+            raise
+        return cls(base_url=base_url, container_id=container_id)
+    @classmethod
+    def _resolve_image_name(cls, image_name: str | None) -> str:
+        """Pick an existing image to run, building one if none is found.
+
+        The caller's ``image_name`` (if any) is tried first, then each entry of
+        ``DEFAULT_IMAGE_CANDIDATES``. If none exists locally, an image is built
+        from the current working directory and its tag is returned.
+        """
+        preferred = [image_name] if image_name else []
+        available = next(
+            (name for name in [*preferred, *DEFAULT_IMAGE_CANDIDATES] if cls._image_exists(name)),
+            None,
+        )
+        if available is not None:
+            return available
+        # Nothing usable locally: build under the requested tag, or a fixed local tag.
+        build_tag = image_name if image_name else "support-queue-openenv:local"
+        cls._run(["docker", "build", "-t", build_tag, "."])
+        return build_tag
+    @staticmethod
+    def _image_exists(image_name: str) -> bool:
+        """Return True when ``docker image inspect`` succeeds for ``image_name``."""
+        inspect_command = ["docker", "image", "inspect", image_name]
+        try:
+            SupportQueueEnv._run(inspect_command)
+        except RuntimeError:
+            # _run raises RuntimeError on a non-zero exit status.
+            return False
+        return True
+    @staticmethod
+    def _pick_free_port() -> int:
+        """Return a loopback TCP port that was free at the time of the call.
+
+        Binding to port 0 lets the OS choose; the socket is closed before
+        returning, so the port is not reserved for the caller.
+        """
+        probe = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
+        with probe:
+            probe.bind(("127.0.0.1", 0))
+            bound_address = probe.getsockname()
+            return int(bound_address[1])
+    @staticmethod
+    def _wait_until_ready(base_url: str, timeout_seconds: int = 45) -> None:
+        """Poll ``{base_url}/health`` until it answers with a success status.
+
+        Args:
+            base_url: Root URL of the environment server.
+            timeout_seconds: How long to keep polling; at least one probe is
+                always made, even for a zero or negative value.
+
+        Raises:
+            RuntimeError: If no successful response arrives before the deadline.
+                The message carries the last failure seen (exception text or
+                HTTP status).
+        """
+        # Monotonic clock: the deadline must not move if the wall clock is adjusted.
+        deadline = time.monotonic() + timeout_seconds
+        last_error = ""
+        while True:
+            try:
+                response = requests.get(f"{base_url}/health", timeout=3)
+                if response.ok:
+                    return
+                # Record non-OK answers too, so the final error is never empty or stale.
+                last_error = f"HTTP {response.status_code}"
+            except Exception as exc:
+                # Broad on purpose: any probe failure just means "not ready yet".
+                last_error = str(exc)
+            if time.monotonic() >= deadline:
+                break
+            time.sleep(1)
+        raise RuntimeError(f"Environment did not become ready at {base_url}: {last_error}")
+    @staticmethod
+    def _run(command: list[str]) -> str:
+        """Run ``command`` and return its captured stdout.
+
+        Raises:
+            RuntimeError: On a non-zero exit status. The message is the stripped
+                stderr (or stdout if stderr is empty), falling back to the
+                command line itself when both are blank.
+        """
+        completed = subprocess.run(command, check=False, capture_output=True, text=True)
+        if completed.returncode == 0:
+            return completed.stdout
+        detail = (completed.stderr or completed.stdout).strip()
+        if not detail:
+            detail = f"Command failed: {' '.join(command)}"
+        raise RuntimeError(detail)
+    @staticmethod
+    def _safe_remove_container(container_id: str) -> None:
+        """Force-remove the container, ignoring the command's exit status and output."""
+        removal_command = ["docker", "rm", "-f", container_id]
+        subprocess.run(removal_command, check=False, capture_output=True, text=True)
     def list_tasks(self) -> list[TaskCard]:
         response = requests.get(f"{self.base_url}/tasks", timeout=30)
         return await asyncio.to_thread(self.state_sync)
     async def close(self) -> None:
+        """Remove the tracked container, if any, without blocking the event loop."""
+        if not self.container_id:
+            return
+        remove = self._safe_remove_container
+        await asyncio.to_thread(remove, self.container_id)