Spaces:

AdithyaSK
/

opencode-env-rollout

Sleeping

App Files Files Community

AdithyaSK HF Staff committed on Apr 20

Commit

1a3a8ee

verified ·

1 Parent(s): d81f3f0

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

server/app.py +20 -11
server/opencode_environment.py +75 -83

server/app.py CHANGED Viewed

@@ -75,20 +75,29 @@ app = create_app(
 def _find_active_environment(request):
-    """Locate the currently-active OpenCodeEnvironment instance for a request.
-    ``create_app`` keeps per-session envs behind the scenes; for the SSE
-    endpoint we just grab the most recent one (single-worker Space), so
-    we poke at ``app.state.env_cache`` and fall back to ``web_manager``.
     """
-    cache = getattr(app.state, "env_cache", None)
-    if cache:
-        try:
-            return next(iter(cache.values()))
-        except StopIteration:
-            pass
     try:
-        return _web_manager.get_environment()  # type: ignore[name-defined]
     except Exception:
         return None

 def _find_active_environment(request):
+    """Locate a currently-active OpenCodeEnvironment instance.
+
+    ``create_app`` stores per-session envs internally; we don't have a
+    public accessor, so we poke at ``app.state`` attributes that match
+    OpenEnv's conventions. As a last resort we create a fresh env —
+    fine for single-worker Spaces because registries live in-process
+    and the default env is idle until a tool is called.
+
+    Args:
+        request: The incoming request. Not read by this function.
+
+    Returns:
+        An environment taken from the first non-empty candidate attribute
+        on ``app.state``; otherwise a newly constructed
+        ``OpenCodeEnvironment``; or ``None`` if that construction raises.
     """
+    # Probe candidate attribute names on app.state in order. A non-empty
+    # value that is neither a dict nor a list/tuple is skipped, and any
+    # exception while reading a candidate moves on to the next name.
+    # NOTE(review): the dict branch returns the FIRST value in iteration
+    # order while the list/tuple branch returns the LAST element — confirm
+    # both really mean "most recent" for the container OpenEnv uses.
+    for attr_name in ("env_cache", "envs", "environments", "_envs"):
+        cache = getattr(app.state, attr_name, None)
+        if cache:
+            try:
+                if isinstance(cache, dict):
+                    return next(iter(cache.values()))
+                if isinstance(cache, (list, tuple)):
+                    return cache[-1]
+            except Exception:
+                pass
+    # Fallback — make a new env. Safe because the SSE endpoint only
+    # needs the _registry dict, which we then look up rollout_id in.
+    # NOTE(review): this presumes ``_registry`` is shared across instances
+    # (class- or module-level); a per-instance registry on a fresh env
+    # would contain no rollouts — confirm against OpenCodeEnvironment.
     try:
+        return OpenCodeEnvironment()
     except Exception:
         return None

server/opencode_environment.py CHANGED Viewed

@@ -538,74 +538,7 @@ class OpenCodeEnvironment(MCPEnvironment):
         return result.model_dump_json()
-# ── Helpers ─────────────────────────────────────────────────────────────────
-def _qualify_model(provider: str, model: str) -> str:
-    """Return a ``<provider>/<model>`` string the primitive can split cleanly.
-    The primitive splits ``config.model`` on the first ``/`` to recover the
-    upstream model id. If the caller passes a model that already contains a
-    slash (e.g. ``Qwen/Qwen3.5-4B``), we still prepend the provider so the
-    split separates provider from model and the model part round-trips
-    intact (``openai_compatible/Qwen/Qwen3.5-4B`` → upstream ``Qwen/Qwen3.5-4B``).
-    """
-    # Strip an existing <provider>/ prefix only if it matches the configured
-    # provider verbatim — otherwise treat the whole string as the model id.
-    if model.startswith(provider + "/"):
-        return model
-    return f"{provider}/{model}"
-def _read_reward(sandbox: Any, reward_path: str) -> Optional[float]:
-    try:
-        raw = sandbox.read_text(reward_path).strip()
-    except Exception:
-        return None
-    if not raw:
-        return None
-    try:
-        return float(raw)
-    except ValueError:
-        return None
-def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
-    """Clamp per-turn payload sizes to keep responses under a reasonable cap."""
-    out = dict(turn)
-    raw_response = out.get("response") or {}
-    choices = raw_response.get("choices") or []
-    first_choice = choices[0] if choices else {}
-    compact: dict[str, Any] = {
-        "finish_reason": first_choice.get("finish_reason"),
-        "usage": raw_response.get("usage"),
-    }
-    # Surface upstream errors captured by the proxy so they reach the client.
-    if raw_response.get("upstream_error") is not None:
-        compact["upstream_error"] = raw_response["upstream_error"]
-    if raw_response.get("upstream_status") is not None:
-        compact["upstream_status"] = raw_response["upstream_status"]
-    out["response"] = compact
-    req = out.get("request") or {}
-    messages = req.get("messages") or []
-    # Keep request messages (trainer needs them) but drop very long tool schemas.
-    req = {
-        "model": req.get("model"),
-        "messages": messages,
-        "temperature": req.get("temperature"),
-        "top_p": req.get("top_p"),
-        "max_tokens": req.get("max_tokens"),
-        "max_completion_tokens": req.get("max_completion_tokens"),
-        "logprobs": req.get("logprobs"),
-        "top_logprobs": req.get("top_logprobs"),
-        "stream": req.get("stream"),
-    }
-    out["request"] = req
-    return out
-    # ── Async rollout plumbing ─────────────────────────────────────────────
     def _spawn_async_rollout(
         self,
@@ -627,7 +560,6 @@ def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
     ) -> _RolloutHandle:
         from opencode_env import OpenCodeTask
-        # Build the task payload up-front; staging happens on the worker.
         merged_uploads = dict(upload_files)
         if test_script:
             merged_uploads[REMOTE_TEST_PATH] = test_script
@@ -652,11 +584,8 @@ def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
         handle = _RolloutHandle(
             rollout_id=rollout_id,
             task_id=task_id,
-            session_factory_kwargs={
-                "config": config,
-                "mode": mode,
-                "agent_timeout_s": agent_timeout_s,
-            },
             task=task,
         )
         handle._test_script = test_script
@@ -669,12 +598,9 @@ def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
                     sandbox_backend=self._E2BSandboxBackend(),
                     mode=mode,
                     verifier=None,
-                    driver="serve",  # Phase 2b path
                 )
                 handle.session = factory.create(task=task)
-                # Block until the agent idles. The caller can abort via
-                # ``abort_rollout`` any time; that triggers the serve
-                # ``/abort`` endpoint and ``wait_for_completion`` returns.
                 try:
                     handle.session.wait_for_completion(timeout_s=agent_timeout_s)
                 except Exception as exc:  # noqa: BLE001
@@ -691,17 +617,17 @@ def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
         return handle
     def _finalize_handle(self, handle: _RolloutHandle) -> str:
-        """Run the verifier (if test_script present), collect the trace, and
-        return a JSON-serialized :class:`RolloutResult` matching the shape
-        returned by ``run_rollout``. Closes the session + sandbox."""
-        result = self._result_cls(task_id=handle.task_id, mode=handle._kwargs.get("mode", ""))
         session = handle.session
         if session is None:
             result.error = handle.error or "session never created"
             return result.model_dump_json()
         result.sandbox_id = session.sandbox.sandbox_id
-        result.exit_code = 0  # serve-driver has no exit code; use 0 unless aborted
         wall_s = (handle.finished_at or time.time()) - handle.started_at
         result.wall_s = round(wall_s, 3)
@@ -749,6 +675,72 @@ def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
         return result.model_dump_json()
 def _tail(events: list[dict[str, Any]], n: int) -> str:
     """Return the last ``n`` opencode event lines as a newline-joined string."""
     if not events:

         return result.model_dump_json()
+    # ── Async rollout plumbing (Phase 2b) ────────────────────────────────
     def _spawn_async_rollout(
         self,
     ) -> _RolloutHandle:
         from opencode_env import OpenCodeTask
         merged_uploads = dict(upload_files)
         if test_script:
             merged_uploads[REMOTE_TEST_PATH] = test_script
         handle = _RolloutHandle(
             rollout_id=rollout_id,
             task_id=task_id,
+            session_factory_kwargs={"config": config, "mode": mode,
+                                    "agent_timeout_s": agent_timeout_s},
             task=task,
         )
         handle._test_script = test_script
                     sandbox_backend=self._E2BSandboxBackend(),
                     mode=mode,
                     verifier=None,
+                    driver="serve",
                 )
                 handle.session = factory.create(task=task)
                 try:
                     handle.session.wait_for_completion(timeout_s=agent_timeout_s)
                 except Exception as exc:  # noqa: BLE001
         return handle
     def _finalize_handle(self, handle: _RolloutHandle) -> str:
+        """Run the verifier (if present), collect the trace + workdir, and
+        return a JSON-serialized :class:`RolloutResult`. Closes the session."""
+        result = self._result_cls(task_id=handle.task_id,
+                                  mode=handle._kwargs.get("mode", ""))
         session = handle.session
         if session is None:
             result.error = handle.error or "session never created"
             return result.model_dump_json()
         result.sandbox_id = session.sandbox.sandbox_id
+        result.exit_code = 0
         wall_s = (handle.finished_at or time.time()) - handle.started_at
         result.wall_s = round(wall_s, 3)
         return result.model_dump_json()
+# ── Helpers ─────────────────────────────────────────────────────────────────
+def _qualify_model(provider: str, model: str) -> str:
+    """Return a ``<provider>/<model>`` string the primitive can split cleanly.
+
+    The primitive splits ``config.model`` on the first ``/`` to recover the
+    upstream model id. If the caller passes a model that already contains a
+    slash (e.g. ``Qwen/Qwen3.5-4B``), we still prepend the provider so the
+    split separates provider from model and the model part round-trips
+    intact (``openai_compatible/Qwen/Qwen3.5-4B`` → upstream ``Qwen/Qwen3.5-4B``).
+
+    Args:
+        provider: Provider name used as the prefix.
+        model: Model id, with or without a leading ``<provider>/``.
+
+    Returns:
+        ``model`` unchanged when it already starts with ``provider + "/"``,
+        otherwise ``f"{provider}/{model}"``.
+    """
+    # Avoid double-prefixing: a model that already begins with the configured
+    # provider verbatim is returned as-is (nothing is stripped) — otherwise
+    # treat the whole string as the model id and prepend the provider.
+    if model.startswith(provider + "/"):
+        return model
+    return f"{provider}/{model}"
+def _read_reward(sandbox: Any, reward_path: str) -> Optional[float]:
+    """Read a reward value from a text file via the sandbox.
+
+    Args:
+        sandbox: Object exposing ``read_text(path)``.
+        reward_path: Path handed to ``sandbox.read_text``.
+
+    Returns:
+        The stripped file contents parsed with ``float``, or ``None`` when
+        the read raises, the contents are empty after stripping, or the
+        text cannot be parsed as a float.
+    """
+    try:
+        raw = sandbox.read_text(reward_path).strip()
+    except Exception:
+        # Any failure to read the file is reported as "no reward".
+        return None
+    if not raw:
+        return None
+    try:
+        return float(raw)
+    except ValueError:
+        # Non-numeric contents are treated the same as a missing reward.
+        return None
+def _clamp_turn(turn: dict[str, Any]) -> dict[str, Any]:
+    """Clamp per-turn payload sizes to keep responses under a reasonable cap.
+
+    Args:
+        turn: Turn record; its ``"response"`` and ``"request"`` entries are
+            read if present (a missing or falsy entry is treated as ``{}``).
+
+    Returns:
+        A shallow copy of ``turn`` in which ``"response"`` is replaced by a
+        compact summary (first choice's ``finish_reason``, ``usage``, and any
+        non-``None`` ``upstream_error`` / ``upstream_status``) and
+        ``"request"`` is reduced to a fixed set of keys. The input dict is
+        not modified.
+    """
+    out = dict(turn)
+    raw_response = out.get("response") or {}
+    choices = raw_response.get("choices") or []
+    # Only the first choice is consulted; an empty list yields no finish_reason.
+    first_choice = choices[0] if choices else {}
+    compact: dict[str, Any] = {
+        "finish_reason": first_choice.get("finish_reason"),
+        "usage": raw_response.get("usage"),
+    }
+    # Surface upstream errors captured by the proxy so they reach the client.
+    if raw_response.get("upstream_error") is not None:
+        compact["upstream_error"] = raw_response["upstream_error"]
+    if raw_response.get("upstream_status") is not None:
+        compact["upstream_status"] = raw_response["upstream_status"]
+    out["response"] = compact
+    req = out.get("request") or {}
+    messages = req.get("messages") or []
+    # Keep request messages (trainer needs them) but drop very long tool schemas.
+    # Only the keys listed below survive; every other request key is dropped,
+    # and a listed key that is absent comes through as None.
+    req = {
+        "model": req.get("model"),
+        "messages": messages,
+        "temperature": req.get("temperature"),
+        "top_p": req.get("top_p"),
+        "max_tokens": req.get("max_tokens"),
+        "max_completion_tokens": req.get("max_completion_tokens"),
+        "logprobs": req.get("logprobs"),
+        "top_logprobs": req.get("top_logprobs"),
+        "stream": req.get("stream"),
+    }
+    out["request"] = req
+    return out
 def _tail(events: list[dict[str, Any]], n: int) -> str:
     """Return the last ``n`` opencode event lines as a newline-joined string."""
     if not events: