Spaces:

PYAE1994
/

openhands-backend

Sleeping

App Files Files Community

PYAE1994 committed 17 days ago

Commit

46258b3

verified ·

1 Parent(s): fe7add9

Phase 1 backend deploy

Browse files

Files changed (12) hide show

.env.example +17 -0
Dockerfile +37 -0
README.md +36 -4
__init__.py +2 -0
agent.py +214 -0
app.py +242 -0
executor.py +230 -0
intent.py +141 -0
llm_router.py +399 -0
requirements.txt +6 -0
tests/__init__.py +0 -0
tests/test_smoke.py +81 -0

.env.example ADDED Viewed

	@@ -0,0 +1,17 @@

+# Copy to .env for local dev. DO NOT COMMIT real keys.
+# LLM providers (comma-separated keys, at least one provider required)
+GEMINI_KEY=
+SAMBANOVA_KEY=
+GITHUB_KEY=
+# E2B (required for execution endpoints)
+E2B_API_KEY=
+# E2B_TEMPLATE=
+# CORS
+ALLOWED_ORIGINS=*
+# Logging
+LOG_LEVEL=INFO
+PORT=7860

Dockerfile ADDED Viewed

	@@ -0,0 +1,37 @@

+# HF Space Dockerfile for OpenHands Simplified Backend (Phase 1)
+# Runs FastAPI on port 7860 (HF Space default).
+FROM python:3.11-slim
+ENV PYTHONDONTWRITEBYTECODE=1 \
+    PYTHONUNBUFFERED=1 \
+    PIP_NO_CACHE_DIR=1 \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PORT=7860 \
+    HOME=/home/user
+# Non-root user expected by HF Spaces
+RUN useradd -m -u 1000 user
+WORKDIR /home/user/app
+# System deps (minimal)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+        curl ca-certificates \
+    && rm -rf /var/lib/apt/lists/*
+# Python deps
+COPY --chown=user:user requirements.txt /home/user/app/requirements.txt
+RUN pip install --no-cache-dir -r /home/user/app/requirements.txt
+# App code
+COPY --chown=user:user . /home/user/app/backend
+# Make package importable as `backend`
+ENV PYTHONPATH=/home/user/app
+USER user
+EXPOSE 7860
+HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
+    CMD curl -fsS http://localhost:7860/health || exit 1
+CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]

README.md CHANGED Viewed

@@ -1,10 +1,42 @@
 ---
-title: Openhands Backend
-emoji: 📚
 colorFrom: indigo
-colorTo: pink
 sdk: docker
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: OpenHands Simplified Backend
+emoji: 🤖
 colorFrom: indigo
+colorTo: purple
 sdk: docker
+app_port: 7860
 pinned: false
 ---
+# OpenHands Simplified Backend (Phase 1)
+FastAPI service that exposes:
+- `GET /health` — liveness + provider/key status
+- `POST /api/chat` — non-streaming chat
+- `POST /api/chat/stream` — SSE; auto-routes chat vs E2B execution
+- `POST /api/execute` — SSE; always runs in E2B sandbox
+- `POST /api/intent` — intent classifier only
+## Environment variables (set in HF Space → Settings → Secrets)
+| Name              | Required | Notes                                  |
+|-------------------|----------|----------------------------------------|
+| `GEMINI_KEY`      | optional | comma-separated Gemini API keys        |
+| `SAMBANOVA_KEY`   | optional | comma-separated SambaNova API keys     |
+| `GITHUB_KEY`      | optional | comma-separated GitHub Models tokens   |
+| `E2B_API_KEY`     | **yes**  | required for execution endpoints       |
+| `E2B_TEMPLATE`    | optional | custom sandbox template id             |
+| `ALLOWED_ORIGINS` | optional | CSV; defaults to `*`                   |
+| `LOG_LEVEL`       | optional | default `INFO`                         |
+At least ONE LLM provider key must be set.
+## Test (after deploy)
+```bash
+curl https://<your-space>.hf.space/health
+curl -N -X POST https://<your-space>.hf.space/api/execute \
+  -H 'Content-Type: application/json' \
+  -d '{"message":"Create proof.txt with the current UNIX timestamp and print its contents."}'
+```

__init__.py ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ """Simplified OpenHands backend (Phase 1)."""
2	+ __version__ = "0.1.0"

agent.py ADDED Viewed

	@@ -0,0 +1,214 @@

+"""
+Agent loop: orchestrates LLM <-> E2B sandbox for execution tasks.
+Phase 1 design — kept deliberately simple and robust:
+  1. Ask LLM to produce a SINGLE python code block (and optional shell block)
+     to satisfy the user's request, given recent context.
+  2. Extract the code block(s).
+  3. Run them in a fresh E2B sandbox, streaming stdout/stderr to the caller.
+  4. Show the LLM the real output and ask for a final natural-language reply.
+  5. Stream that reply.
+  6. Close the sandbox.
+Anything more elaborate (multi-step planner, tool-calling, retry-on-error) is
+intentionally OUT of Phase 1.
+"""
+from __future__ import annotations
+import json
+import logging
+import re
+from dataclasses import dataclass
+from typing import AsyncIterator, Dict, List, Optional
+from . import llm_router
+from .executor import E2BExecutor, ExecEvent
+logger = logging.getLogger(__name__)
+# ----------------------------------------------------------------------------
+# Prompts
+# ----------------------------------------------------------------------------
+CODER_SYSTEM = """You are a code executor agent running inside a real Linux
+sandbox (E2B). The user will ask you to do something that requires running
+real code. Reply with ONE single fenced code block — Python preferred — that,
+when executed, accomplishes the task.
+Strict rules:
+- Output ONLY the code block. No prose before or after.
+- Prefer Python. Use ```python fences.
+- If the task is shell-only (mkdir, ls, install a package), you may use one
+  ```bash block instead.
+- Print clear progress messages so the user can see what happened.
+- Always print a final confirmation line.
+- Keep total output under ~200 lines.
+"""
+REPLY_SYSTEM = """You are a helpful assistant. The user asked for a task that
+required running real code. Below is the user's request, the code that ran,
+and the REAL execution output. Write a short, friendly natural-language reply
+summarising what was done and quoting any important values from the real
+output. Do NOT fabricate. Do NOT re-run anything. Keep it concise (3-6
+sentences)."""
+CHAT_SYSTEM = """You are a concise, helpful assistant. Reply in the same
+language as the user when natural. Keep answers focused."""
+# ----------------------------------------------------------------------------
+# Code extraction
+# ----------------------------------------------------------------------------
+# Matches one fenced block. group(1) is the (possibly empty) language tag made
+# of letters, digits, "_", "+" or "-"; group(2) is the body up to the next
+# closing fence. DOTALL lets the lazy body match span several lines.
+_FENCE_RE = re.compile(
+    r"```([a-zA-Z0-9_+\-]*)\s*\n(.*?)```", re.DOTALL
+)
+@dataclass
+class CodeBlock:
+    """A single fenced code block pulled out of LLM output."""
+    # Fence tag; extract_code_blocks lower-cases it and substitutes "python"
+    # when the fence carries no tag.
+    language: str
+    # Block body; extract_code_blocks strips trailing whitespace from it.
+    code: str
+def extract_code_blocks(text: str) -> List[CodeBlock]:
+    blocks: List[CodeBlock] = []
+    for m in _FENCE_RE.finditer(text or ""):
+        lang = (m.group(1) or "").lower().strip()
+        code = m.group(2).rstrip()
+        blocks.append(CodeBlock(language=lang or "python", code=code))
+    return blocks
+def pick_runnable(blocks: List[CodeBlock]) -> Optional[CodeBlock]:
+    # Prefer python; else bash/sh; else first
+    for b in blocks:
+        if b.language in ("python", "py"):
+            return b
+    for b in blocks:
+        if b.language in ("bash", "sh", "shell"):
+            return b
+    return blocks[0] if blocks else None
+# ----------------------------------------------------------------------------
+# Streaming agent
+# ----------------------------------------------------------------------------
+async def stream_chat_only(
+    messages: List[Dict[str, str]],
+) -> AsyncIterator[Dict]:
+    """Plain chat: no sandbox."""
+    full_messages = [{"role": "system", "content": CHAT_SYSTEM}, *messages]
+    yield {"type": "phase", "phase": "chat"}
+    async for chunk in llm_router.stream_complete(full_messages, temperature=0.4, max_tokens=1024):
+        if chunk["type"] == "delta":
+            yield {"type": "assistant_delta", "content": chunk["content"]}
+        elif chunk["type"] == "done":
+            yield {"type": "assistant_done", "provider": chunk.get("provider"), "model": chunk.get("model")}
+        elif chunk["type"] == "error":
+            yield {"type": "error", "error": chunk["error"]}
+async def stream_execute(
+    messages: List[Dict[str, str]],
+    *,
+    sandbox_timeout: int = 300,
+) -> AsyncIterator[Dict]:
+    """Execution task: spin up E2B, run code, reply with real results."""
+    # --- step 1: ask LLM for code -------------------------------------------
+    yield {"type": "phase", "phase": "planning"}
+    code_messages = [{"role": "system", "content": CODER_SYSTEM}, *messages]
+    try:
+        coder_resp = await llm_router.complete(code_messages, temperature=0.2, max_tokens=1500)
+    except Exception as e:
+        yield {"type": "error", "error": f"LLM failed: {e}"}
+        return
+    raw = coder_resp["content"]
+    yield {"type": "plan", "content": raw, "provider": coder_resp.get("provider")}
+    blocks = extract_code_blocks(raw)
+    chosen = pick_runnable(blocks)
+    if chosen is None:
+        # No code block → degrade to chat reply
+        yield {"type": "assistant_delta", "content": raw}
+        yield {"type": "assistant_done"}
+        return
+    yield {"type": "code", "language": chosen.language, "code": chosen.code}
+    # --- step 2: launch sandbox & run ---------------------------------------
+    yield {"type": "phase", "phase": "sandbox_starting"}
+    executor: Optional[E2BExecutor] = None
+    stdout_buf: List[str] = []
+    stderr_buf: List[str] = []
+    error_text: Optional[str] = None
+    result_text: str = ""
+    exit_code: Optional[int] = None
+    try:
+        executor = E2BExecutor(timeout=sandbox_timeout)
+        await executor.start()
+        yield {"type": "sandbox_started", "sandbox_id": executor.sandbox_id}
+        runner = (
+            executor.run_python(chosen.code)
+            if chosen.language in ("python", "py")
+            else executor.run_shell(chosen.code)
+        )
+        yield {"type": "phase", "phase": "executing"}
+        async for ev in runner:
+            if ev.type == "stdout":
+                stdout_buf.append(ev.data)
+                yield {"type": "stdout", "content": ev.data}
+            elif ev.type == "stderr":
+                stderr_buf.append(ev.data)
+                yield {"type": "stderr", "content": ev.data}
+            elif ev.type == "error":
+                error_text = ev.data
+                yield {"type": "exec_error", "content": ev.data, "meta": ev.meta}
+            elif ev.type == "result":
+                result_text = ev.data
+                exit_code = ev.meta.get("exit_code") if ev.meta else None
+                yield {"type": "exec_result", "content": ev.data, "meta": ev.meta}
+    except Exception as e:
+        logger.exception("sandbox error")
+        yield {"type": "error", "error": f"Sandbox error: {e}"}
+        if executor:
+            await executor.close()
+        return
+    finally:
+        if executor:
+            await executor.close()
+            yield {"type": "sandbox_closed"}
+    # --- step 3: ask LLM for final reply with real outputs ------------------
+    yield {"type": "phase", "phase": "summarising"}
+    user_request = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
+    summary_user = (
+        f"USER REQUEST:\n{user_request}\n\n"
+        f"CODE EXECUTED ({chosen.language}):\n```\n{chosen.code}\n```\n\n"
+        f"STDOUT:\n{''.join(stdout_buf) or '(empty)'}\n\n"
+        f"STDERR:\n{''.join(stderr_buf) or '(empty)'}\n\n"
+        f"RESULT:\n{result_text or '(none)'}\n\n"
+        f"ERROR:\n{error_text or '(none)'}\n\n"
+        f"EXIT_CODE: {exit_code}"
+    )
+    reply_messages = [
+        {"role": "system", "content": REPLY_SYSTEM},
+        {"role": "user", "content": summary_user},
+    ]
+    async for chunk in llm_router.stream_complete(reply_messages, temperature=0.4, max_tokens=600):
+        if chunk["type"] == "delta":
+            yield {"type": "assistant_delta", "content": chunk["content"]}
+        elif chunk["type"] == "done":
+            yield {"type": "assistant_done", "provider": chunk.get("provider"), "model": chunk.get("model")}
+        elif chunk["type"] == "error":
+            yield {"type": "error", "error": chunk["error"]}

app.py ADDED Viewed

	@@ -0,0 +1,242 @@

+"""
+FastAPI backend — simplified OpenHands runtime gateway.
+Endpoints:
+  GET  /                  → service info
+  GET  /health            → liveness + provider/key status
+  POST /api/chat          → JSON, non-streaming convenience endpoint
+  POST /api/chat/stream   → SSE streaming (chat OR execute, auto-routed)
+  POST /api/execute       → SSE streaming, always uses sandbox
+  POST /api/intent        → JSON, returns intent decision only
+CORS is fully open by default (intended for Vercel frontend).
+Configure with env var ALLOWED_ORIGINS (comma-separated) to lock down.
+"""
+from __future__ import annotations
+import asyncio
+import json
+import logging
+import os
+import time
+from typing import Any, Dict, List, Optional
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse, StreamingResponse
+from pydantic import BaseModel, Field
+from . import agent, intent, llm_router
+# ----------------------------------------------------------------------------
+# Logging
+# ----------------------------------------------------------------------------
+logging.basicConfig(
+    level=os.environ.get("LOG_LEVEL", "INFO"),
+    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
+)
+logger = logging.getLogger("openhands.backend")
+# ----------------------------------------------------------------------------
+# Models
+# ----------------------------------------------------------------------------
+class ChatMessage(BaseModel):
+    """One chat turn: a role plus its text content."""
+    # The pattern limits role to "system", "user" or "assistant".
+    role: str = Field(..., pattern="^(system|user|assistant)$")
+    content: str
+class ChatRequest(BaseModel):
+    messages: List[ChatMessage] = Field(default_factory=list)
+    message: Optional[str] = None  # convenience: single user message
+    force_sandbox: Optional[bool] = None  # override intent detection
+    sandbox_timeout: int = 300
+    def to_messages(self) -> List[Dict[str, str]]:
+        msgs = [m.dict() for m in self.messages]
+        if self.message:
+            msgs.append({"role": "user", "content": self.message})
+        if not msgs:
+            raise ValueError("at least one message is required")
+        return msgs
+class IntentRequest(BaseModel):
+    """Request body for POST /api/intent."""
+    message: str  # text passed to intent.detect
+# ----------------------------------------------------------------------------
+# App
+# ----------------------------------------------------------------------------
+app = FastAPI(
+    title="OpenHands Simplified Backend",
+    version="0.1.0",
+    description="Phase-1 backend: LLM router + E2B real execution + SSE streaming.",
+)
+# ALLOWED_ORIGINS is either the literal "*" (the default, any origin) or a
+# comma-separated list; surrounding whitespace and blank entries are dropped.
+_allowed = os.environ.get("ALLOWED_ORIGINS", "*").strip()
+allowed_origins = ["*"] if _allowed == "*" else [o.strip() for o in _allowed.split(",") if o.strip()]
+# Credentials are disabled; all methods and headers are allowed.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=allowed_origins,
+    allow_credentials=False,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# ----------------------------------------------------------------------------
+# Helpers
+# ----------------------------------------------------------------------------
+def sse_format(event: str, data: Any) -> bytes:
+    payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
+    return f"event: {event}\ndata: {payload}\n\n".encode("utf-8")
+SSE_HEADERS = {
+    "Content-Type": "text/event-stream",
+    "Cache-Control": "no-cache, no-transform",
+    "Connection": "keep-alive",
+    "X-Accel-Buffering": "no",  # disable buffering on proxies that respect it
+}
+# ----------------------------------------------------------------------------
+# Routes
+# ----------------------------------------------------------------------------
+@app.get("/")
+async def root() -> Dict[str, Any]:
+    return {
+        "service": "openhands-simplified-backend",
+        "status": "ok",
+        "endpoints": [
+            "/health",
+            "/api/chat",
+            "/api/chat/stream",
+            "/api/execute",
+            "/api/intent",
+        ],
+    }
+@app.get("/health")
+async def health() -> Dict[str, Any]:
+    return {
+        "status": "ok",
+        "time": int(time.time()),
+        "providers": llm_router.pool_status(),
+        "e2b_configured": bool(os.environ.get("E2B_API_KEY")),
+    }
+@app.post("/api/intent")
+async def api_intent(req: IntentRequest) -> Dict[str, Any]:
+    decision = await intent.detect(req.message)
+    return {
+        "needs_sandbox": decision.needs_sandbox,
+        "reason": decision.reason,
+        "confidence": decision.confidence,
+    }
+@app.post("/api/chat")
+async def api_chat(req: ChatRequest) -> Dict[str, Any]:
+    """Non-streaming chat (no sandbox). Convenience for simple clients."""
+    try:
+        messages = req.to_messages()
+    except ValueError as e:
+        raise HTTPException(400, str(e))
+    # Add a light system message if not present
+    if not any(m["role"] == "system" for m in messages):
+        messages = [{"role": "system", "content": agent.CHAT_SYSTEM}, *messages]
+    result = await llm_router.complete(messages, temperature=0.4, max_tokens=1024)
+    return {
+        "content": result["content"],
+        "provider": result.get("provider"),
+        "model": result.get("model"),
+    }
+@app.post("/api/chat/stream")
+async def api_chat_stream(req: ChatRequest):
+    """SSE streaming endpoint.
+    Auto-routes between chat-only and sandbox execution based on intent
+    detection (override with `force_sandbox`).
+    """
+    try:
+        messages = req.to_messages()
+    except ValueError as e:
+        raise HTTPException(400, str(e))
+    last_user = next((m["content"] for m in reversed(messages) if m["role"] == "user"), "")
+    async def event_gen():
+        # Decide routing
+        if req.force_sandbox is True:
+            decision = intent.ExecutionIntent(True, "forced by client", 1.0)
+        elif req.force_sandbox is False:
+            decision = intent.ExecutionIntent(False, "forced by client", 1.0)
+        else:
+            decision = await intent.detect(last_user)
+        yield sse_format("intent", {
+            "needs_sandbox": decision.needs_sandbox,
+            "reason": decision.reason,
+            "confidence": decision.confidence,
+        })
+        try:
+            if decision.needs_sandbox:
+                async for ev in agent.stream_execute(messages, sandbox_timeout=req.sandbox_timeout):
+                    yield sse_format(ev["type"], ev)
+            else:
+                async for ev in agent.stream_chat_only(messages):
+                    yield sse_format(ev["type"], ev)
+        except Exception as e:
+            logger.exception("stream error")
+            yield sse_format("error", {"error": str(e)})
+        finally:
+            yield sse_format("end", {"done": True})
+    return StreamingResponse(event_gen(), headers=SSE_HEADERS)
+@app.post("/api/execute")
+async def api_execute(req: ChatRequest):
+    """SSE streaming endpoint that ALWAYS uses the sandbox."""
+    try:
+        messages = req.to_messages()
+    except ValueError as e:
+        raise HTTPException(400, str(e))
+    async def event_gen():
+        yield sse_format("intent", {"needs_sandbox": True, "reason": "explicit /execute"})
+        try:
+            async for ev in agent.stream_execute(messages, sandbox_timeout=req.sandbox_timeout):
+                yield sse_format(ev["type"], ev)
+        except Exception as e:
+            logger.exception("execute error")
+            yield sse_format("error", {"error": str(e)})
+        finally:
+            yield sse_format("end", {"done": True})
+    return StreamingResponse(event_gen(), headers=SSE_HEADERS)
+# ----------------------------------------------------------------------------
+# Local entrypoint
+# ----------------------------------------------------------------------------
+if __name__ == "__main__":
+    import uvicorn
+    port = int(os.environ.get("PORT", "7860"))
+    uvicorn.run(app, host="0.0.0.0", port=port)

executor.py ADDED Viewed

	@@ -0,0 +1,230 @@

+"""
+E2B Sandbox Executor.
+Owns the real execution runtime. Wraps the e2b_code_interpreter SDK so the
+rest of the backend never imports e2b directly.
+Capabilities (Phase 1):
+  - run_python(code): execute Python in a sandbox, stream stdout/stderr
+  - run_shell(cmd): execute shell command, stream stdout/stderr
+  - write_file(path, contents)
+  - read_file(path)
+  - close()
+A sandbox is created per task and closed at the end (Phase 1: no reuse).
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import os
+from contextlib import asynccontextmanager
+from dataclasses import dataclass, field
+from typing import AsyncIterator, Dict, List, Optional
+logger = logging.getLogger(__name__)
+# ----------------------------------------------------------------------------
+# Event types streamed back to the client
+# ----------------------------------------------------------------------------
+@dataclass
+class ExecEvent:
+    """One event produced while running code or a command in the sandbox."""
+    type: str          # 'sandbox_started' | 'stdout' | 'stderr' | 'result' | 'error' | 'sandbox_closed'
+    # Text payload: an output chunk, an error message or the result text.
+    data: str = ""
+    # Extra fields; this module sets "traceback" on Python errors and
+    # "exit_code" on shell results.
+    meta: Dict = field(default_factory=dict)
+# ----------------------------------------------------------------------------
+# E2B SDK import (lazy so missing dep doesn't crash module import)
+# ----------------------------------------------------------------------------
+def _get_sandbox_class():
+    try:
+        from e2b_code_interpreter import Sandbox  # type: ignore
+        return Sandbox
+    except ImportError as e:
+        raise RuntimeError(
+            "e2b_code_interpreter not installed. Add `e2b-code-interpreter` to requirements."
+        ) from e
+# ----------------------------------------------------------------------------
+# Executor
+# ----------------------------------------------------------------------------
+class E2BExecutor:
+    """One sandbox = one E2BExecutor instance.
+    The SDK is synchronous; we offload calls to a thread to keep the event
+    loop free.
+    """
+    def __init__(self, api_key: Optional[str] = None, template: Optional[str] = None,
+                 timeout: int = 300) -> None:
+        self.api_key = api_key or os.environ.get("E2B_API_KEY", "")
+        if not self.api_key:
+            raise RuntimeError("E2B_API_KEY is not configured")
+        self.template = template or os.environ.get("E2B_TEMPLATE")  # None → default
+        self.timeout = timeout
+        self._sandbox = None  # type: ignore
+        self._lock = asyncio.Lock()
+    # ---- lifecycle ----------------------------------------------------------
+    async def start(self) -> None:
+        if self._sandbox is not None:
+            return
+        Sandbox = _get_sandbox_class()
+        def _create():
+            kwargs = {"api_key": self.api_key, "timeout": self.timeout}
+            if self.template:
+                return Sandbox(self.template, **kwargs)
+            return Sandbox(**kwargs)
+        self._sandbox = await asyncio.to_thread(_create)
+        logger.info("E2B sandbox started: id=%s", getattr(self._sandbox, "sandbox_id", "?"))
+    async def close(self) -> None:
+        if self._sandbox is None:
+            return
+        sb = self._sandbox
+        self._sandbox = None
+        try:
+            await asyncio.to_thread(sb.kill)
+        except Exception as e:
+            logger.warning("E2B close error (non-fatal): %s", e)
+    @property
+    def sandbox_id(self) -> Optional[str]:
+        return getattr(self._sandbox, "sandbox_id", None) if self._sandbox else None
+    # ---- execution ----------------------------------------------------------
+    async def run_python(self, code: str) -> AsyncIterator[ExecEvent]:
+        """Run Python code; yield streaming events."""
+        if self._sandbox is None:
+            await self.start()
+        sb = self._sandbox
+        # Queue bridging the SDK callback thread → asyncio loop
+        loop = asyncio.get_running_loop()
+        queue: asyncio.Queue[ExecEvent] = asyncio.Queue()
+        def on_stdout(msg) -> None:
+            text = getattr(msg, "line", None) or getattr(msg, "text", None) or str(msg)
+            loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stdout", text))
+        def on_stderr(msg) -> None:
+            text = getattr(msg, "line", None) or getattr(msg, "text", None) or str(msg)
+            loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stderr", text))
+        async def runner():
+            try:
+                def _exec():
+                    return sb.run_code(code, on_stdout=on_stdout, on_stderr=on_stderr)
+                execution = await asyncio.to_thread(_exec)
+                # Final result
+                result_text = ""
+                if execution is not None:
+                    err = getattr(execution, "error", None)
+                    if err is not None:
+                        loop.call_soon_threadsafe(
+                            queue.put_nowait,
+                            ExecEvent("error", f"{getattr(err, 'name', 'Error')}: {getattr(err, 'value', err)}",
+                                      {"traceback": getattr(err, "traceback", "")}),
+                        )
+                    results = getattr(execution, "results", []) or []
+                    if results:
+                        for r in results:
+                            t = getattr(r, "text", None)
+                            if t:
+                                result_text += t + "\n"
+                loop.call_soon_threadsafe(
+                    queue.put_nowait,
+                    ExecEvent("result", result_text.strip()),
+                )
+            except Exception as e:
+                loop.call_soon_threadsafe(
+                    queue.put_nowait, ExecEvent("error", str(e)),
+                )
+            finally:
+                loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("__done__"))
+        task = asyncio.create_task(runner())
+        try:
+            while True:
+                ev = await queue.get()
+                if ev.type == "__done__":
+                    break
+                yield ev
+        finally:
+            if not task.done():
+                task.cancel()
+    async def run_shell(self, cmd: str) -> AsyncIterator[ExecEvent]:
+        """Run shell command via sandbox.commands.run()."""
+        if self._sandbox is None:
+            await self.start()
+        sb = self._sandbox
+        loop = asyncio.get_running_loop()
+        queue: asyncio.Queue[ExecEvent] = asyncio.Queue()
+        def on_stdout(data) -> None:
+            loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stdout", str(data)))
+        def on_stderr(data) -> None:
+            loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stderr", str(data)))
+        async def runner():
+            try:
+                def _exec():
+                    return sb.commands.run(cmd, on_stdout=on_stdout, on_stderr=on_stderr)
+                result = await asyncio.to_thread(_exec)
+                exit_code = getattr(result, "exit_code", None)
+                loop.call_soon_threadsafe(
+                    queue.put_nowait,
+                    ExecEvent("result", "", {"exit_code": exit_code}),
+                )
+            except Exception as e:
+                loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("error", str(e)))
+            finally:
+                loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("__done__"))
+        task = asyncio.create_task(runner())
+        try:
+            while True:
+                ev = await queue.get()
+                if ev.type == "__done__":
+                    break
+                yield ev
+        finally:
+            if not task.done():
+                task.cancel()
+    async def write_file(self, path: str, contents: str) -> None:
+        if self._sandbox is None:
+            await self.start()
+        sb = self._sandbox
+        await asyncio.to_thread(sb.files.write, path, contents)
+    async def read_file(self, path: str) -> str:
+        if self._sandbox is None:
+            await self.start()
+        sb = self._sandbox
+        return await asyncio.to_thread(sb.files.read, path)
+# ----------------------------------------------------------------------------
+# Convenience context manager
+# ----------------------------------------------------------------------------
+@asynccontextmanager
+async def sandbox_session(timeout: int = 300):
+    ex = E2BExecutor(timeout=timeout)
+    try:
+        await ex.start()
+        yield ex
+    finally:
+        await ex.close()

intent.py ADDED Viewed

	@@ -0,0 +1,141 @@

+"""
+Execution intent detection.
+Decides whether a user prompt needs a real E2B sandbox or is just chat.
+Strategy:
+  1. Fast keyword/regex heuristics (free, deterministic)
+  2. If ambiguous → fall back to LLM classification (cheap)
+Output: ExecutionIntent dataclass
+"""
+from __future__ import annotations
+import json
+import logging
+import re
+from dataclasses import dataclass
+from typing import List, Optional
+from . import llm_router
+logger = logging.getLogger(__name__)
+@dataclass
+class ExecutionIntent:
+    """Routing decision: does the prompt need the sandbox, and why."""
+    needs_sandbox: bool  # True → route to sandbox execution
+    reason: str  # short human-readable justification
+    confidence: float  # 0..1
+# ----------------------------------------------------------------------------
+# Heuristic rules
+# ----------------------------------------------------------------------------
+# Strong execution signals
+_EXEC_PATTERNS = [
+    r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
+    r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
+    r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
+    r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
+    r"\b(install|pip install|npm install|apt install)\b",
+    r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
+    r"```(python|bash|sh|shell|javascript|js|node)\b",
+    r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
+    r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
+    r"\bdebug\s+(this|my)\b",
+    r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
+]
+# Strong chat-only signals
+_CHAT_PATTERNS = [
+    r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
+    r"^\s*(thanks|thank you|thx|ty)\b",
+    r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
+    r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
+    r"\b(define|definition\s+of)\b",
+    r"\btell me about\b",
+    r"\bdifference between\b",
+]
+_EXEC_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _EXEC_PATTERNS]
+_CHAT_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _CHAT_PATTERNS]
+def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
+    """Classify *prompt* with regex heuristics only.
+
+    Returns an ExecutionIntent when the signals are one-sided, or None when
+    they are mixed or absent on a longer prompt (the caller then asks the LLM).
+    """
+    text = (prompt or "").strip()
+    if not text:
+        return ExecutionIntent(False, "empty prompt", 1.0)
+    n_exec = len([rx for rx in _EXEC_RE if rx.search(text)])
+    n_chat = len([rx for rx in _CHAT_RE if rx.search(text)])
+    if n_exec == 0:
+        # A language-tagged code fence that no execution pattern caught is
+        # still treated as a run request, regardless of chat matches.
+        if "```" in text and re.search(
+            r"```(python|bash|sh|shell|js|node)", text, re.IGNORECASE
+        ):
+            return ExecutionIntent(True, "code block detected", 0.85)
+        if n_chat:
+            return ExecutionIntent(False, f"matched {n_chat} chat pattern(s)", 0.9)
+        # No signal either way: very short prompts are usually chat,
+        # anything longer is ambiguous and goes to the LLM.
+        if len(text) < 30:
+            return ExecutionIntent(False, "short prompt, likely chat", 0.7)
+        return None
+    if n_chat == 0:
+        return ExecutionIntent(True, f"matched {n_exec} execution pattern(s)", 0.9)
+    # Both kinds of pattern matched → ask LLM
+    return None
+# ----------------------------------------------------------------------------
+# LLM fallback classifier
+# ----------------------------------------------------------------------------
+# System prompt sent by llm_detect(). It asks the model for a JSON object with
+# the two keys llm_detect() reads back: "needs_sandbox" and "reason".
+_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
+requires running real code or shell commands in a sandbox computer.
+Reply ONLY with strict JSON, no prose:
+{"needs_sandbox": true|false, "reason": "<short explanation>"}
+Rules:
+- needs_sandbox = true  when the user wants to run code, execute shell commands,
+  create/modify files, install packages, debug a running program, or otherwise
+  observe real execution results.
+- needs_sandbox = false when the user asks for explanations, greetings,
+  brainstorming, advice, or static code review with no run request.
+"""
+def _as_flag(value: object) -> bool:
+    """Interpret a JSON value as a boolean flag.
+
+    Models sometimes emit the strings "true"/"false" instead of JSON booleans;
+    plain bool("false") would be True, so strings are compared by content.
+    """
+    if isinstance(value, str):
+        return value.strip().lower() in ("true", "yes", "1")
+    return bool(value)
+async def llm_detect(prompt: str) -> ExecutionIntent:
+    """Ask the LLM router whether *prompt* needs the sandbox.
+
+    Sends the first 2000 characters of the prompt together with
+    _CLASSIFIER_SYSTEM and parses the JSON reply. Any failure (router error,
+    unparsable or non-object reply) is logged and mapped to a low-confidence
+    "no sandbox" result, so this function does not raise for those cases.
+    """
+    messages = [
+        {"role": "system", "content": _CLASSIFIER_SYSTEM},
+        {"role": "user", "content": prompt[:2000]},
+    ]
+    try:
+        result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
+        text = result["content"].strip()
+        # Tolerate models that wrap JSON in code fences
+        text = re.sub(r"^```(?:json)?|```$", "", text, flags=re.MULTILINE).strip()
+        # Tolerate prose around the object: keep only the outermost {...} span
+        start, end = text.find("{"), text.rfind("}")
+        if 0 <= start < end:
+            text = text[start:end + 1]
+        obj = json.loads(text)
+        if not isinstance(obj, dict):
+            raise ValueError("classifier reply is not a JSON object")
+        return ExecutionIntent(
+            needs_sandbox=_as_flag(obj.get("needs_sandbox", False)),
+            reason=str(obj.get("reason", "llm classifier"))[:200],
+            confidence=0.75,
+        )
+    except Exception as e:
+        # Deliberate best-effort boundary: classification must never crash
+        # the request, whatever the router or the model returned.
+        logger.warning("LLM intent classifier failed: %s", e)
+        # Conservative default → no sandbox
+        return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
+async def detect(prompt: str) -> ExecutionIntent:
+    """Classify *prompt*: regex heuristic when decisive, otherwise the LLM."""
+    quick = heuristic_detect(prompt)
+    return quick if quick is not None else await llm_detect(prompt)

llm_router.py ADDED Viewed

	@@ -0,0 +1,399 @@

+"""
+LLM Router - Multi-provider with key rotation, cooldown, and failover.
+Providers:
+  - gemini      (Google Generative Language API)
+  - sambanova   (SambaNova OpenAI-compatible)
+  - github_gpt4o (GitHub Models, OpenAI-compatible)
+Comma-separated keys per provider via env vars:
+  GEMINI_KEY, SAMBANOVA_KEY, GITHUB_KEY
+Rotation:
+  - Round-robin across keys
+  - Track failures per key; cool down after N consecutive failures
+  - Failover to next provider when all keys exhausted
+"""
+from __future__ import annotations
+import asyncio
+import logging
+import os
+import time
+from dataclasses import dataclass, field
+from typing import Any, AsyncIterator, Dict, List, Optional
+import httpx
+logger = logging.getLogger(__name__)
+# ----------------------------------------------------------------------------
+# Constants
+# ----------------------------------------------------------------------------
+MAX_PROVIDER_RETRY = 3  # max attempts (key picks) per provider within one request
+MAX_FAILURES_BEFORE_COOLDOWN = 3  # fail_count threshold that triggers a cooldown (count resets on success)
+COOLDOWN_SECONDS = 300  # 5 min
+REQUEST_TIMEOUT_SECONDS = 120.0  # timeout passed to non-streaming POSTs
+STREAM_TIMEOUT_SECONDS = 600.0  # timeout passed to streaming requests
+# ----------------------------------------------------------------------------
+# Provider definitions
+# ----------------------------------------------------------------------------
+@dataclass
+class ProviderConfig:
+    """Static description of one LLM backend used by the router."""
+    # Registry name; also appears in log and error messages
+    name: str
+    # Wire protocol selector
+    kind: str  # 'gemini' | 'openai'
+    # Endpoint URL that requests are POSTed to
+    url: str
+    # Name of the environment variable holding the comma-separated API keys
+    key_env: str
+    # Model identifier (sent as "model" in the body for openai-kind providers)
+    model: str
+    # When False, stream_complete() makes one non-streaming call instead
+    stream_supported: bool = True
+# Registry of known providers, keyed by the names that provider_order() returns.
+PROVIDERS: Dict[str, ProviderConfig] = {
+    "gemini": ProviderConfig(
+        name="gemini",
+        kind="gemini",
+        url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
+        key_env="GEMINI_KEY",
+        model="gemini-2.0-flash",
+        stream_supported=False,  # we use non-streaming generate for simplicity
+    ),
+    "sambanova": ProviderConfig(
+        name="sambanova",
+        kind="openai",
+        url="https://api.sambanova.ai/v1/chat/completions",
+        key_env="SAMBANOVA_KEY",
+        model="Meta-Llama-3.3-70B-Instruct",
+        stream_supported=True,
+    ),
+    "github_gpt4o": ProviderConfig(
+        name="github_gpt4o",
+        kind="openai",
+        url="https://models.inference.ai.azure.com/chat/completions",
+        key_env="GITHUB_KEY",
+        model="gpt-4o",
+        stream_supported=True,
+    ),
+}
+# ----------------------------------------------------------------------------
+# Key pool
+# ----------------------------------------------------------------------------
+@dataclass
+class KeyState:
+    """Mutable health record for a single API key."""
+    # The API key string itself
+    key: str
+    # Failures recorded since the last success (reset by KeyPool.mark_success)
+    fail_count: int = 0
+    # The key is skipped by KeyPool.pick() until this time has passed
+    cooldown_until: float = 0.0  # epoch seconds
+class KeyPool:
+    """Rotating pool of API keys that skips keys currently cooling down."""
+    def __init__(self, keys: List[str]) -> None:
+        # Blank / whitespace-only entries are dropped; the rest are stripped.
+        self._keys: List[KeyState] = []
+        for raw in keys:
+            if raw.strip():
+                self._keys.append(KeyState(raw.strip()))
+        self._cursor = 0
+    def __bool__(self) -> bool:
+        # A pool is truthy when at least one key is configured.
+        return bool(self._keys)
+    def pick(self) -> Optional[KeyState]:
+        """Return the next key that is not cooling down, or None if there is none."""
+        total = len(self._keys)
+        if total == 0:
+            return None
+        now = time.time()
+        remaining = total
+        # Visit each key at most once, continuing from where the last pick stopped.
+        while remaining:
+            candidate = self._keys[self._cursor % total]
+            self._cursor += 1
+            remaining -= 1
+            if candidate.cooldown_until <= now:
+                return candidate
+        return None  # every key is cooling down
+    @staticmethod
+    def mark_success(ks: KeyState) -> None:
+        """Clear the failure counter and any cooldown on *ks*."""
+        ks.cooldown_until = 0.0
+        ks.fail_count = 0
+    @staticmethod
+    def mark_failure(ks: KeyState) -> None:
+        """Record one failure on *ks*; start a cooldown once the threshold is reached."""
+        ks.fail_count += 1
+        if ks.fail_count < MAX_FAILURES_BEFORE_COOLDOWN:
+            return
+        ks.cooldown_until = time.time() + COOLDOWN_SECONDS
+        logger.warning(
+            "Key cooled down for %ds (fail_count=%d)",
+            COOLDOWN_SECONDS, ks.fail_count,
+        )
+# One pool per provider name, kept for the process lifetime so that failure
+# counts and cooldowns carry over from one request to the next.
+_POOL_CACHE: Dict[str, KeyPool] = {}
+def get_pool(provider: ProviderConfig) -> KeyPool:
+    """Return the cached KeyPool for *provider*, building it from the environment on first use."""
+    try:
+        return _POOL_CACHE[provider.name]
+    except KeyError:
+        pass
+    env_value = os.environ.get(provider.key_env, "")
+    pool = KeyPool([part for part in env_value.split(",") if part.strip()])
+    _POOL_CACHE[provider.name] = pool
+    return pool
+# ----------------------------------------------------------------------------
+# Task classification → provider order
+# ----------------------------------------------------------------------------
+def classify_task(prompt: str) -> str:
+    """Bucket *prompt* into a coarse task category by keyword.
+
+    Returns one of "planning", "engineering", "reasoning", "language" or
+    "general". Matching is a case-insensitive substring test, so a keyword
+    also matches inside longer words.
+    """
+    lowered = (prompt or "").lower()
+    # Ordered: the first category with any keyword present wins.
+    rules = (
+        ("planning", ("workflow", "automation")),
+        ("engineering", ("code", "python", "javascript", "function", "api", "build", "debug")),
+        ("reasoning", ("why", "analyze", "explain", "reason")),
+        ("language", ("translate", "summarize", "summary")),
+    )
+    for label, keywords in rules:
+        for keyword in keywords:
+            if keyword in lowered:
+                return label
+    return "general"
+def provider_order(prompt: str) -> List[str]:
+    """Return provider names in the order they should be tried for *prompt*.
+
+    The order depends on classify_task(): planning prompts start with
+    github_gpt4o, engineering and reasoning prompts with sambanova, and
+    language prompts or any other prompt shorter than 500 characters with
+    gemini. A None prompt is treated as empty, matching classify_task().
+    """
+    text = prompt or ""  # len(None) would raise; classify_task already tolerates None
+    task = classify_task(text)
+    if task == "planning":
+        return ["github_gpt4o", "sambanova", "gemini"]
+    if task in ("engineering", "reasoning"):
+        return ["sambanova", "github_gpt4o", "gemini"]
+    if task == "language" or len(text) < 500:
+        return ["gemini", "sambanova", "github_gpt4o"]
+    return ["sambanova", "github_gpt4o", "gemini"]
+# ----------------------------------------------------------------------------
+# Provider callers
+# ----------------------------------------------------------------------------
+def _gemini_body(messages: List[Dict[str, str]]) -> Dict[str, Any]:
+    """Translate OpenAI-style chat messages into a Gemini request body.
+
+    System messages are joined with newlines into "systemInstruction"; every
+    other message becomes a "contents" entry whose role is "user" for user
+    messages and "model" for anything else.
+    """
+    system_texts = [m.get("content", "") for m in messages if m.get("role") == "system"]
+    turns = [
+        {
+            "role": "user" if m.get("role") == "user" else "model",
+            "parts": [{"text": m.get("content", "")}],
+        }
+        for m in messages
+        if m.get("role") != "system"
+    ]
+    body: Dict[str, Any] = {"contents": turns}
+    if system_texts:
+        body["systemInstruction"] = {"parts": [{"text": "\n".join(system_texts)}]}
+    return body
+def _extract_text(provider: ProviderConfig, data: Dict[str, Any]) -> str:
+    """Pull the generated text out of a provider response.
+
+    For gemini-kind providers the text of every part of the first candidate
+    is concatenated (a response may be split across several parts); parts
+    without string text, or flagged as "thought", are skipped. For
+    openai-kind providers the first choice's message content is returned.
+    Returns "" when the expected structure is missing.
+    """
+    try:
+        if provider.kind == "gemini":
+            parts = data["candidates"][0]["content"]["parts"]
+            return "".join(
+                part["text"]
+                for part in parts
+                if isinstance(part, dict)
+                and isinstance(part.get("text"), str)
+                and not part.get("thought")
+            )
+        return data["choices"][0]["message"]["content"] or ""
+    except (KeyError, IndexError, TypeError):
+        return ""
+async def _call_once(
+    client: httpx.AsyncClient,
+    provider: ProviderConfig,
+    key: str,
+    messages: List[Dict[str, str]],
+    temperature: float = 0.4,
+    max_tokens: int = 2048,
+) -> str:
+    """Perform one non-streaming completion request and return the reply text.
+
+    The request body is shaped according to ``provider.kind``. The API key is
+    always sent in a request header, never in the URL, so it cannot end up in
+    request-URL logs.
+
+    Raises:
+        RuntimeError: the provider answered with an HTTP status >= 400.
+    """
+    if provider.kind == "gemini":
+        # Header-based auth keeps the key out of the query string.
+        headers = {
+            "x-goog-api-key": key,
+            "Content-Type": "application/json",
+        }
+        body = _gemini_body(messages)
+        body["generationConfig"] = {
+            "temperature": temperature,
+            "maxOutputTokens": max_tokens,
+        }
+    else:
+        headers = {
+            "Authorization": f"Bearer {key}",
+            "Content-Type": "application/json",
+        }
+        body = {
+            "model": provider.model,
+            "messages": messages,
+            "temperature": temperature,
+            "max_tokens": max_tokens,
+        }
+    r = await client.post(
+        provider.url, headers=headers, json=body, timeout=REQUEST_TIMEOUT_SECONDS
+    )
+    if r.status_code >= 400:
+        raise RuntimeError(f"{provider.name} HTTP {r.status_code}: {r.text[:200]}")
+    return _extract_text(provider, r.json())
+async def complete(
+    messages: List[Dict[str, str]],
+    *,
+    temperature: float = 0.4,
+    max_tokens: int = 2048,
+) -> Dict[str, Any]:
+    """Run a non-streaming completion, failing over across keys and providers.
+
+    Providers are tried in the order given by provider_order() for the
+    concatenated user messages; providers without configured keys are
+    skipped, and each provider gets at most MAX_PROVIDER_RETRY attempts.
+
+    Returns: {"content": str, "provider": str, "model": str}
+    Raises: RuntimeError when no attempt produced a non-empty completion.
+    """
+    user_turns = [m.get("content", "") for m in messages if m.get("role") == "user"]
+    candidates = provider_order("\n".join(user_turns))
+    failure: Optional[str] = None
+    async with httpx.AsyncClient() as http:
+        for name in candidates:
+            cfg = PROVIDERS[name]
+            keys = get_pool(cfg)
+            if not keys:
+                continue
+            attempts_left = MAX_PROVIDER_RETRY
+            while attempts_left > 0:
+                attempts_left -= 1
+                slot = keys.pick()
+                if slot is None:
+                    # every key of this provider is cooling down
+                    break
+                try:
+                    reply = await _call_once(
+                        http, cfg, slot.key, messages,
+                        temperature=temperature, max_tokens=max_tokens,
+                    )
+                    # A blank reply counts as a failed attempt.
+                    if not reply.strip():
+                        raise RuntimeError("empty completion")
+                    KeyPool.mark_success(slot)
+                    return {
+                        "content": reply,
+                        "provider": cfg.name,
+                        "model": cfg.model,
+                    }
+                except Exception as e:
+                    failure = f"{cfg.name}: {e}"
+                    logger.warning("LLM call failed → %s", failure)
+                    KeyPool.mark_failure(slot)
+    raise RuntimeError(f"ALL_PROVIDERS_FAILED ({failure})")
+# ----------------------------------------------------------------------------
+# Streaming (OpenAI-compatible providers stream natively; Gemini is called non-streaming and emitted as a single delta)
+# ----------------------------------------------------------------------------
+async def _stream_openai(
+    client: httpx.AsyncClient,
+    provider: ProviderConfig,
+    key: str,
+    messages: List[Dict[str, str]],
+    temperature: float,
+    max_tokens: int,
+) -> AsyncIterator[str]:
+    """Stream a chat completion from an OpenAI-compatible endpoint.
+
+    Yields each non-empty ``choices[0].delta.content`` string taken from the
+    ``data:`` lines of the response, stopping at the ``[DONE]`` sentinel.
+    Lines that are not valid JSON or lack that structure are skipped.
+
+    Raises:
+        RuntimeError: the provider answered with an HTTP status >= 400.
+    """
+    import json  # local import, done once per call rather than once per line
+    headers = {
+        "Authorization": f"Bearer {key}",
+        "Content-Type": "application/json",
+        "Accept": "text/event-stream",
+    }
+    body = {
+        "model": provider.model,
+        "messages": messages,
+        "temperature": temperature,
+        "max_tokens": max_tokens,
+        "stream": True,
+    }
+    async with client.stream(
+        "POST", provider.url, headers=headers, json=body,
+        timeout=STREAM_TIMEOUT_SECONDS,
+    ) as r:
+        if r.status_code >= 400:
+            err_text = (await r.aread()).decode("utf-8", "ignore")[:200]
+            raise RuntimeError(f"{provider.name} HTTP {r.status_code}: {err_text}")
+        async for line in r.aiter_lines():
+            if not line.startswith("data:"):
+                continue
+            payload = line[5:].strip()
+            if payload == "[DONE]":
+                break
+            # Only the parsing is guarded; the yield stays outside the try so
+            # an exception raised at the yield point is never swallowed here.
+            try:
+                delta = json.loads(payload)["choices"][0]["delta"].get("content")
+            except (ValueError, KeyError, IndexError, TypeError, AttributeError):
+                continue
+            if delta:
+                yield delta
+async def stream_complete(
+    messages: List[Dict[str, str]],
+    *,
+    temperature: float = 0.4,
+    max_tokens: int = 2048,
+) -> AsyncIterator[Dict[str, Any]]:
+    """Stream a completion with provider/key failover.
+
+    Yields {'type':'delta','content':str,'provider':str} chunks followed by
+    one {'type':'done', ...} event on success. Failover to another key or
+    provider only happens while nothing has been delivered yet: if an attempt
+    fails after some deltas were already yielded, a single
+    {'type':'error', ...} event is emitted and the stream ends, because a
+    retry would replay the answer from the start on top of the partial text.
+    When every attempt fails before producing output, the final event is
+    {'type':'error','error':'ALL_PROVIDERS_FAILED (...)'}.
+    """
+    prompt_text = "\n".join(m.get("content", "") for m in messages if m.get("role") == "user")
+    order = provider_order(prompt_text)
+    last_err: Optional[str] = None
+    emitted = False  # True once any delta has reached the caller
+    async with httpx.AsyncClient() as client:
+        for provider_name in order:
+            provider = PROVIDERS[provider_name]
+            pool = get_pool(provider)
+            if not pool:
+                continue
+            for _ in range(MAX_PROVIDER_RETRY):
+                ks = pool.pick()
+                if ks is None:
+                    break
+                try:
+                    if provider.stream_supported:
+                        async for delta in _stream_openai(
+                            client, provider, ks.key, messages, temperature, max_tokens
+                        ):
+                            emitted = True
+                            yield {"type": "delta", "content": delta, "provider": provider.name}
+                        if not emitted:
+                            raise RuntimeError("empty stream")
+                    else:
+                        # Gemini fallback: non-streaming, emit as a single delta
+                        text = await _call_once(
+                            client, provider, ks.key, messages,
+                            temperature=temperature, max_tokens=max_tokens,
+                        )
+                        if not text.strip():
+                            raise RuntimeError("empty completion")
+                        emitted = True
+                        yield {"type": "delta", "content": text, "provider": provider.name}
+                    KeyPool.mark_success(ks)
+                    yield {"type": "done", "provider": provider.name, "model": provider.model}
+                    return
+                except Exception as e:
+                    last_err = f"{provider.name}: {e}"
+                    logger.warning("LLM stream failed → %s", last_err)
+                    KeyPool.mark_failure(ks)
+                    if emitted:
+                        # Partial output is already with the caller; stop
+                        # instead of restarting the answer on another key.
+                        yield {"type": "error", "error": f"STREAM_INTERRUPTED ({last_err})"}
+                        return
+    yield {"type": "error", "error": f"ALL_PROVIDERS_FAILED ({last_err})"}
+def pool_status() -> Dict[str, Any]:
+    """Report, per provider, how many keys are configured and how many are not cooling down."""
+    now = time.time()
+    report: Dict[str, Any] = {}
+    for name, provider in PROVIDERS.items():
+        states = get_pool(provider)._keys
+        ready = [state for state in states if state.cooldown_until <= now]
+        report[name] = {
+            "keys_configured": len(states),
+            "keys_available": len(ready),
+            "model": provider.model,
+        }
+    return report

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+fastapi==0.115.5
+uvicorn[standard]==0.32.1
+pydantic==2.10.3
+httpx==0.27.2
+e2b-code-interpreter==1.0.4
+python-multipart==0.0.20

tests/__init__.py ADDED Viewed

File without changes

tests/test_smoke.py ADDED Viewed

	@@ -0,0 +1,81 @@

+"""
+Smoke tests that DO NOT hit the network.
+Run with: pytest apps/backend/tests
+"""
+from __future__ import annotations
+import importlib
+import os
+import sys
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
+def test_imports():
+    """Importing each backend module must not raise."""
+    module_names = (
+        "backend.app",
+        "backend.agent",
+        "backend.intent",
+        "backend.llm_router",
+        "backend.executor",
+    )
+    for module_name in module_names:
+        importlib.import_module(module_name)
+def test_intent_heuristic_chat():
+    """A plain greeting is decided by the heuristic as chat (no sandbox)."""
+    from backend.intent import heuristic_detect
+    verdict = heuristic_detect("hello there")
+    assert verdict is not None
+    assert verdict.needs_sandbox is False
+def test_intent_heuristic_exec():
+    """A file-creation request is decided by the heuristic as needing the sandbox."""
+    from backend.intent import heuristic_detect
+    verdict = heuristic_detect("Create proof.txt with the current unix timestamp and write it")
+    assert verdict is not None
+    assert verdict.needs_sandbox is True
+def test_intent_code_fence():
+    """A bare python code fence is decided by the heuristic as needing the sandbox."""
+    from backend.intent import heuristic_detect
+    verdict = heuristic_detect("```python\nprint('hi')\n```")
+    assert verdict is not None
+    assert verdict.needs_sandbox is True
+def test_extract_code_blocks():
+    """Two fenced blocks are extracted and the python one is picked as runnable."""
+    from backend.agent import extract_code_blocks, pick_runnable
+    reply = "Here is code:\n```python\nprint(1)\n```\nand shell:\n```bash\nls\n```"
+    found = extract_code_blocks(reply)
+    assert len(found) == 2
+    runnable = pick_runnable(found)
+    assert runnable is not None
+    assert runnable.language in ("python", "py")
+def test_provider_order_engineering():
+    """An engineering-style prompt puts sambanova first."""
+    from backend.llm_router import provider_order
+    first, *_ = provider_order("Write a python function to sort a list")
+    assert first == "sambanova"
+def test_provider_order_short_chat():
+    """A short, keyword-free prompt puts gemini first."""
+    from backend.llm_router import provider_order
+    first, *_ = provider_order("hi")
+    assert first == "gemini"
+def test_pool_empty_when_no_env():
+    """A pool built from no keys is falsy and never hands out a key."""
+    from backend.llm_router import KeyPool
+    pool = KeyPool([])
+    assert not pool
+    assert pool.pick() is None
+def test_pool_rotation_and_cooldown():
+    from backend.llm_router import KeyPool, MAX_FAILURES_BEFORE_COOLDOWN
+    p = KeyPool(["a", "b", "c"])
+    seen = set()
+    for _ in range(3):
+        ks = p.pick()
+        assert ks is not None
+        seen.add(ks.key)
+    assert seen == {"a", "b", "c"}
+    # Mark one failed enough times to cool down
+    ks = p.pick()
+    for _ in range(MAX_FAILURES_BEFORE_COOLDOWN):
+        KeyPool.mark_failure(ks)
+    assert ks.cooldown_until > 0