Spaces:
Sleeping
Sleeping
Phase 1 backend deploy
Browse files- .env.example +17 -0
- Dockerfile +37 -0
- README.md +36 -4
- __init__.py +2 -0
- agent.py +214 -0
- app.py +242 -0
- executor.py +230 -0
- intent.py +141 -0
- llm_router.py +399 -0
- requirements.txt +6 -0
- tests/__init__.py +0 -0
- tests/test_smoke.py +81 -0
.env.example
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copy to .env for local dev. DO NOT COMMIT real keys.
|
| 2 |
+
|
| 3 |
+
# LLM providers (comma-separated keys, at least one provider required)
|
| 4 |
+
GEMINI_KEY=
|
| 5 |
+
SAMBANOVA_KEY=
|
| 6 |
+
GITHUB_KEY=
|
| 7 |
+
|
| 8 |
+
# E2B (required for execution endpoints)
|
| 9 |
+
E2B_API_KEY=
|
| 10 |
+
# E2B_TEMPLATE=
|
| 11 |
+
|
| 12 |
+
# CORS
|
| 13 |
+
ALLOWED_ORIGINS=*
|
| 14 |
+
|
| 15 |
+
# Logging
|
| 16 |
+
LOG_LEVEL=INFO
|
| 17 |
+
PORT=7860
|
Dockerfile
ADDED
|
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# HF Space Dockerfile for OpenHands Simplified Backend (Phase 1)
|
| 2 |
+
# Runs FastAPI on port 7860 (HF Space default).
|
| 3 |
+
|
| 4 |
+
FROM python:3.11-slim
|
| 5 |
+
|
| 6 |
+
ENV PYTHONDONTWRITEBYTECODE=1 \
|
| 7 |
+
PYTHONUNBUFFERED=1 \
|
| 8 |
+
PIP_NO_CACHE_DIR=1 \
|
| 9 |
+
PIP_DISABLE_PIP_VERSION_CHECK=1 \
|
| 10 |
+
PORT=7860 \
|
| 11 |
+
HOME=/home/user
|
| 12 |
+
|
| 13 |
+
# Non-root user expected by HF Spaces
|
| 14 |
+
RUN useradd -m -u 1000 user
|
| 15 |
+
WORKDIR /home/user/app
|
| 16 |
+
|
| 17 |
+
# System deps (minimal)
|
| 18 |
+
RUN apt-get update && apt-get install -y --no-install-recommends \
|
| 19 |
+
curl ca-certificates \
|
| 20 |
+
&& rm -rf /var/lib/apt/lists/*
|
| 21 |
+
|
| 22 |
+
# Python deps
|
| 23 |
+
COPY --chown=user:user requirements.txt /home/user/app/requirements.txt
|
| 24 |
+
RUN pip install --no-cache-dir -r /home/user/app/requirements.txt
|
| 25 |
+
|
| 26 |
+
# App code
|
| 27 |
+
COPY --chown=user:user . /home/user/app/backend
|
| 28 |
+
# Make package importable as `backend`
|
| 29 |
+
ENV PYTHONPATH=/home/user/app
|
| 30 |
+
|
| 31 |
+
USER user
|
| 32 |
+
EXPOSE 7860
|
| 33 |
+
|
| 34 |
+
HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
|
| 35 |
+
CMD curl -fsS http://localhost:7860/health || exit 1
|
| 36 |
+
|
| 37 |
+
CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]
|
README.md
CHANGED
|
@@ -1,10 +1,42 @@
|
|
| 1 |
---
|
| 2 |
-
title:
|
| 3 |
-
emoji:
|
| 4 |
colorFrom: indigo
|
| 5 |
-
colorTo:
|
| 6 |
sdk: docker
|
|
|
|
| 7 |
pinned: false
|
| 8 |
---
|
| 9 |
|
| 10 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
---
|
| 2 |
+
title: OpenHands Simplified Backend
|
| 3 |
+
emoji: 🤖
|
| 4 |
colorFrom: indigo
|
| 5 |
+
colorTo: purple
|
| 6 |
sdk: docker
|
| 7 |
+
app_port: 7860
|
| 8 |
pinned: false
|
| 9 |
---
|
| 10 |
|
| 11 |
+
# OpenHands Simplified Backend (Phase 1)
|
| 12 |
+
|
| 13 |
+
FastAPI service that exposes:
|
| 14 |
+
|
| 15 |
+
- `GET /health` — liveness + provider/key status
|
| 16 |
+
- `POST /api/chat` — non-streaming chat
|
| 17 |
+
- `POST /api/chat/stream` — SSE; auto-routes chat vs E2B execution
|
| 18 |
+
- `POST /api/execute` — SSE; always runs in E2B sandbox
|
| 19 |
+
- `POST /api/intent` — intent classifier only
|
| 20 |
+
|
| 21 |
+
## Environment variables (set in HF Space → Settings → Secrets)
|
| 22 |
+
|
| 23 |
+
| Name | Required | Notes |
|
| 24 |
+
|-------------------|----------|----------------------------------------|
|
| 25 |
+
| `GEMINI_KEY` | optional | comma-separated Gemini API keys |
|
| 26 |
+
| `SAMBANOVA_KEY` | optional | comma-separated SambaNova API keys |
|
| 27 |
+
| `GITHUB_KEY` | optional | comma-separated GitHub Models tokens |
|
| 28 |
+
| `E2B_API_KEY` | **yes** | required for execution endpoints |
|
| 29 |
+
| `E2B_TEMPLATE` | optional | custom sandbox template id |
|
| 30 |
+
| `ALLOWED_ORIGINS` | optional | CSV; defaults to `*` |
|
| 31 |
+
| `LOG_LEVEL` | optional | default `INFO` |
|
| 32 |
+
|
| 33 |
+
At least ONE LLM provider key must be set.
|
| 34 |
+
|
| 35 |
+
## Test (after deploy)
|
| 36 |
+
|
| 37 |
+
```bash
|
| 38 |
+
curl https://<your-space>.hf.space/health
|
| 39 |
+
curl -N -X POST https://<your-space>.hf.space/api/execute \
|
| 40 |
+
-H 'Content-Type: application/json' \
|
| 41 |
+
-d '{"message":"Create proof.txt with the current UNIX timestamp and print its contents."}'
|
| 42 |
+
```
|
__init__.py
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Simplified OpenHands backend (Phase 1)."""
|
| 2 |
+
__version__ = "0.1.0"
|
agent.py
ADDED
|
@@ -0,0 +1,214 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Agent loop: orchestrates LLM <-> E2B sandbox for execution tasks.
|
| 3 |
+
|
| 4 |
+
Phase 1 design — kept deliberately simple and robust:
|
| 5 |
+
|
| 6 |
+
1. Ask LLM to produce a SINGLE python code block (and optional shell block)
|
| 7 |
+
to satisfy the user's request, given recent context.
|
| 8 |
+
2. Extract the code block(s).
|
| 9 |
+
3. Run them in a fresh E2B sandbox, streaming stdout/stderr to the caller.
|
| 10 |
+
4. Show the LLM the real output and ask for a final natural-language reply.
|
| 11 |
+
5. Stream that reply.
|
| 12 |
+
6. Close the sandbox.
|
| 13 |
+
|
| 14 |
+
Anything more elaborate (multi-step planner, tool-calling, retry-on-error) is
|
| 15 |
+
intentionally OUT of Phase 1.
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import json
|
| 21 |
+
import logging
|
| 22 |
+
import re
|
| 23 |
+
from dataclasses import dataclass
|
| 24 |
+
from typing import AsyncIterator, Dict, List, Optional
|
| 25 |
+
|
| 26 |
+
from . import llm_router
|
| 27 |
+
from .executor import E2BExecutor, ExecEvent
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
# ----------------------------------------------------------------------------
|
| 33 |
+
# Prompts
|
| 34 |
+
# ----------------------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
CODER_SYSTEM = """You are a code executor agent running inside a real Linux
|
| 37 |
+
sandbox (E2B). The user will ask you to do something that requires running
|
| 38 |
+
real code. Reply with ONE single fenced code block — Python preferred — that,
|
| 39 |
+
when executed, accomplishes the task.
|
| 40 |
+
|
| 41 |
+
Strict rules:
|
| 42 |
+
- Output ONLY the code block. No prose before or after.
|
| 43 |
+
- Prefer Python. Use ```python fences.
|
| 44 |
+
- If the task is shell-only (mkdir, ls, install a package), you may use one
|
| 45 |
+
```bash block instead.
|
| 46 |
+
- Print clear progress messages so the user can see what happened.
|
| 47 |
+
- Always print a final confirmation line.
|
| 48 |
+
- Keep total output under ~200 lines.
|
| 49 |
+
"""
|
| 50 |
+
|
| 51 |
+
REPLY_SYSTEM = """You are a helpful assistant. The user asked for a task that
|
| 52 |
+
required running real code. Below is the user's request, the code that ran,
|
| 53 |
+
and the REAL execution output. Write a short, friendly natural-language reply
|
| 54 |
+
summarising what was done and quoting any important values from the real
|
| 55 |
+
output. Do NOT fabricate. Do NOT re-run anything. Keep it concise (3-6
|
| 56 |
+
sentences)."""
|
| 57 |
+
|
| 58 |
+
CHAT_SYSTEM = """You are a concise, helpful assistant. Reply in the same
|
| 59 |
+
language as the user when natural. Keep answers focused."""
|
| 60 |
+
|
| 61 |
+
|
| 62 |
+
# ----------------------------------------------------------------------------
|
| 63 |
+
# Code extraction
|
| 64 |
+
# ----------------------------------------------------------------------------
|
| 65 |
+
|
| 66 |
+
_FENCE_RE = re.compile(
|
| 67 |
+
r"```([a-zA-Z0-9_+\-]*)\s*\n(.*?)```", re.DOTALL
|
| 68 |
+
)
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
@dataclass
|
| 72 |
+
class CodeBlock:
|
| 73 |
+
language: str
|
| 74 |
+
code: str
|
| 75 |
+
|
| 76 |
+
|
| 77 |
+
def extract_code_blocks(text: str) -> List[CodeBlock]:
|
| 78 |
+
blocks: List[CodeBlock] = []
|
| 79 |
+
for m in _FENCE_RE.finditer(text or ""):
|
| 80 |
+
lang = (m.group(1) or "").lower().strip()
|
| 81 |
+
code = m.group(2).rstrip()
|
| 82 |
+
blocks.append(CodeBlock(language=lang or "python", code=code))
|
| 83 |
+
return blocks
|
| 84 |
+
|
| 85 |
+
|
| 86 |
+
def pick_runnable(blocks: List[CodeBlock]) -> Optional[CodeBlock]:
|
| 87 |
+
# Prefer python; else bash/sh; else first
|
| 88 |
+
for b in blocks:
|
| 89 |
+
if b.language in ("python", "py"):
|
| 90 |
+
return b
|
| 91 |
+
for b in blocks:
|
| 92 |
+
if b.language in ("bash", "sh", "shell"):
|
| 93 |
+
return b
|
| 94 |
+
return blocks[0] if blocks else None
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
# ----------------------------------------------------------------------------
|
| 98 |
+
# Streaming agent
|
| 99 |
+
# ----------------------------------------------------------------------------
|
| 100 |
+
|
| 101 |
+
async def stream_chat_only(
|
| 102 |
+
messages: List[Dict[str, str]],
|
| 103 |
+
) -> AsyncIterator[Dict]:
|
| 104 |
+
"""Plain chat: no sandbox."""
|
| 105 |
+
full_messages = [{"role": "system", "content": CHAT_SYSTEM}, *messages]
|
| 106 |
+
yield {"type": "phase", "phase": "chat"}
|
| 107 |
+
async for chunk in llm_router.stream_complete(full_messages, temperature=0.4, max_tokens=1024):
|
| 108 |
+
if chunk["type"] == "delta":
|
| 109 |
+
yield {"type": "assistant_delta", "content": chunk["content"]}
|
| 110 |
+
elif chunk["type"] == "done":
|
| 111 |
+
yield {"type": "assistant_done", "provider": chunk.get("provider"), "model": chunk.get("model")}
|
| 112 |
+
elif chunk["type"] == "error":
|
| 113 |
+
yield {"type": "error", "error": chunk["error"]}
|
| 114 |
+
|
| 115 |
+
|
| 116 |
+
async def stream_execute(
|
| 117 |
+
messages: List[Dict[str, str]],
|
| 118 |
+
*,
|
| 119 |
+
sandbox_timeout: int = 300,
|
| 120 |
+
) -> AsyncIterator[Dict]:
|
| 121 |
+
"""Execution task: spin up E2B, run code, reply with real results."""
|
| 122 |
+
|
| 123 |
+
# --- step 1: ask LLM for code -------------------------------------------
|
| 124 |
+
yield {"type": "phase", "phase": "planning"}
|
| 125 |
+
code_messages = [{"role": "system", "content": CODER_SYSTEM}, *messages]
|
| 126 |
+
try:
|
| 127 |
+
coder_resp = await llm_router.complete(code_messages, temperature=0.2, max_tokens=1500)
|
| 128 |
+
except Exception as e:
|
| 129 |
+
yield {"type": "error", "error": f"LLM failed: {e}"}
|
| 130 |
+
return
|
| 131 |
+
|
| 132 |
+
raw = coder_resp["content"]
|
| 133 |
+
yield {"type": "plan", "content": raw, "provider": coder_resp.get("provider")}
|
| 134 |
+
|
| 135 |
+
blocks = extract_code_blocks(raw)
|
| 136 |
+
chosen = pick_runnable(blocks)
|
| 137 |
+
if chosen is None:
|
| 138 |
+
# No code block → degrade to chat reply
|
| 139 |
+
yield {"type": "assistant_delta", "content": raw}
|
| 140 |
+
yield {"type": "assistant_done"}
|
| 141 |
+
return
|
| 142 |
+
|
| 143 |
+
yield {"type": "code", "language": chosen.language, "code": chosen.code}
|
| 144 |
+
|
| 145 |
+
# --- step 2: launch sandbox & run ---------------------------------------
|
| 146 |
+
yield {"type": "phase", "phase": "sandbox_starting"}
|
| 147 |
+
executor: Optional[E2BExecutor] = None
|
| 148 |
+
stdout_buf: List[str] = []
|
| 149 |
+
stderr_buf: List[str] = []
|
| 150 |
+
error_text: Optional[str] = None
|
| 151 |
+
result_text: str = ""
|
| 152 |
+
exit_code: Optional[int] = None
|
| 153 |
+
|
| 154 |
+
try:
|
| 155 |
+
executor = E2BExecutor(timeout=sandbox_timeout)
|
| 156 |
+
await executor.start()
|
| 157 |
+
yield {"type": "sandbox_started", "sandbox_id": executor.sandbox_id}
|
| 158 |
+
|
| 159 |
+
runner = (
|
| 160 |
+
executor.run_python(chosen.code)
|
| 161 |
+
if chosen.language in ("python", "py")
|
| 162 |
+
else executor.run_shell(chosen.code)
|
| 163 |
+
)
|
| 164 |
+
|
| 165 |
+
yield {"type": "phase", "phase": "executing"}
|
| 166 |
+
async for ev in runner:
|
| 167 |
+
if ev.type == "stdout":
|
| 168 |
+
stdout_buf.append(ev.data)
|
| 169 |
+
yield {"type": "stdout", "content": ev.data}
|
| 170 |
+
elif ev.type == "stderr":
|
| 171 |
+
stderr_buf.append(ev.data)
|
| 172 |
+
yield {"type": "stderr", "content": ev.data}
|
| 173 |
+
elif ev.type == "error":
|
| 174 |
+
error_text = ev.data
|
| 175 |
+
yield {"type": "exec_error", "content": ev.data, "meta": ev.meta}
|
| 176 |
+
elif ev.type == "result":
|
| 177 |
+
result_text = ev.data
|
| 178 |
+
exit_code = ev.meta.get("exit_code") if ev.meta else None
|
| 179 |
+
yield {"type": "exec_result", "content": ev.data, "meta": ev.meta}
|
| 180 |
+
|
| 181 |
+
except Exception as e:
|
| 182 |
+
logger.exception("sandbox error")
|
| 183 |
+
yield {"type": "error", "error": f"Sandbox error: {e}"}
|
| 184 |
+
if executor:
|
| 185 |
+
await executor.close()
|
| 186 |
+
return
|
| 187 |
+
finally:
|
| 188 |
+
if executor:
|
| 189 |
+
await executor.close()
|
| 190 |
+
yield {"type": "sandbox_closed"}
|
| 191 |
+
|
| 192 |
+
# --- step 3: ask LLM for final reply with real outputs ------------------
|
| 193 |
+
yield {"type": "phase", "phase": "summarising"}
|
| 194 |
+
user_request = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
|
| 195 |
+
summary_user = (
|
| 196 |
+
f"USER REQUEST:\n{user_request}\n\n"
|
| 197 |
+
f"CODE EXECUTED ({chosen.language}):\n```\n{chosen.code}\n```\n\n"
|
| 198 |
+
f"STDOUT:\n{''.join(stdout_buf) or '(empty)'}\n\n"
|
| 199 |
+
f"STDERR:\n{''.join(stderr_buf) or '(empty)'}\n\n"
|
| 200 |
+
f"RESULT:\n{result_text or '(none)'}\n\n"
|
| 201 |
+
f"ERROR:\n{error_text or '(none)'}\n\n"
|
| 202 |
+
f"EXIT_CODE: {exit_code}"
|
| 203 |
+
)
|
| 204 |
+
reply_messages = [
|
| 205 |
+
{"role": "system", "content": REPLY_SYSTEM},
|
| 206 |
+
{"role": "user", "content": summary_user},
|
| 207 |
+
]
|
| 208 |
+
async for chunk in llm_router.stream_complete(reply_messages, temperature=0.4, max_tokens=600):
|
| 209 |
+
if chunk["type"] == "delta":
|
| 210 |
+
yield {"type": "assistant_delta", "content": chunk["content"]}
|
| 211 |
+
elif chunk["type"] == "done":
|
| 212 |
+
yield {"type": "assistant_done", "provider": chunk.get("provider"), "model": chunk.get("model")}
|
| 213 |
+
elif chunk["type"] == "error":
|
| 214 |
+
yield {"type": "error", "error": chunk["error"]}
|
app.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
FastAPI backend — simplified OpenHands runtime gateway.
|
| 3 |
+
|
| 4 |
+
Endpoints:
|
| 5 |
+
GET / → service info
|
| 6 |
+
GET /health → liveness + provider/key status
|
| 7 |
+
POST /api/chat → JSON, non-streaming convenience endpoint
|
| 8 |
+
POST /api/chat/stream → SSE streaming (chat OR execute, auto-routed)
|
| 9 |
+
POST /api/execute → SSE streaming, always uses sandbox
|
| 10 |
+
POST /api/intent → JSON, returns intent decision only
|
| 11 |
+
|
| 12 |
+
CORS is fully open by default (intended for Vercel frontend).
|
| 13 |
+
Configure with env var ALLOWED_ORIGINS (comma-separated) to lock down.
|
| 14 |
+
"""
|
| 15 |
+
|
| 16 |
+
from __future__ import annotations
|
| 17 |
+
|
| 18 |
+
import asyncio
|
| 19 |
+
import json
|
| 20 |
+
import logging
|
| 21 |
+
import os
|
| 22 |
+
import time
|
| 23 |
+
from typing import Any, Dict, List, Optional
|
| 24 |
+
|
| 25 |
+
from fastapi import FastAPI, HTTPException, Request
|
| 26 |
+
from fastapi.middleware.cors import CORSMiddleware
|
| 27 |
+
from fastapi.responses import JSONResponse, StreamingResponse
|
| 28 |
+
from pydantic import BaseModel, Field
|
| 29 |
+
|
| 30 |
+
from . import agent, intent, llm_router
|
| 31 |
+
|
| 32 |
+
# ----------------------------------------------------------------------------
|
| 33 |
+
# Logging
|
| 34 |
+
# ----------------------------------------------------------------------------
|
| 35 |
+
|
| 36 |
+
logging.basicConfig(
|
| 37 |
+
level=os.environ.get("LOG_LEVEL", "INFO"),
|
| 38 |
+
format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
|
| 39 |
+
)
|
| 40 |
+
logger = logging.getLogger("openhands.backend")
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
# ----------------------------------------------------------------------------
|
| 44 |
+
# Models
|
| 45 |
+
# ----------------------------------------------------------------------------
|
| 46 |
+
|
| 47 |
+
class ChatMessage(BaseModel):
|
| 48 |
+
role: str = Field(..., pattern="^(system|user|assistant)$")
|
| 49 |
+
content: str
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
class ChatRequest(BaseModel):
|
| 53 |
+
messages: List[ChatMessage] = Field(default_factory=list)
|
| 54 |
+
message: Optional[str] = None # convenience: single user message
|
| 55 |
+
force_sandbox: Optional[bool] = None # override intent detection
|
| 56 |
+
sandbox_timeout: int = 300
|
| 57 |
+
|
| 58 |
+
def to_messages(self) -> List[Dict[str, str]]:
|
| 59 |
+
msgs = [m.dict() for m in self.messages]
|
| 60 |
+
if self.message:
|
| 61 |
+
msgs.append({"role": "user", "content": self.message})
|
| 62 |
+
if not msgs:
|
| 63 |
+
raise ValueError("at least one message is required")
|
| 64 |
+
return msgs
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
class IntentRequest(BaseModel):
|
| 68 |
+
message: str
|
| 69 |
+
|
| 70 |
+
|
| 71 |
+
# ----------------------------------------------------------------------------
|
| 72 |
+
# App
|
| 73 |
+
# ----------------------------------------------------------------------------
|
| 74 |
+
|
| 75 |
+
app = FastAPI(
|
| 76 |
+
title="OpenHands Simplified Backend",
|
| 77 |
+
version="0.1.0",
|
| 78 |
+
description="Phase-1 backend: LLM router + E2B real execution + SSE streaming.",
|
| 79 |
+
)
|
| 80 |
+
|
| 81 |
+
_allowed = os.environ.get("ALLOWED_ORIGINS", "*").strip()
|
| 82 |
+
allowed_origins = ["*"] if _allowed == "*" else [o.strip() for o in _allowed.split(",") if o.strip()]
|
| 83 |
+
|
| 84 |
+
app.add_middleware(
|
| 85 |
+
CORSMiddleware,
|
| 86 |
+
allow_origins=allowed_origins,
|
| 87 |
+
allow_credentials=False,
|
| 88 |
+
allow_methods=["*"],
|
| 89 |
+
allow_headers=["*"],
|
| 90 |
+
)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
# ----------------------------------------------------------------------------
|
| 94 |
+
# Helpers
|
| 95 |
+
# ----------------------------------------------------------------------------
|
| 96 |
+
|
| 97 |
+
def sse_format(event: str, data: Any) -> bytes:
|
| 98 |
+
payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
|
| 99 |
+
return f"event: {event}\ndata: {payload}\n\n".encode("utf-8")
|
| 100 |
+
|
| 101 |
+
|
| 102 |
+
SSE_HEADERS = {
|
| 103 |
+
"Content-Type": "text/event-stream",
|
| 104 |
+
"Cache-Control": "no-cache, no-transform",
|
| 105 |
+
"Connection": "keep-alive",
|
| 106 |
+
"X-Accel-Buffering": "no", # disable buffering on proxies that respect it
|
| 107 |
+
}
|
| 108 |
+
|
| 109 |
+
|
| 110 |
+
# ----------------------------------------------------------------------------
|
| 111 |
+
# Routes
|
| 112 |
+
# ----------------------------------------------------------------------------
|
| 113 |
+
|
| 114 |
+
@app.get("/")
|
| 115 |
+
async def root() -> Dict[str, Any]:
|
| 116 |
+
return {
|
| 117 |
+
"service": "openhands-simplified-backend",
|
| 118 |
+
"status": "ok",
|
| 119 |
+
"endpoints": [
|
| 120 |
+
"/health",
|
| 121 |
+
"/api/chat",
|
| 122 |
+
"/api/chat/stream",
|
| 123 |
+
"/api/execute",
|
| 124 |
+
"/api/intent",
|
| 125 |
+
],
|
| 126 |
+
}
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
@app.get("/health")
|
| 130 |
+
async def health() -> Dict[str, Any]:
|
| 131 |
+
return {
|
| 132 |
+
"status": "ok",
|
| 133 |
+
"time": int(time.time()),
|
| 134 |
+
"providers": llm_router.pool_status(),
|
| 135 |
+
"e2b_configured": bool(os.environ.get("E2B_API_KEY")),
|
| 136 |
+
}
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
@app.post("/api/intent")
|
| 140 |
+
async def api_intent(req: IntentRequest) -> Dict[str, Any]:
|
| 141 |
+
decision = await intent.detect(req.message)
|
| 142 |
+
return {
|
| 143 |
+
"needs_sandbox": decision.needs_sandbox,
|
| 144 |
+
"reason": decision.reason,
|
| 145 |
+
"confidence": decision.confidence,
|
| 146 |
+
}
|
| 147 |
+
|
| 148 |
+
|
| 149 |
+
@app.post("/api/chat")
|
| 150 |
+
async def api_chat(req: ChatRequest) -> Dict[str, Any]:
|
| 151 |
+
"""Non-streaming chat (no sandbox). Convenience for simple clients."""
|
| 152 |
+
try:
|
| 153 |
+
messages = req.to_messages()
|
| 154 |
+
except ValueError as e:
|
| 155 |
+
raise HTTPException(400, str(e))
|
| 156 |
+
|
| 157 |
+
# Add a light system message if not present
|
| 158 |
+
if not any(m["role"] == "system" for m in messages):
|
| 159 |
+
messages = [{"role": "system", "content": agent.CHAT_SYSTEM}, *messages]
|
| 160 |
+
result = await llm_router.complete(messages, temperature=0.4, max_tokens=1024)
|
| 161 |
+
return {
|
| 162 |
+
"content": result["content"],
|
| 163 |
+
"provider": result.get("provider"),
|
| 164 |
+
"model": result.get("model"),
|
| 165 |
+
}
|
| 166 |
+
|
| 167 |
+
|
| 168 |
+
@app.post("/api/chat/stream")
|
| 169 |
+
async def api_chat_stream(req: ChatRequest):
|
| 170 |
+
"""SSE streaming endpoint.
|
| 171 |
+
|
| 172 |
+
Auto-routes between chat-only and sandbox execution based on intent
|
| 173 |
+
detection (override with `force_sandbox`).
|
| 174 |
+
"""
|
| 175 |
+
try:
|
| 176 |
+
messages = req.to_messages()
|
| 177 |
+
except ValueError as e:
|
| 178 |
+
raise HTTPException(400, str(e))
|
| 179 |
+
|
| 180 |
+
last_user = next((m["content"] for m in reversed(messages) if m["role"] == "user"), "")
|
| 181 |
+
|
| 182 |
+
async def event_gen():
|
| 183 |
+
# Decide routing
|
| 184 |
+
if req.force_sandbox is True:
|
| 185 |
+
decision = intent.ExecutionIntent(True, "forced by client", 1.0)
|
| 186 |
+
elif req.force_sandbox is False:
|
| 187 |
+
decision = intent.ExecutionIntent(False, "forced by client", 1.0)
|
| 188 |
+
else:
|
| 189 |
+
decision = await intent.detect(last_user)
|
| 190 |
+
|
| 191 |
+
yield sse_format("intent", {
|
| 192 |
+
"needs_sandbox": decision.needs_sandbox,
|
| 193 |
+
"reason": decision.reason,
|
| 194 |
+
"confidence": decision.confidence,
|
| 195 |
+
})
|
| 196 |
+
|
| 197 |
+
try:
|
| 198 |
+
if decision.needs_sandbox:
|
| 199 |
+
async for ev in agent.stream_execute(messages, sandbox_timeout=req.sandbox_timeout):
|
| 200 |
+
yield sse_format(ev["type"], ev)
|
| 201 |
+
else:
|
| 202 |
+
async for ev in agent.stream_chat_only(messages):
|
| 203 |
+
yield sse_format(ev["type"], ev)
|
| 204 |
+
except Exception as e:
|
| 205 |
+
logger.exception("stream error")
|
| 206 |
+
yield sse_format("error", {"error": str(e)})
|
| 207 |
+
finally:
|
| 208 |
+
yield sse_format("end", {"done": True})
|
| 209 |
+
|
| 210 |
+
return StreamingResponse(event_gen(), headers=SSE_HEADERS)
|
| 211 |
+
|
| 212 |
+
|
| 213 |
+
@app.post("/api/execute")
|
| 214 |
+
async def api_execute(req: ChatRequest):
|
| 215 |
+
"""SSE streaming endpoint that ALWAYS uses the sandbox."""
|
| 216 |
+
try:
|
| 217 |
+
messages = req.to_messages()
|
| 218 |
+
except ValueError as e:
|
| 219 |
+
raise HTTPException(400, str(e))
|
| 220 |
+
|
| 221 |
+
async def event_gen():
|
| 222 |
+
yield sse_format("intent", {"needs_sandbox": True, "reason": "explicit /execute"})
|
| 223 |
+
try:
|
| 224 |
+
async for ev in agent.stream_execute(messages, sandbox_timeout=req.sandbox_timeout):
|
| 225 |
+
yield sse_format(ev["type"], ev)
|
| 226 |
+
except Exception as e:
|
| 227 |
+
logger.exception("execute error")
|
| 228 |
+
yield sse_format("error", {"error": str(e)})
|
| 229 |
+
finally:
|
| 230 |
+
yield sse_format("end", {"done": True})
|
| 231 |
+
|
| 232 |
+
return StreamingResponse(event_gen(), headers=SSE_HEADERS)
|
| 233 |
+
|
| 234 |
+
|
| 235 |
+
# ----------------------------------------------------------------------------
|
| 236 |
+
# Local entrypoint
|
| 237 |
+
# ----------------------------------------------------------------------------
|
| 238 |
+
|
| 239 |
+
if __name__ == "__main__":
|
| 240 |
+
import uvicorn
|
| 241 |
+
port = int(os.environ.get("PORT", "7860"))
|
| 242 |
+
uvicorn.run(app, host="0.0.0.0", port=port)
|
executor.py
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
E2B Sandbox Executor.
|
| 3 |
+
|
| 4 |
+
Owns the real execution runtime. Wraps the e2b_code_interpreter SDK so the
|
| 5 |
+
rest of the backend never imports e2b directly.
|
| 6 |
+
|
| 7 |
+
Capabilities (Phase 1):
|
| 8 |
+
- run_python(code): execute Python in a sandbox, stream stdout/stderr
|
| 9 |
+
- run_shell(cmd): execute shell command, stream stdout/stderr
|
| 10 |
+
- write_file(path, contents)
|
| 11 |
+
- read_file(path)
|
| 12 |
+
- close()
|
| 13 |
+
|
| 14 |
+
A sandbox is created per task and closed at the end (Phase 1: no reuse).
|
| 15 |
+
"""
|
| 16 |
+
|
| 17 |
+
from __future__ import annotations
|
| 18 |
+
|
| 19 |
+
import asyncio
|
| 20 |
+
import logging
|
| 21 |
+
import os
|
| 22 |
+
from contextlib import asynccontextmanager
|
| 23 |
+
from dataclasses import dataclass, field
|
| 24 |
+
from typing import AsyncIterator, Dict, List, Optional
|
| 25 |
+
|
| 26 |
+
logger = logging.getLogger(__name__)
|
| 27 |
+
|
| 28 |
+
|
| 29 |
+
# ----------------------------------------------------------------------------
|
| 30 |
+
# Event types streamed back to the client
|
| 31 |
+
# ----------------------------------------------------------------------------
|
| 32 |
+
|
| 33 |
+
@dataclass
|
| 34 |
+
class ExecEvent:
|
| 35 |
+
type: str # 'sandbox_started' | 'stdout' | 'stderr' | 'result' | 'error' | 'sandbox_closed'
|
| 36 |
+
data: str = ""
|
| 37 |
+
meta: Dict = field(default_factory=dict)
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
# ----------------------------------------------------------------------------
|
| 41 |
+
# E2B SDK import (lazy so missing dep doesn't crash module import)
|
| 42 |
+
# ----------------------------------------------------------------------------
|
| 43 |
+
|
| 44 |
+
def _get_sandbox_class():
|
| 45 |
+
try:
|
| 46 |
+
from e2b_code_interpreter import Sandbox # type: ignore
|
| 47 |
+
return Sandbox
|
| 48 |
+
except ImportError as e:
|
| 49 |
+
raise RuntimeError(
|
| 50 |
+
"e2b_code_interpreter not installed. Add `e2b-code-interpreter` to requirements."
|
| 51 |
+
) from e
|
| 52 |
+
|
| 53 |
+
|
| 54 |
+
# ----------------------------------------------------------------------------
|
| 55 |
+
# Executor
|
| 56 |
+
# ----------------------------------------------------------------------------
|
| 57 |
+
|
| 58 |
+
class E2BExecutor:
|
| 59 |
+
"""One sandbox = one E2BExecutor instance.
|
| 60 |
+
|
| 61 |
+
The SDK is synchronous; we offload calls to a thread to keep the event
|
| 62 |
+
loop free.
|
| 63 |
+
"""
|
| 64 |
+
|
| 65 |
+
def __init__(self, api_key: Optional[str] = None, template: Optional[str] = None,
|
| 66 |
+
timeout: int = 300) -> None:
|
| 67 |
+
self.api_key = api_key or os.environ.get("E2B_API_KEY", "")
|
| 68 |
+
if not self.api_key:
|
| 69 |
+
raise RuntimeError("E2B_API_KEY is not configured")
|
| 70 |
+
self.template = template or os.environ.get("E2B_TEMPLATE") # None → default
|
| 71 |
+
self.timeout = timeout
|
| 72 |
+
self._sandbox = None # type: ignore
|
| 73 |
+
self._lock = asyncio.Lock()
|
| 74 |
+
|
| 75 |
+
# ---- lifecycle ----------------------------------------------------------
|
| 76 |
+
|
| 77 |
+
async def start(self) -> None:
|
| 78 |
+
if self._sandbox is not None:
|
| 79 |
+
return
|
| 80 |
+
Sandbox = _get_sandbox_class()
|
| 81 |
+
def _create():
|
| 82 |
+
kwargs = {"api_key": self.api_key, "timeout": self.timeout}
|
| 83 |
+
if self.template:
|
| 84 |
+
return Sandbox(self.template, **kwargs)
|
| 85 |
+
return Sandbox(**kwargs)
|
| 86 |
+
self._sandbox = await asyncio.to_thread(_create)
|
| 87 |
+
logger.info("E2B sandbox started: id=%s", getattr(self._sandbox, "sandbox_id", "?"))
|
| 88 |
+
|
| 89 |
+
async def close(self) -> None:
|
| 90 |
+
if self._sandbox is None:
|
| 91 |
+
return
|
| 92 |
+
sb = self._sandbox
|
| 93 |
+
self._sandbox = None
|
| 94 |
+
try:
|
| 95 |
+
await asyncio.to_thread(sb.kill)
|
| 96 |
+
except Exception as e:
|
| 97 |
+
logger.warning("E2B close error (non-fatal): %s", e)
|
| 98 |
+
|
| 99 |
+
@property
|
| 100 |
+
def sandbox_id(self) -> Optional[str]:
|
| 101 |
+
return getattr(self._sandbox, "sandbox_id", None) if self._sandbox else None
|
| 102 |
+
|
| 103 |
+
# ---- execution ----------------------------------------------------------
|
| 104 |
+
|
| 105 |
+
async def run_python(self, code: str) -> AsyncIterator[ExecEvent]:
|
| 106 |
+
"""Run Python code; yield streaming events."""
|
| 107 |
+
if self._sandbox is None:
|
| 108 |
+
await self.start()
|
| 109 |
+
sb = self._sandbox
|
| 110 |
+
|
| 111 |
+
# Queue bridging the SDK callback thread → asyncio loop
|
| 112 |
+
loop = asyncio.get_running_loop()
|
| 113 |
+
queue: asyncio.Queue[ExecEvent] = asyncio.Queue()
|
| 114 |
+
|
| 115 |
+
def on_stdout(msg) -> None:
|
| 116 |
+
text = getattr(msg, "line", None) or getattr(msg, "text", None) or str(msg)
|
| 117 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stdout", text))
|
| 118 |
+
|
| 119 |
+
def on_stderr(msg) -> None:
|
| 120 |
+
text = getattr(msg, "line", None) or getattr(msg, "text", None) or str(msg)
|
| 121 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stderr", text))
|
| 122 |
+
|
| 123 |
+
async def runner():
|
| 124 |
+
try:
|
| 125 |
+
def _exec():
|
| 126 |
+
return sb.run_code(code, on_stdout=on_stdout, on_stderr=on_stderr)
|
| 127 |
+
execution = await asyncio.to_thread(_exec)
|
| 128 |
+
# Final result
|
| 129 |
+
result_text = ""
|
| 130 |
+
if execution is not None:
|
| 131 |
+
err = getattr(execution, "error", None)
|
| 132 |
+
if err is not None:
|
| 133 |
+
loop.call_soon_threadsafe(
|
| 134 |
+
queue.put_nowait,
|
| 135 |
+
ExecEvent("error", f"{getattr(err, 'name', 'Error')}: {getattr(err, 'value', err)}",
|
| 136 |
+
{"traceback": getattr(err, "traceback", "")}),
|
| 137 |
+
)
|
| 138 |
+
results = getattr(execution, "results", []) or []
|
| 139 |
+
if results:
|
| 140 |
+
for r in results:
|
| 141 |
+
t = getattr(r, "text", None)
|
| 142 |
+
if t:
|
| 143 |
+
result_text += t + "\n"
|
| 144 |
+
loop.call_soon_threadsafe(
|
| 145 |
+
queue.put_nowait,
|
| 146 |
+
ExecEvent("result", result_text.strip()),
|
| 147 |
+
)
|
| 148 |
+
except Exception as e:
|
| 149 |
+
loop.call_soon_threadsafe(
|
| 150 |
+
queue.put_nowait, ExecEvent("error", str(e)),
|
| 151 |
+
)
|
| 152 |
+
finally:
|
| 153 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("__done__"))
|
| 154 |
+
|
| 155 |
+
task = asyncio.create_task(runner())
|
| 156 |
+
try:
|
| 157 |
+
while True:
|
| 158 |
+
ev = await queue.get()
|
| 159 |
+
if ev.type == "__done__":
|
| 160 |
+
break
|
| 161 |
+
yield ev
|
| 162 |
+
finally:
|
| 163 |
+
if not task.done():
|
| 164 |
+
task.cancel()
|
| 165 |
+
|
| 166 |
+
async def run_shell(self, cmd: str) -> AsyncIterator[ExecEvent]:
|
| 167 |
+
"""Run shell command via sandbox.commands.run()."""
|
| 168 |
+
if self._sandbox is None:
|
| 169 |
+
await self.start()
|
| 170 |
+
sb = self._sandbox
|
| 171 |
+
loop = asyncio.get_running_loop()
|
| 172 |
+
queue: asyncio.Queue[ExecEvent] = asyncio.Queue()
|
| 173 |
+
|
| 174 |
+
def on_stdout(data) -> None:
|
| 175 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stdout", str(data)))
|
| 176 |
+
|
| 177 |
+
def on_stderr(data) -> None:
|
| 178 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stderr", str(data)))
|
| 179 |
+
|
| 180 |
+
async def runner():
|
| 181 |
+
try:
|
| 182 |
+
def _exec():
|
| 183 |
+
return sb.commands.run(cmd, on_stdout=on_stdout, on_stderr=on_stderr)
|
| 184 |
+
result = await asyncio.to_thread(_exec)
|
| 185 |
+
exit_code = getattr(result, "exit_code", None)
|
| 186 |
+
loop.call_soon_threadsafe(
|
| 187 |
+
queue.put_nowait,
|
| 188 |
+
ExecEvent("result", "", {"exit_code": exit_code}),
|
| 189 |
+
)
|
| 190 |
+
except Exception as e:
|
| 191 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("error", str(e)))
|
| 192 |
+
finally:
|
| 193 |
+
loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("__done__"))
|
| 194 |
+
|
| 195 |
+
task = asyncio.create_task(runner())
|
| 196 |
+
try:
|
| 197 |
+
while True:
|
| 198 |
+
ev = await queue.get()
|
| 199 |
+
if ev.type == "__done__":
|
| 200 |
+
break
|
| 201 |
+
yield ev
|
| 202 |
+
finally:
|
| 203 |
+
if not task.done():
|
| 204 |
+
task.cancel()
|
| 205 |
+
|
| 206 |
+
async def write_file(self, path: str, contents: str) -> None:
|
| 207 |
+
if self._sandbox is None:
|
| 208 |
+
await self.start()
|
| 209 |
+
sb = self._sandbox
|
| 210 |
+
await asyncio.to_thread(sb.files.write, path, contents)
|
| 211 |
+
|
| 212 |
+
async def read_file(self, path: str) -> str:
|
| 213 |
+
if self._sandbox is None:
|
| 214 |
+
await self.start()
|
| 215 |
+
sb = self._sandbox
|
| 216 |
+
return await asyncio.to_thread(sb.files.read, path)
|
| 217 |
+
|
| 218 |
+
|
| 219 |
+
# ----------------------------------------------------------------------------
|
| 220 |
+
# Convenience context manager
|
| 221 |
+
# ----------------------------------------------------------------------------
|
| 222 |
+
|
| 223 |
+
@asynccontextmanager
|
| 224 |
+
async def sandbox_session(timeout: int = 300):
|
| 225 |
+
ex = E2BExecutor(timeout=timeout)
|
| 226 |
+
try:
|
| 227 |
+
await ex.start()
|
| 228 |
+
yield ex
|
| 229 |
+
finally:
|
| 230 |
+
await ex.close()
|
intent.py
ADDED
|
@@ -0,0 +1,141 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Execution intent detection.
|
| 3 |
+
|
| 4 |
+
Decides whether a user prompt needs a real E2B sandbox or is just chat.
|
| 5 |
+
|
| 6 |
+
Strategy:
|
| 7 |
+
1. Fast keyword/regex heuristics (free, deterministic)
|
| 8 |
+
2. If ambiguous → fall back to LLM classification (cheap)
|
| 9 |
+
|
| 10 |
+
Output: ExecutionIntent dataclass
|
| 11 |
+
"""
|
| 12 |
+
|
| 13 |
+
from __future__ import annotations
|
| 14 |
+
|
| 15 |
+
import json
|
| 16 |
+
import logging
|
| 17 |
+
import re
|
| 18 |
+
from dataclasses import dataclass
|
| 19 |
+
from typing import List, Optional
|
| 20 |
+
|
| 21 |
+
from . import llm_router
|
| 22 |
+
|
| 23 |
+
logger = logging.getLogger(__name__)
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
@dataclass
|
| 27 |
+
class ExecutionIntent:
|
| 28 |
+
needs_sandbox: bool
|
| 29 |
+
reason: str
|
| 30 |
+
confidence: float # 0..1
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
# ----------------------------------------------------------------------------
|
| 34 |
+
# Heuristic rules
|
| 35 |
+
# ----------------------------------------------------------------------------
|
| 36 |
+
|
| 37 |
+
# Strong execution signals
|
| 38 |
+
_EXEC_PATTERNS = [
|
| 39 |
+
r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
|
| 40 |
+
r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
|
| 41 |
+
r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
|
| 42 |
+
r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
|
| 43 |
+
r"\b(install|pip install|npm install|apt install)\b",
|
| 44 |
+
r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
|
| 45 |
+
r"```(python|bash|sh|shell|javascript|js|node)\b",
|
| 46 |
+
r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
|
| 47 |
+
r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
|
| 48 |
+
r"\bdebug\s+(this|my)\b",
|
| 49 |
+
r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
|
| 50 |
+
]
|
| 51 |
+
|
| 52 |
+
# Strong chat-only signals
|
| 53 |
+
_CHAT_PATTERNS = [
|
| 54 |
+
r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
|
| 55 |
+
r"^\s*(thanks|thank you|thx|ty)\b",
|
| 56 |
+
r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
|
| 57 |
+
r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
|
| 58 |
+
r"\b(define|definition\s+of)\b",
|
| 59 |
+
r"\btell me about\b",
|
| 60 |
+
r"\bdifference between\b",
|
| 61 |
+
]
|
| 62 |
+
|
| 63 |
+
_EXEC_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _EXEC_PATTERNS]
|
| 64 |
+
_CHAT_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _CHAT_PATTERNS]
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
|
| 68 |
+
"""Return strong-signal intent, or None if ambiguous."""
|
| 69 |
+
p = (prompt or "").strip()
|
| 70 |
+
if not p:
|
| 71 |
+
return ExecutionIntent(False, "empty prompt", 1.0)
|
| 72 |
+
|
| 73 |
+
exec_hits = sum(1 for r in _EXEC_RE if r.search(p))
|
| 74 |
+
chat_hits = sum(1 for r in _CHAT_RE if r.search(p))
|
| 75 |
+
|
| 76 |
+
# Triple-backtick code block always implies execution intent
|
| 77 |
+
if "```" in p and exec_hits == 0:
|
| 78 |
+
# bare code block without verb → still likely wants execution
|
| 79 |
+
if re.search(r"```(python|bash|sh|shell|js|node)", p, re.IGNORECASE):
|
| 80 |
+
return ExecutionIntent(True, "code block detected", 0.85)
|
| 81 |
+
|
| 82 |
+
if exec_hits >= 1 and chat_hits == 0:
|
| 83 |
+
return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)
|
| 84 |
+
if chat_hits >= 1 and exec_hits == 0:
|
| 85 |
+
return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)
|
| 86 |
+
if exec_hits == 0 and chat_hits == 0:
|
| 87 |
+
# Very short prompts are usually chat
|
| 88 |
+
if len(p) < 30:
|
| 89 |
+
return ExecutionIntent(False, "short prompt, likely chat", 0.7)
|
| 90 |
+
return None # ambiguous → ask LLM
|
| 91 |
+
# Mixed signals → ask LLM
|
| 92 |
+
return None
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
# ----------------------------------------------------------------------------
|
| 96 |
+
# LLM fallback classifier
|
| 97 |
+
# ----------------------------------------------------------------------------
|
| 98 |
+
|
| 99 |
+
_CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
|
| 100 |
+
requires running real code or shell commands in a sandbox computer.
|
| 101 |
+
|
| 102 |
+
Reply ONLY with strict JSON, no prose:
|
| 103 |
+
{"needs_sandbox": true|false, "reason": "<short explanation>"}
|
| 104 |
+
|
| 105 |
+
Rules:
|
| 106 |
+
- needs_sandbox = true when the user wants to run code, execute shell commands,
|
| 107 |
+
create/modify files, install packages, debug a running program, or otherwise
|
| 108 |
+
observe real execution results.
|
| 109 |
+
- needs_sandbox = false when the user asks for explanations, greetings,
|
| 110 |
+
brainstorming, advice, or static code review with no run request.
|
| 111 |
+
"""
|
| 112 |
+
|
| 113 |
+
|
| 114 |
+
async def llm_detect(prompt: str) -> ExecutionIntent:
|
| 115 |
+
messages = [
|
| 116 |
+
{"role": "system", "content": _CLASSIFIER_SYSTEM},
|
| 117 |
+
{"role": "user", "content": prompt[:2000]},
|
| 118 |
+
]
|
| 119 |
+
try:
|
| 120 |
+
result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
|
| 121 |
+
text = result["content"].strip()
|
| 122 |
+
# Tolerate models that wrap JSON in code fences
|
| 123 |
+
text = re.sub(r"^```(?:json)?|```$", "", text.strip(), flags=re.MULTILINE).strip()
|
| 124 |
+
obj = json.loads(text)
|
| 125 |
+
return ExecutionIntent(
|
| 126 |
+
needs_sandbox=bool(obj.get("needs_sandbox", False)),
|
| 127 |
+
reason=str(obj.get("reason", "llm classifier"))[:200],
|
| 128 |
+
confidence=0.75,
|
| 129 |
+
)
|
| 130 |
+
except Exception as e:
|
| 131 |
+
logger.warning("LLM intent classifier failed: %s", e)
|
| 132 |
+
# Conservative default → no sandbox
|
| 133 |
+
return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
|
| 134 |
+
|
| 135 |
+
|
| 136 |
+
async def detect(prompt: str) -> ExecutionIntent:
|
| 137 |
+
"""Top-level: heuristic first, LLM fallback."""
|
| 138 |
+
h = heuristic_detect(prompt)
|
| 139 |
+
if h is not None:
|
| 140 |
+
return h
|
| 141 |
+
return await llm_detect(prompt)
|
llm_router.py
ADDED
|
@@ -0,0 +1,399 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
LLM Router - Multi-provider with key rotation, cooldown, and failover.
|
| 3 |
+
|
| 4 |
+
Providers:
|
| 5 |
+
- gemini (Google Generative Language API)
|
| 6 |
+
- sambanova (SambaNova OpenAI-compatible)
|
| 7 |
+
- github_gpt4o (GitHub Models, OpenAI-compatible)
|
| 8 |
+
|
| 9 |
+
Comma-separated keys per provider via env vars:
|
| 10 |
+
GEMINI_KEY, SAMBANOVA_KEY, GITHUB_KEY
|
| 11 |
+
|
| 12 |
+
Rotation:
|
| 13 |
+
- Round-robin across keys
|
| 14 |
+
- Track failures per key; cool down after N consecutive failures
|
| 15 |
+
- Failover to next provider when all keys exhausted
|
| 16 |
+
"""
|
| 17 |
+
|
| 18 |
+
from __future__ import annotations
|
| 19 |
+
|
| 20 |
+
import asyncio
|
| 21 |
+
import logging
|
| 22 |
+
import os
|
| 23 |
+
import time
|
| 24 |
+
from dataclasses import dataclass, field
|
| 25 |
+
from typing import Any, AsyncIterator, Dict, List, Optional
|
| 26 |
+
|
| 27 |
+
import httpx
|
| 28 |
+
|
| 29 |
+
logger = logging.getLogger(__name__)
|
| 30 |
+
|
| 31 |
+
# ----------------------------------------------------------------------------
|
| 32 |
+
# Constants
|
| 33 |
+
# ----------------------------------------------------------------------------
|
| 34 |
+
|
| 35 |
+
MAX_PROVIDER_RETRY = 3
|
| 36 |
+
MAX_FAILURES_BEFORE_COOLDOWN = 3
|
| 37 |
+
COOLDOWN_SECONDS = 300 # 5 min
|
| 38 |
+
REQUEST_TIMEOUT_SECONDS = 120.0
|
| 39 |
+
STREAM_TIMEOUT_SECONDS = 600.0
|
| 40 |
+
|
| 41 |
+
|
| 42 |
+
# ----------------------------------------------------------------------------
|
| 43 |
+
# Provider definitions
|
| 44 |
+
# ----------------------------------------------------------------------------
|
| 45 |
+
|
| 46 |
+
@dataclass
|
| 47 |
+
class ProviderConfig:
|
| 48 |
+
name: str
|
| 49 |
+
kind: str # 'gemini' | 'openai'
|
| 50 |
+
url: str
|
| 51 |
+
key_env: str
|
| 52 |
+
model: str
|
| 53 |
+
stream_supported: bool = True
|
| 54 |
+
|
| 55 |
+
|
| 56 |
+
PROVIDERS: Dict[str, ProviderConfig] = {
|
| 57 |
+
"gemini": ProviderConfig(
|
| 58 |
+
name="gemini",
|
| 59 |
+
kind="gemini",
|
| 60 |
+
url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
|
| 61 |
+
key_env="GEMINI_KEY",
|
| 62 |
+
model="gemini-2.0-flash",
|
| 63 |
+
stream_supported=False, # we use non-streaming generate for simplicity
|
| 64 |
+
),
|
| 65 |
+
"sambanova": ProviderConfig(
|
| 66 |
+
name="sambanova",
|
| 67 |
+
kind="openai",
|
| 68 |
+
url="https://api.sambanova.ai/v1/chat/completions",
|
| 69 |
+
key_env="SAMBANOVA_KEY",
|
| 70 |
+
model="Meta-Llama-3.3-70B-Instruct",
|
| 71 |
+
stream_supported=True,
|
| 72 |
+
),
|
| 73 |
+
"github_gpt4o": ProviderConfig(
|
| 74 |
+
name="github_gpt4o",
|
| 75 |
+
kind="openai",
|
| 76 |
+
url="https://models.inference.ai.azure.com/chat/completions",
|
| 77 |
+
key_env="GITHUB_KEY",
|
| 78 |
+
model="gpt-4o",
|
| 79 |
+
stream_supported=True,
|
| 80 |
+
),
|
| 81 |
+
}
|
| 82 |
+
|
| 83 |
+
|
| 84 |
+
# ----------------------------------------------------------------------------
|
| 85 |
+
# Key pool
|
| 86 |
+
# ----------------------------------------------------------------------------
|
| 87 |
+
|
| 88 |
+
@dataclass
|
| 89 |
+
class KeyState:
|
| 90 |
+
key: str
|
| 91 |
+
fail_count: int = 0
|
| 92 |
+
cooldown_until: float = 0.0 # epoch seconds
|
| 93 |
+
|
| 94 |
+
|
| 95 |
+
class KeyPool:
|
| 96 |
+
"""Round-robin key pool with failure tracking & cooldown."""
|
| 97 |
+
|
| 98 |
+
def __init__(self, keys: List[str]) -> None:
|
| 99 |
+
self._keys: List[KeyState] = [KeyState(k.strip()) for k in keys if k.strip()]
|
| 100 |
+
self._cursor = 0
|
| 101 |
+
|
| 102 |
+
def __bool__(self) -> bool:
|
| 103 |
+
return len(self._keys) > 0
|
| 104 |
+
|
| 105 |
+
def pick(self) -> Optional[KeyState]:
|
| 106 |
+
if not self._keys:
|
| 107 |
+
return None
|
| 108 |
+
now = time.time()
|
| 109 |
+
# Try every key starting from cursor
|
| 110 |
+
for _ in range(len(self._keys)):
|
| 111 |
+
ks = self._keys[self._cursor % len(self._keys)]
|
| 112 |
+
self._cursor += 1
|
| 113 |
+
if ks.cooldown_until <= now:
|
| 114 |
+
return ks
|
| 115 |
+
return None # all cooling down
|
| 116 |
+
|
| 117 |
+
@staticmethod
|
| 118 |
+
def mark_success(ks: KeyState) -> None:
|
| 119 |
+
ks.fail_count = 0
|
| 120 |
+
ks.cooldown_until = 0.0
|
| 121 |
+
|
| 122 |
+
@staticmethod
|
| 123 |
+
def mark_failure(ks: KeyState) -> None:
|
| 124 |
+
ks.fail_count += 1
|
| 125 |
+
if ks.fail_count >= MAX_FAILURES_BEFORE_COOLDOWN:
|
| 126 |
+
ks.cooldown_until = time.time() + COOLDOWN_SECONDS
|
| 127 |
+
logger.warning(
|
| 128 |
+
"Key cooled down for %ds (fail_count=%d)",
|
| 129 |
+
COOLDOWN_SECONDS, ks.fail_count,
|
| 130 |
+
)
|
| 131 |
+
|
| 132 |
+
|
| 133 |
+
# Cache pools so cooldown state persists across requests
|
| 134 |
+
_POOL_CACHE: Dict[str, KeyPool] = {}
|
| 135 |
+
|
| 136 |
+
|
| 137 |
+
def get_pool(provider: ProviderConfig) -> KeyPool:
|
| 138 |
+
if provider.name in _POOL_CACHE:
|
| 139 |
+
return _POOL_CACHE[provider.name]
|
| 140 |
+
raw = os.environ.get(provider.key_env, "")
|
| 141 |
+
keys = [k for k in raw.split(",") if k.strip()]
|
| 142 |
+
pool = KeyPool(keys)
|
| 143 |
+
_POOL_CACHE[provider.name] = pool
|
| 144 |
+
return pool
|
| 145 |
+
|
| 146 |
+
|
| 147 |
+
# ----------------------------------------------------------------------------
|
| 148 |
+
# Task classification → provider order
|
| 149 |
+
# ----------------------------------------------------------------------------
|
| 150 |
+
|
| 151 |
+
def classify_task(prompt: str) -> str:
|
| 152 |
+
p = (prompt or "").lower()
|
| 153 |
+
if any(w in p for w in ("workflow", "automation")):
|
| 154 |
+
return "planning"
|
| 155 |
+
if any(w in p for w in ("code", "python", "javascript", "function", "api", "build", "debug")):
|
| 156 |
+
return "engineering"
|
| 157 |
+
if any(w in p for w in ("why", "analyze", "explain", "reason")):
|
| 158 |
+
return "reasoning"
|
| 159 |
+
if any(w in p for w in ("translate", "summarize", "summary")):
|
| 160 |
+
return "language"
|
| 161 |
+
return "general"
|
| 162 |
+
|
| 163 |
+
|
| 164 |
+
def provider_order(prompt: str) -> List[str]:
|
| 165 |
+
task = classify_task(prompt)
|
| 166 |
+
if task == "engineering" or task == "reasoning":
|
| 167 |
+
return ["sambanova", "github_gpt4o", "gemini"]
|
| 168 |
+
if task == "planning":
|
| 169 |
+
return ["github_gpt4o", "sambanova", "gemini"]
|
| 170 |
+
if task == "language" or len(prompt) < 500:
|
| 171 |
+
return ["gemini", "sambanova", "github_gpt4o"]
|
| 172 |
+
return ["sambanova", "github_gpt4o", "gemini"]
|
| 173 |
+
|
| 174 |
+
|
| 175 |
+
# ----------------------------------------------------------------------------
|
| 176 |
+
# Provider callers
|
| 177 |
+
# ----------------------------------------------------------------------------
|
| 178 |
+
|
| 179 |
+
def _gemini_body(messages: List[Dict[str, str]]) -> Dict[str, Any]:
|
| 180 |
+
"""Convert OpenAI-style messages → Gemini body."""
|
| 181 |
+
contents = []
|
| 182 |
+
system_parts: List[str] = []
|
| 183 |
+
for m in messages:
|
| 184 |
+
role = m.get("role")
|
| 185 |
+
content = m.get("content", "")
|
| 186 |
+
if role == "system":
|
| 187 |
+
system_parts.append(content)
|
| 188 |
+
continue
|
| 189 |
+
gem_role = "user" if role == "user" else "model"
|
| 190 |
+
contents.append({"role": gem_role, "parts": [{"text": content}]})
|
| 191 |
+
body: Dict[str, Any] = {"contents": contents}
|
| 192 |
+
if system_parts:
|
| 193 |
+
body["systemInstruction"] = {"parts": [{"text": "\n".join(system_parts)}]}
|
| 194 |
+
return body
|
| 195 |
+
|
| 196 |
+
|
| 197 |
+
def _extract_text(provider: ProviderConfig, data: Dict[str, Any]) -> str:
|
| 198 |
+
if provider.kind == "gemini":
|
| 199 |
+
try:
|
| 200 |
+
return data["candidates"][0]["content"]["parts"][0]["text"]
|
| 201 |
+
except (KeyError, IndexError, TypeError):
|
| 202 |
+
return ""
|
| 203 |
+
try:
|
| 204 |
+
return data["choices"][0]["message"]["content"] or ""
|
| 205 |
+
except (KeyError, IndexError, TypeError):
|
| 206 |
+
return ""
|
| 207 |
+
|
| 208 |
+
|
| 209 |
+
async def _call_once(
|
| 210 |
+
client: httpx.AsyncClient,
|
| 211 |
+
provider: ProviderConfig,
|
| 212 |
+
key: str,
|
| 213 |
+
messages: List[Dict[str, str]],
|
| 214 |
+
temperature: float = 0.4,
|
| 215 |
+
max_tokens: int = 2048,
|
| 216 |
+
) -> str:
|
| 217 |
+
if provider.kind == "gemini":
|
| 218 |
+
url = f"{provider.url}?key={key}"
|
| 219 |
+
body = _gemini_body(messages)
|
| 220 |
+
body["generationConfig"] = {
|
| 221 |
+
"temperature": temperature,
|
| 222 |
+
"maxOutputTokens": max_tokens,
|
| 223 |
+
}
|
| 224 |
+
r = await client.post(url, json=body, timeout=REQUEST_TIMEOUT_SECONDS)
|
| 225 |
+
else:
|
| 226 |
+
headers = {
|
| 227 |
+
"Authorization": f"Bearer {key}",
|
| 228 |
+
"Content-Type": "application/json",
|
| 229 |
+
}
|
| 230 |
+
body = {
|
| 231 |
+
"model": provider.model,
|
| 232 |
+
"messages": messages,
|
| 233 |
+
"temperature": temperature,
|
| 234 |
+
"max_tokens": max_tokens,
|
| 235 |
+
}
|
| 236 |
+
r = await client.post(
|
| 237 |
+
provider.url, headers=headers, json=body, timeout=REQUEST_TIMEOUT_SECONDS
|
| 238 |
+
)
|
| 239 |
+
if r.status_code >= 400:
|
| 240 |
+
raise RuntimeError(f"{provider.name} HTTP {r.status_code}: {r.text[:200]}")
|
| 241 |
+
return _extract_text(provider, r.json())
|
| 242 |
+
|
| 243 |
+
|
| 244 |
+
async def complete(
|
| 245 |
+
messages: List[Dict[str, str]],
|
| 246 |
+
*,
|
| 247 |
+
temperature: float = 0.4,
|
| 248 |
+
max_tokens: int = 2048,
|
| 249 |
+
) -> Dict[str, Any]:
|
| 250 |
+
"""Non-streaming completion with provider/key failover.
|
| 251 |
+
|
| 252 |
+
Returns: {"content": str, "provider": str, "model": str}
|
| 253 |
+
"""
|
| 254 |
+
prompt_text = "\n".join(m.get("content", "") for m in messages if m.get("role") == "user")
|
| 255 |
+
order = provider_order(prompt_text)
|
| 256 |
+
last_err: Optional[str] = None
|
| 257 |
+
|
| 258 |
+
async with httpx.AsyncClient() as client:
|
| 259 |
+
for provider_name in order:
|
| 260 |
+
provider = PROVIDERS[provider_name]
|
| 261 |
+
pool = get_pool(provider)
|
| 262 |
+
if not pool:
|
| 263 |
+
continue
|
| 264 |
+
for _ in range(MAX_PROVIDER_RETRY):
|
| 265 |
+
ks = pool.pick()
|
| 266 |
+
if ks is None:
|
| 267 |
+
break
|
| 268 |
+
try:
|
| 269 |
+
text = await _call_once(
|
| 270 |
+
client, provider, ks.key, messages,
|
| 271 |
+
temperature=temperature, max_tokens=max_tokens,
|
| 272 |
+
)
|
| 273 |
+
if not text.strip():
|
| 274 |
+
raise RuntimeError("empty completion")
|
| 275 |
+
KeyPool.mark_success(ks)
|
| 276 |
+
return {
|
| 277 |
+
"content": text,
|
| 278 |
+
"provider": provider.name,
|
| 279 |
+
"model": provider.model,
|
| 280 |
+
}
|
| 281 |
+
except Exception as e:
|
| 282 |
+
last_err = f"{provider.name}: {e}"
|
| 283 |
+
logger.warning("LLM call failed → %s", last_err)
|
| 284 |
+
KeyPool.mark_failure(ks)
|
| 285 |
+
raise RuntimeError(f"ALL_PROVIDERS_FAILED ({last_err})")
|
| 286 |
+
|
| 287 |
+
|
| 288 |
+
# ----------------------------------------------------------------------------
|
| 289 |
+
# Streaming (OpenAI-compatible providers only; Gemini falls back to chunked)
|
| 290 |
+
# ----------------------------------------------------------------------------
|
| 291 |
+
|
| 292 |
+
async def _stream_openai(
|
| 293 |
+
client: httpx.AsyncClient,
|
| 294 |
+
provider: ProviderConfig,
|
| 295 |
+
key: str,
|
| 296 |
+
messages: List[Dict[str, str]],
|
| 297 |
+
temperature: float,
|
| 298 |
+
max_tokens: int,
|
| 299 |
+
) -> AsyncIterator[str]:
|
| 300 |
+
headers = {
|
| 301 |
+
"Authorization": f"Bearer {key}",
|
| 302 |
+
"Content-Type": "application/json",
|
| 303 |
+
"Accept": "text/event-stream",
|
| 304 |
+
}
|
| 305 |
+
body = {
|
| 306 |
+
"model": provider.model,
|
| 307 |
+
"messages": messages,
|
| 308 |
+
"temperature": temperature,
|
| 309 |
+
"max_tokens": max_tokens,
|
| 310 |
+
"stream": True,
|
| 311 |
+
}
|
| 312 |
+
async with client.stream(
|
| 313 |
+
"POST", provider.url, headers=headers, json=body,
|
| 314 |
+
timeout=STREAM_TIMEOUT_SECONDS,
|
| 315 |
+
) as r:
|
| 316 |
+
if r.status_code >= 400:
|
| 317 |
+
err_text = (await r.aread()).decode("utf-8", "ignore")[:200]
|
| 318 |
+
raise RuntimeError(f"{provider.name} HTTP {r.status_code}: {err_text}")
|
| 319 |
+
async for line in r.aiter_lines():
|
| 320 |
+
if not line or not line.startswith("data:"):
|
| 321 |
+
continue
|
| 322 |
+
payload = line[5:].strip()
|
| 323 |
+
if payload == "[DONE]":
|
| 324 |
+
break
|
| 325 |
+
try:
|
| 326 |
+
import json
|
| 327 |
+
obj = json.loads(payload)
|
| 328 |
+
delta = obj["choices"][0]["delta"].get("content")
|
| 329 |
+
if delta:
|
| 330 |
+
yield delta
|
| 331 |
+
except Exception:
|
| 332 |
+
continue
|
| 333 |
+
|
| 334 |
+
|
| 335 |
+
async def stream_complete(
|
| 336 |
+
messages: List[Dict[str, str]],
|
| 337 |
+
*,
|
| 338 |
+
temperature: float = 0.4,
|
| 339 |
+
max_tokens: int = 2048,
|
| 340 |
+
) -> AsyncIterator[Dict[str, Any]]:
|
| 341 |
+
"""Yield {'type':'delta','content':str,'provider':str} chunks, then {'type':'done', ...}."""
|
| 342 |
+
prompt_text = "\n".join(m.get("content", "") for m in messages if m.get("role") == "user")
|
| 343 |
+
order = provider_order(prompt_text)
|
| 344 |
+
last_err: Optional[str] = None
|
| 345 |
+
|
| 346 |
+
async with httpx.AsyncClient() as client:
|
| 347 |
+
for provider_name in order:
|
| 348 |
+
provider = PROVIDERS[provider_name]
|
| 349 |
+
pool = get_pool(provider)
|
| 350 |
+
if not pool:
|
| 351 |
+
continue
|
| 352 |
+
|
| 353 |
+
for _ in range(MAX_PROVIDER_RETRY):
|
| 354 |
+
ks = pool.pick()
|
| 355 |
+
if ks is None:
|
| 356 |
+
break
|
| 357 |
+
try:
|
| 358 |
+
if provider.stream_supported:
|
| 359 |
+
got_any = False
|
| 360 |
+
async for delta in _stream_openai(
|
| 361 |
+
client, provider, ks.key, messages, temperature, max_tokens
|
| 362 |
+
):
|
| 363 |
+
got_any = True
|
| 364 |
+
yield {"type": "delta", "content": delta, "provider": provider.name}
|
| 365 |
+
if not got_any:
|
| 366 |
+
raise RuntimeError("empty stream")
|
| 367 |
+
else:
|
| 368 |
+
# Gemini fallback: non-streaming, emit as a single delta
|
| 369 |
+
text = await _call_once(
|
| 370 |
+
client, provider, ks.key, messages,
|
| 371 |
+
temperature=temperature, max_tokens=max_tokens,
|
| 372 |
+
)
|
| 373 |
+
if not text.strip():
|
| 374 |
+
raise RuntimeError("empty completion")
|
| 375 |
+
yield {"type": "delta", "content": text, "provider": provider.name}
|
| 376 |
+
|
| 377 |
+
KeyPool.mark_success(ks)
|
| 378 |
+
yield {"type": "done", "provider": provider.name, "model": provider.model}
|
| 379 |
+
return
|
| 380 |
+
except Exception as e:
|
| 381 |
+
last_err = f"{provider.name}: {e}"
|
| 382 |
+
logger.warning("LLM stream failed → %s", last_err)
|
| 383 |
+
KeyPool.mark_failure(ks)
|
| 384 |
+
|
| 385 |
+
yield {"type": "error", "error": f"ALL_PROVIDERS_FAILED ({last_err})"}
|
| 386 |
+
|
| 387 |
+
|
| 388 |
+
def pool_status() -> Dict[str, Any]:
|
| 389 |
+
"""Diagnostic info about each provider's key pool."""
|
| 390 |
+
out: Dict[str, Any] = {}
|
| 391 |
+
now = time.time()
|
| 392 |
+
for name, provider in PROVIDERS.items():
|
| 393 |
+
pool = get_pool(provider)
|
| 394 |
+
out[name] = {
|
| 395 |
+
"keys_configured": len(pool._keys),
|
| 396 |
+
"keys_available": sum(1 for k in pool._keys if k.cooldown_until <= now),
|
| 397 |
+
"model": provider.model,
|
| 398 |
+
}
|
| 399 |
+
return out
|
requirements.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
fastapi==0.115.5
|
| 2 |
+
uvicorn[standard]==0.32.1
|
| 3 |
+
pydantic==2.10.3
|
| 4 |
+
httpx==0.27.2
|
| 5 |
+
e2b-code-interpreter==1.0.4
|
| 6 |
+
python-multipart==0.0.20
|
tests/__init__.py
ADDED
|
File without changes
|
tests/test_smoke.py
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Smoke tests that DO NOT hit the network.
|
| 3 |
+
|
| 4 |
+
Run with: pytest apps/backend/tests
|
| 5 |
+
"""
|
| 6 |
+
from __future__ import annotations
|
| 7 |
+
|
| 8 |
+
import importlib
|
| 9 |
+
import os
|
| 10 |
+
import sys
|
| 11 |
+
|
| 12 |
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def test_imports():
|
| 16 |
+
"""Every backend module imports cleanly."""
|
| 17 |
+
for mod in ("backend.app", "backend.agent", "backend.intent",
|
| 18 |
+
"backend.llm_router", "backend.executor"):
|
| 19 |
+
importlib.import_module(mod)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
def test_intent_heuristic_chat():
|
| 23 |
+
from backend.intent import heuristic_detect
|
| 24 |
+
r = heuristic_detect("hello there")
|
| 25 |
+
assert r is not None and r.needs_sandbox is False
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def test_intent_heuristic_exec():
|
| 29 |
+
from backend.intent import heuristic_detect
|
| 30 |
+
r = heuristic_detect("Create proof.txt with the current unix timestamp and write it")
|
| 31 |
+
assert r is not None and r.needs_sandbox is True
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def test_intent_code_fence():
|
| 35 |
+
from backend.intent import heuristic_detect
|
| 36 |
+
r = heuristic_detect("```python\nprint('hi')\n```")
|
| 37 |
+
assert r is not None and r.needs_sandbox is True
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def test_extract_code_blocks():
|
| 41 |
+
from backend.agent import extract_code_blocks, pick_runnable
|
| 42 |
+
blocks = extract_code_blocks("Here is code:\n```python\nprint(1)\n```\nand shell:\n```bash\nls\n```")
|
| 43 |
+
assert len(blocks) == 2
|
| 44 |
+
chosen = pick_runnable(blocks)
|
| 45 |
+
assert chosen is not None
|
| 46 |
+
assert chosen.language in ("python", "py")
|
| 47 |
+
|
| 48 |
+
|
| 49 |
+
def test_provider_order_engineering():
|
| 50 |
+
from backend.llm_router import provider_order
|
| 51 |
+
order = provider_order("Write a python function to sort a list")
|
| 52 |
+
assert order[0] == "sambanova"
|
| 53 |
+
|
| 54 |
+
|
| 55 |
+
def test_provider_order_short_chat():
|
| 56 |
+
from backend.llm_router import provider_order
|
| 57 |
+
order = provider_order("hi")
|
| 58 |
+
assert order[0] == "gemini"
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def test_pool_empty_when_no_env():
|
| 62 |
+
from backend.llm_router import KeyPool
|
| 63 |
+
p = KeyPool([])
|
| 64 |
+
assert not p
|
| 65 |
+
assert p.pick() is None
|
| 66 |
+
|
| 67 |
+
|
| 68 |
+
def test_pool_rotation_and_cooldown():
|
| 69 |
+
from backend.llm_router import KeyPool, MAX_FAILURES_BEFORE_COOLDOWN
|
| 70 |
+
p = KeyPool(["a", "b", "c"])
|
| 71 |
+
seen = set()
|
| 72 |
+
for _ in range(3):
|
| 73 |
+
ks = p.pick()
|
| 74 |
+
assert ks is not None
|
| 75 |
+
seen.add(ks.key)
|
| 76 |
+
assert seen == {"a", "b", "c"}
|
| 77 |
+
# Mark one failed enough times to cool down
|
| 78 |
+
ks = p.pick()
|
| 79 |
+
for _ in range(MAX_FAILURES_BEFORE_COOLDOWN):
|
| 80 |
+
KeyPool.mark_failure(ks)
|
| 81 |
+
assert ks.cooldown_until > 0
|