PYAE1994 commited on
Commit
46258b3
·
verified ·
1 Parent(s): fe7add9

Phase 1 backend deploy

Browse files
Files changed (12) hide show
  1. .env.example +17 -0
  2. Dockerfile +37 -0
  3. README.md +36 -4
  4. __init__.py +2 -0
  5. agent.py +214 -0
  6. app.py +242 -0
  7. executor.py +230 -0
  8. intent.py +141 -0
  9. llm_router.py +399 -0
  10. requirements.txt +6 -0
  11. tests/__init__.py +0 -0
  12. tests/test_smoke.py +81 -0
.env.example ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copy to .env for local dev. DO NOT COMMIT real keys.
2
+
3
+ # LLM providers (comma-separated keys, at least one provider required)
4
+ GEMINI_KEY=
5
+ SAMBANOVA_KEY=
6
+ GITHUB_KEY=
7
+
8
+ # E2B (required for execution endpoints)
9
+ E2B_API_KEY=
10
+ # E2B_TEMPLATE=
11
+
12
+ # CORS
13
+ ALLOWED_ORIGINS=*
14
+
15
+ # Logging
16
+ LOG_LEVEL=INFO
17
+ PORT=7860
Dockerfile ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # HF Space Dockerfile for OpenHands Simplified Backend (Phase 1)
2
+ # Runs FastAPI on port 7860 (HF Space default).
3
+
4
+ FROM python:3.11-slim
5
+
6
+ ENV PYTHONDONTWRITEBYTECODE=1 \
7
+ PYTHONUNBUFFERED=1 \
8
+ PIP_NO_CACHE_DIR=1 \
9
+ PIP_DISABLE_PIP_VERSION_CHECK=1 \
10
+ PORT=7860 \
11
+ HOME=/home/user
12
+
13
+ # Non-root user expected by HF Spaces
14
+ RUN useradd -m -u 1000 user
15
+ WORKDIR /home/user/app
16
+
17
+ # System deps (minimal)
18
+ RUN apt-get update && apt-get install -y --no-install-recommends \
19
+ curl ca-certificates \
20
+ && rm -rf /var/lib/apt/lists/*
21
+
22
+ # Python deps
23
+ COPY --chown=user:user requirements.txt /home/user/app/requirements.txt
24
+ RUN pip install --no-cache-dir -r /home/user/app/requirements.txt
25
+
26
+ # App code
27
+ COPY --chown=user:user . /home/user/app/backend
28
+ # Make package importable as `backend`
29
+ ENV PYTHONPATH=/home/user/app
30
+
31
+ USER user
32
+ EXPOSE 7860
33
+
34
+ HEALTHCHECK --interval=30s --timeout=5s --start-period=20s --retries=3 \
35
+ CMD curl -fsS http://localhost:7860/health || exit 1
36
+
37
+ CMD ["uvicorn", "backend.app:app", "--host", "0.0.0.0", "--port", "7860", "--workers", "1", "--log-level", "info"]
README.md CHANGED
@@ -1,10 +1,42 @@
1
  ---
2
- title: Openhands Backend
3
- emoji: 📚
4
  colorFrom: indigo
5
- colorTo: pink
6
  sdk: docker
 
7
  pinned: false
8
  ---
9
 
10
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: OpenHands Simplified Backend
3
+ emoji: 🤖
4
  colorFrom: indigo
5
+ colorTo: purple
6
  sdk: docker
7
+ app_port: 7860
8
  pinned: false
9
  ---
10
 
11
+ # OpenHands Simplified Backend (Phase 1)
12
+
13
+ FastAPI service that exposes:
14
+
15
+ - `GET /health` — liveness + provider/key status
16
+ - `POST /api/chat` — non-streaming chat
17
+ - `POST /api/chat/stream` — SSE; auto-routes chat vs E2B execution
18
+ - `POST /api/execute` — SSE; always runs in E2B sandbox
19
+ - `POST /api/intent` — intent classifier only
20
+
21
+ ## Environment variables (set in HF Space → Settings → Secrets)
22
+
23
+ | Name | Required | Notes |
24
+ |-------------------|----------|----------------------------------------|
25
+ | `GEMINI_KEY` | optional | comma-separated Gemini API keys |
26
+ | `SAMBANOVA_KEY` | optional | comma-separated SambaNova API keys |
27
+ | `GITHUB_KEY` | optional | comma-separated GitHub Models tokens |
28
+ | `E2B_API_KEY` | **yes** | required for execution endpoints |
29
+ | `E2B_TEMPLATE` | optional | custom sandbox template id |
30
+ | `ALLOWED_ORIGINS` | optional | CSV; defaults to `*` |
31
+ | `LOG_LEVEL` | optional | default `INFO` |
32
+
33
+ At least ONE LLM provider key must be set.
34
+
35
+ ## Test (after deploy)
36
+
37
+ ```bash
38
+ curl https://<your-space>.hf.space/health
39
+ curl -N -X POST https://<your-space>.hf.space/api/execute \
40
+ -H 'Content-Type: application/json' \
41
+ -d '{"message":"Create proof.txt with the current UNIX timestamp and print its contents."}'
42
+ ```
__init__.py ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ """Simplified OpenHands backend (Phase 1)."""
2
+ __version__ = "0.1.0"
agent.py ADDED
@@ -0,0 +1,214 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Agent loop: orchestrates LLM <-> E2B sandbox for execution tasks.
3
+
4
+ Phase 1 design — kept deliberately simple and robust:
5
+
6
+ 1. Ask LLM to produce a SINGLE python code block (and optional shell block)
7
+ to satisfy the user's request, given recent context.
8
+ 2. Extract the code block(s).
9
+ 3. Run them in a fresh E2B sandbox, streaming stdout/stderr to the caller.
10
+ 4. Show the LLM the real output and ask for a final natural-language reply.
11
+ 5. Stream that reply.
12
+ 6. Close the sandbox.
13
+
14
+ Anything more elaborate (multi-step planner, tool-calling, retry-on-error) is
15
+ intentionally OUT of Phase 1.
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import json
21
+ import logging
22
+ import re
23
+ from dataclasses import dataclass
24
+ from typing import AsyncIterator, Dict, List, Optional
25
+
26
+ from . import llm_router
27
+ from .executor import E2BExecutor, ExecEvent
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+
32
+ # ----------------------------------------------------------------------------
33
+ # Prompts
34
+ # ----------------------------------------------------------------------------
35
+
36
+ CODER_SYSTEM = """You are a code executor agent running inside a real Linux
37
+ sandbox (E2B). The user will ask you to do something that requires running
38
+ real code. Reply with ONE single fenced code block — Python preferred — that,
39
+ when executed, accomplishes the task.
40
+
41
+ Strict rules:
42
+ - Output ONLY the code block. No prose before or after.
43
+ - Prefer Python. Use ```python fences.
44
+ - If the task is shell-only (mkdir, ls, install a package), you may use one
45
+ ```bash block instead.
46
+ - Print clear progress messages so the user can see what happened.
47
+ - Always print a final confirmation line.
48
+ - Keep total output under ~200 lines.
49
+ """
50
+
51
+ REPLY_SYSTEM = """You are a helpful assistant. The user asked for a task that
52
+ required running real code. Below is the user's request, the code that ran,
53
+ and the REAL execution output. Write a short, friendly natural-language reply
54
+ summarising what was done and quoting any important values from the real
55
+ output. Do NOT fabricate. Do NOT re-run anything. Keep it concise (3-6
56
+ sentences)."""
57
+
58
+ CHAT_SYSTEM = """You are a concise, helpful assistant. Reply in the same
59
+ language as the user when natural. Keep answers focused."""
60
+
61
+
62
+ # ----------------------------------------------------------------------------
63
+ # Code extraction
64
+ # ----------------------------------------------------------------------------
65
+
66
+ _FENCE_RE = re.compile(
67
+ r"```([a-zA-Z0-9_+\-]*)\s*\n(.*?)```", re.DOTALL
68
+ )
69
+
70
+
71
+ @dataclass
72
+ class CodeBlock:
73
+ language: str
74
+ code: str
75
+
76
+
77
+ def extract_code_blocks(text: str) -> List[CodeBlock]:
78
+ blocks: List[CodeBlock] = []
79
+ for m in _FENCE_RE.finditer(text or ""):
80
+ lang = (m.group(1) or "").lower().strip()
81
+ code = m.group(2).rstrip()
82
+ blocks.append(CodeBlock(language=lang or "python", code=code))
83
+ return blocks
84
+
85
+
86
+ def pick_runnable(blocks: List[CodeBlock]) -> Optional[CodeBlock]:
87
+ # Prefer python; else bash/sh; else first
88
+ for b in blocks:
89
+ if b.language in ("python", "py"):
90
+ return b
91
+ for b in blocks:
92
+ if b.language in ("bash", "sh", "shell"):
93
+ return b
94
+ return blocks[0] if blocks else None
95
+
96
+
97
+ # ----------------------------------------------------------------------------
98
+ # Streaming agent
99
+ # ----------------------------------------------------------------------------
100
+
101
+ async def stream_chat_only(
102
+ messages: List[Dict[str, str]],
103
+ ) -> AsyncIterator[Dict]:
104
+ """Plain chat: no sandbox."""
105
+ full_messages = [{"role": "system", "content": CHAT_SYSTEM}, *messages]
106
+ yield {"type": "phase", "phase": "chat"}
107
+ async for chunk in llm_router.stream_complete(full_messages, temperature=0.4, max_tokens=1024):
108
+ if chunk["type"] == "delta":
109
+ yield {"type": "assistant_delta", "content": chunk["content"]}
110
+ elif chunk["type"] == "done":
111
+ yield {"type": "assistant_done", "provider": chunk.get("provider"), "model": chunk.get("model")}
112
+ elif chunk["type"] == "error":
113
+ yield {"type": "error", "error": chunk["error"]}
114
+
115
+
116
+ async def stream_execute(
117
+ messages: List[Dict[str, str]],
118
+ *,
119
+ sandbox_timeout: int = 300,
120
+ ) -> AsyncIterator[Dict]:
121
+ """Execution task: spin up E2B, run code, reply with real results."""
122
+
123
+ # --- step 1: ask LLM for code -------------------------------------------
124
+ yield {"type": "phase", "phase": "planning"}
125
+ code_messages = [{"role": "system", "content": CODER_SYSTEM}, *messages]
126
+ try:
127
+ coder_resp = await llm_router.complete(code_messages, temperature=0.2, max_tokens=1500)
128
+ except Exception as e:
129
+ yield {"type": "error", "error": f"LLM failed: {e}"}
130
+ return
131
+
132
+ raw = coder_resp["content"]
133
+ yield {"type": "plan", "content": raw, "provider": coder_resp.get("provider")}
134
+
135
+ blocks = extract_code_blocks(raw)
136
+ chosen = pick_runnable(blocks)
137
+ if chosen is None:
138
+ # No code block → degrade to chat reply
139
+ yield {"type": "assistant_delta", "content": raw}
140
+ yield {"type": "assistant_done"}
141
+ return
142
+
143
+ yield {"type": "code", "language": chosen.language, "code": chosen.code}
144
+
145
+ # --- step 2: launch sandbox & run ---------------------------------------
146
+ yield {"type": "phase", "phase": "sandbox_starting"}
147
+ executor: Optional[E2BExecutor] = None
148
+ stdout_buf: List[str] = []
149
+ stderr_buf: List[str] = []
150
+ error_text: Optional[str] = None
151
+ result_text: str = ""
152
+ exit_code: Optional[int] = None
153
+
154
+ try:
155
+ executor = E2BExecutor(timeout=sandbox_timeout)
156
+ await executor.start()
157
+ yield {"type": "sandbox_started", "sandbox_id": executor.sandbox_id}
158
+
159
+ runner = (
160
+ executor.run_python(chosen.code)
161
+ if chosen.language in ("python", "py")
162
+ else executor.run_shell(chosen.code)
163
+ )
164
+
165
+ yield {"type": "phase", "phase": "executing"}
166
+ async for ev in runner:
167
+ if ev.type == "stdout":
168
+ stdout_buf.append(ev.data)
169
+ yield {"type": "stdout", "content": ev.data}
170
+ elif ev.type == "stderr":
171
+ stderr_buf.append(ev.data)
172
+ yield {"type": "stderr", "content": ev.data}
173
+ elif ev.type == "error":
174
+ error_text = ev.data
175
+ yield {"type": "exec_error", "content": ev.data, "meta": ev.meta}
176
+ elif ev.type == "result":
177
+ result_text = ev.data
178
+ exit_code = ev.meta.get("exit_code") if ev.meta else None
179
+ yield {"type": "exec_result", "content": ev.data, "meta": ev.meta}
180
+
181
+ except Exception as e:
182
+ logger.exception("sandbox error")
183
+ yield {"type": "error", "error": f"Sandbox error: {e}"}
184
+ if executor:
185
+ await executor.close()
186
+ return
187
+ finally:
188
+ if executor:
189
+ await executor.close()
190
+ yield {"type": "sandbox_closed"}
191
+
192
+ # --- step 3: ask LLM for final reply with real outputs ------------------
193
+ yield {"type": "phase", "phase": "summarising"}
194
+ user_request = next((m["content"] for m in reversed(messages) if m.get("role") == "user"), "")
195
+ summary_user = (
196
+ f"USER REQUEST:\n{user_request}\n\n"
197
+ f"CODE EXECUTED ({chosen.language}):\n```\n{chosen.code}\n```\n\n"
198
+ f"STDOUT:\n{''.join(stdout_buf) or '(empty)'}\n\n"
199
+ f"STDERR:\n{''.join(stderr_buf) or '(empty)'}\n\n"
200
+ f"RESULT:\n{result_text or '(none)'}\n\n"
201
+ f"ERROR:\n{error_text or '(none)'}\n\n"
202
+ f"EXIT_CODE: {exit_code}"
203
+ )
204
+ reply_messages = [
205
+ {"role": "system", "content": REPLY_SYSTEM},
206
+ {"role": "user", "content": summary_user},
207
+ ]
208
+ async for chunk in llm_router.stream_complete(reply_messages, temperature=0.4, max_tokens=600):
209
+ if chunk["type"] == "delta":
210
+ yield {"type": "assistant_delta", "content": chunk["content"]}
211
+ elif chunk["type"] == "done":
212
+ yield {"type": "assistant_done", "provider": chunk.get("provider"), "model": chunk.get("model")}
213
+ elif chunk["type"] == "error":
214
+ yield {"type": "error", "error": chunk["error"]}
app.py ADDED
@@ -0,0 +1,242 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ FastAPI backend — simplified OpenHands runtime gateway.
3
+
4
+ Endpoints:
5
+ GET / → service info
6
+ GET /health → liveness + provider/key status
7
+ POST /api/chat → JSON, non-streaming convenience endpoint
8
+ POST /api/chat/stream → SSE streaming (chat OR execute, auto-routed)
9
+ POST /api/execute → SSE streaming, always uses sandbox
10
+ POST /api/intent → JSON, returns intent decision only
11
+
12
+ CORS is fully open by default (intended for Vercel frontend).
13
+ Configure with env var ALLOWED_ORIGINS (comma-separated) to lock down.
14
+ """
15
+
16
+ from __future__ import annotations
17
+
18
+ import asyncio
19
+ import json
20
+ import logging
21
+ import os
22
+ import time
23
+ from typing import Any, Dict, List, Optional
24
+
25
+ from fastapi import FastAPI, HTTPException, Request
26
+ from fastapi.middleware.cors import CORSMiddleware
27
+ from fastapi.responses import JSONResponse, StreamingResponse
28
+ from pydantic import BaseModel, Field
29
+
30
+ from . import agent, intent, llm_router
31
+
32
+ # ----------------------------------------------------------------------------
33
+ # Logging
34
+ # ----------------------------------------------------------------------------
35
+
36
+ logging.basicConfig(
37
+ level=os.environ.get("LOG_LEVEL", "INFO"),
38
+ format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
39
+ )
40
+ logger = logging.getLogger("openhands.backend")
41
+
42
+
43
+ # ----------------------------------------------------------------------------
44
+ # Models
45
+ # ----------------------------------------------------------------------------
46
+
47
+ class ChatMessage(BaseModel):
48
+ role: str = Field(..., pattern="^(system|user|assistant)$")
49
+ content: str
50
+
51
+
52
+ class ChatRequest(BaseModel):
53
+ messages: List[ChatMessage] = Field(default_factory=list)
54
+ message: Optional[str] = None # convenience: single user message
55
+ force_sandbox: Optional[bool] = None # override intent detection
56
+ sandbox_timeout: int = 300
57
+
58
+ def to_messages(self) -> List[Dict[str, str]]:
59
+ msgs = [m.dict() for m in self.messages]
60
+ if self.message:
61
+ msgs.append({"role": "user", "content": self.message})
62
+ if not msgs:
63
+ raise ValueError("at least one message is required")
64
+ return msgs
65
+
66
+
67
+ class IntentRequest(BaseModel):
68
+ message: str
69
+
70
+
71
+ # ----------------------------------------------------------------------------
72
+ # App
73
+ # ----------------------------------------------------------------------------
74
+
75
+ app = FastAPI(
76
+ title="OpenHands Simplified Backend",
77
+ version="0.1.0",
78
+ description="Phase-1 backend: LLM router + E2B real execution + SSE streaming.",
79
+ )
80
+
81
+ _allowed = os.environ.get("ALLOWED_ORIGINS", "*").strip()
82
+ allowed_origins = ["*"] if _allowed == "*" else [o.strip() for o in _allowed.split(",") if o.strip()]
83
+
84
+ app.add_middleware(
85
+ CORSMiddleware,
86
+ allow_origins=allowed_origins,
87
+ allow_credentials=False,
88
+ allow_methods=["*"],
89
+ allow_headers=["*"],
90
+ )
91
+
92
+
93
+ # ----------------------------------------------------------------------------
94
+ # Helpers
95
+ # ----------------------------------------------------------------------------
96
+
97
+ def sse_format(event: str, data: Any) -> bytes:
98
+ payload = data if isinstance(data, str) else json.dumps(data, ensure_ascii=False)
99
+ return f"event: {event}\ndata: {payload}\n\n".encode("utf-8")
100
+
101
+
102
+ SSE_HEADERS = {
103
+ "Content-Type": "text/event-stream",
104
+ "Cache-Control": "no-cache, no-transform",
105
+ "Connection": "keep-alive",
106
+ "X-Accel-Buffering": "no", # disable buffering on proxies that respect it
107
+ }
108
+
109
+
110
+ # ----------------------------------------------------------------------------
111
+ # Routes
112
+ # ----------------------------------------------------------------------------
113
+
114
+ @app.get("/")
115
+ async def root() -> Dict[str, Any]:
116
+ return {
117
+ "service": "openhands-simplified-backend",
118
+ "status": "ok",
119
+ "endpoints": [
120
+ "/health",
121
+ "/api/chat",
122
+ "/api/chat/stream",
123
+ "/api/execute",
124
+ "/api/intent",
125
+ ],
126
+ }
127
+
128
+
129
+ @app.get("/health")
130
+ async def health() -> Dict[str, Any]:
131
+ return {
132
+ "status": "ok",
133
+ "time": int(time.time()),
134
+ "providers": llm_router.pool_status(),
135
+ "e2b_configured": bool(os.environ.get("E2B_API_KEY")),
136
+ }
137
+
138
+
139
+ @app.post("/api/intent")
140
+ async def api_intent(req: IntentRequest) -> Dict[str, Any]:
141
+ decision = await intent.detect(req.message)
142
+ return {
143
+ "needs_sandbox": decision.needs_sandbox,
144
+ "reason": decision.reason,
145
+ "confidence": decision.confidence,
146
+ }
147
+
148
+
149
+ @app.post("/api/chat")
150
+ async def api_chat(req: ChatRequest) -> Dict[str, Any]:
151
+ """Non-streaming chat (no sandbox). Convenience for simple clients."""
152
+ try:
153
+ messages = req.to_messages()
154
+ except ValueError as e:
155
+ raise HTTPException(400, str(e))
156
+
157
+ # Add a light system message if not present
158
+ if not any(m["role"] == "system" for m in messages):
159
+ messages = [{"role": "system", "content": agent.CHAT_SYSTEM}, *messages]
160
+ result = await llm_router.complete(messages, temperature=0.4, max_tokens=1024)
161
+ return {
162
+ "content": result["content"],
163
+ "provider": result.get("provider"),
164
+ "model": result.get("model"),
165
+ }
166
+
167
+
168
+ @app.post("/api/chat/stream")
169
+ async def api_chat_stream(req: ChatRequest):
170
+ """SSE streaming endpoint.
171
+
172
+ Auto-routes between chat-only and sandbox execution based on intent
173
+ detection (override with `force_sandbox`).
174
+ """
175
+ try:
176
+ messages = req.to_messages()
177
+ except ValueError as e:
178
+ raise HTTPException(400, str(e))
179
+
180
+ last_user = next((m["content"] for m in reversed(messages) if m["role"] == "user"), "")
181
+
182
+ async def event_gen():
183
+ # Decide routing
184
+ if req.force_sandbox is True:
185
+ decision = intent.ExecutionIntent(True, "forced by client", 1.0)
186
+ elif req.force_sandbox is False:
187
+ decision = intent.ExecutionIntent(False, "forced by client", 1.0)
188
+ else:
189
+ decision = await intent.detect(last_user)
190
+
191
+ yield sse_format("intent", {
192
+ "needs_sandbox": decision.needs_sandbox,
193
+ "reason": decision.reason,
194
+ "confidence": decision.confidence,
195
+ })
196
+
197
+ try:
198
+ if decision.needs_sandbox:
199
+ async for ev in agent.stream_execute(messages, sandbox_timeout=req.sandbox_timeout):
200
+ yield sse_format(ev["type"], ev)
201
+ else:
202
+ async for ev in agent.stream_chat_only(messages):
203
+ yield sse_format(ev["type"], ev)
204
+ except Exception as e:
205
+ logger.exception("stream error")
206
+ yield sse_format("error", {"error": str(e)})
207
+ finally:
208
+ yield sse_format("end", {"done": True})
209
+
210
+ return StreamingResponse(event_gen(), headers=SSE_HEADERS)
211
+
212
+
213
+ @app.post("/api/execute")
214
+ async def api_execute(req: ChatRequest):
215
+ """SSE streaming endpoint that ALWAYS uses the sandbox."""
216
+ try:
217
+ messages = req.to_messages()
218
+ except ValueError as e:
219
+ raise HTTPException(400, str(e))
220
+
221
+ async def event_gen():
222
+ yield sse_format("intent", {"needs_sandbox": True, "reason": "explicit /execute"})
223
+ try:
224
+ async for ev in agent.stream_execute(messages, sandbox_timeout=req.sandbox_timeout):
225
+ yield sse_format(ev["type"], ev)
226
+ except Exception as e:
227
+ logger.exception("execute error")
228
+ yield sse_format("error", {"error": str(e)})
229
+ finally:
230
+ yield sse_format("end", {"done": True})
231
+
232
+ return StreamingResponse(event_gen(), headers=SSE_HEADERS)
233
+
234
+
235
+ # ----------------------------------------------------------------------------
236
+ # Local entrypoint
237
+ # ----------------------------------------------------------------------------
238
+
239
+ if __name__ == "__main__":
240
+ import uvicorn
241
+ port = int(os.environ.get("PORT", "7860"))
242
+ uvicorn.run(app, host="0.0.0.0", port=port)
executor.py ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ E2B Sandbox Executor.
3
+
4
+ Owns the real execution runtime. Wraps the e2b_code_interpreter SDK so the
5
+ rest of the backend never imports e2b directly.
6
+
7
+ Capabilities (Phase 1):
8
+ - run_python(code): execute Python in a sandbox, stream stdout/stderr
9
+ - run_shell(cmd): execute shell command, stream stdout/stderr
10
+ - write_file(path, contents)
11
+ - read_file(path)
12
+ - close()
13
+
14
+ A sandbox is created per task and closed at the end (Phase 1: no reuse).
15
+ """
16
+
17
+ from __future__ import annotations
18
+
19
+ import asyncio
20
+ import logging
21
+ import os
22
+ from contextlib import asynccontextmanager
23
+ from dataclasses import dataclass, field
24
+ from typing import AsyncIterator, Dict, List, Optional
25
+
26
+ logger = logging.getLogger(__name__)
27
+
28
+
29
+ # ----------------------------------------------------------------------------
30
+ # Event types streamed back to the client
31
+ # ----------------------------------------------------------------------------
32
+
33
+ @dataclass
34
+ class ExecEvent:
35
+ type: str # 'sandbox_started' | 'stdout' | 'stderr' | 'result' | 'error' | 'sandbox_closed'
36
+ data: str = ""
37
+ meta: Dict = field(default_factory=dict)
38
+
39
+
40
+ # ----------------------------------------------------------------------------
41
+ # E2B SDK import (lazy so missing dep doesn't crash module import)
42
+ # ----------------------------------------------------------------------------
43
+
44
+ def _get_sandbox_class():
45
+ try:
46
+ from e2b_code_interpreter import Sandbox # type: ignore
47
+ return Sandbox
48
+ except ImportError as e:
49
+ raise RuntimeError(
50
+ "e2b_code_interpreter not installed. Add `e2b-code-interpreter` to requirements."
51
+ ) from e
52
+
53
+
54
+ # ----------------------------------------------------------------------------
55
+ # Executor
56
+ # ----------------------------------------------------------------------------
57
+
58
+ class E2BExecutor:
59
+ """One sandbox = one E2BExecutor instance.
60
+
61
+ The SDK is synchronous; we offload calls to a thread to keep the event
62
+ loop free.
63
+ """
64
+
65
+ def __init__(self, api_key: Optional[str] = None, template: Optional[str] = None,
66
+ timeout: int = 300) -> None:
67
+ self.api_key = api_key or os.environ.get("E2B_API_KEY", "")
68
+ if not self.api_key:
69
+ raise RuntimeError("E2B_API_KEY is not configured")
70
+ self.template = template or os.environ.get("E2B_TEMPLATE") # None → default
71
+ self.timeout = timeout
72
+ self._sandbox = None # type: ignore
73
+ self._lock = asyncio.Lock()
74
+
75
+ # ---- lifecycle ----------------------------------------------------------
76
+
77
+ async def start(self) -> None:
78
+ if self._sandbox is not None:
79
+ return
80
+ Sandbox = _get_sandbox_class()
81
+ def _create():
82
+ kwargs = {"api_key": self.api_key, "timeout": self.timeout}
83
+ if self.template:
84
+ return Sandbox(self.template, **kwargs)
85
+ return Sandbox(**kwargs)
86
+ self._sandbox = await asyncio.to_thread(_create)
87
+ logger.info("E2B sandbox started: id=%s", getattr(self._sandbox, "sandbox_id", "?"))
88
+
89
+ async def close(self) -> None:
90
+ if self._sandbox is None:
91
+ return
92
+ sb = self._sandbox
93
+ self._sandbox = None
94
+ try:
95
+ await asyncio.to_thread(sb.kill)
96
+ except Exception as e:
97
+ logger.warning("E2B close error (non-fatal): %s", e)
98
+
99
+ @property
100
+ def sandbox_id(self) -> Optional[str]:
101
+ return getattr(self._sandbox, "sandbox_id", None) if self._sandbox else None
102
+
103
+ # ---- execution ----------------------------------------------------------
104
+
105
+ async def run_python(self, code: str) -> AsyncIterator[ExecEvent]:
106
+ """Run Python code; yield streaming events."""
107
+ if self._sandbox is None:
108
+ await self.start()
109
+ sb = self._sandbox
110
+
111
+ # Queue bridging the SDK callback thread → asyncio loop
112
+ loop = asyncio.get_running_loop()
113
+ queue: asyncio.Queue[ExecEvent] = asyncio.Queue()
114
+
115
+ def on_stdout(msg) -> None:
116
+ text = getattr(msg, "line", None) or getattr(msg, "text", None) or str(msg)
117
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stdout", text))
118
+
119
+ def on_stderr(msg) -> None:
120
+ text = getattr(msg, "line", None) or getattr(msg, "text", None) or str(msg)
121
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stderr", text))
122
+
123
+ async def runner():
124
+ try:
125
+ def _exec():
126
+ return sb.run_code(code, on_stdout=on_stdout, on_stderr=on_stderr)
127
+ execution = await asyncio.to_thread(_exec)
128
+ # Final result
129
+ result_text = ""
130
+ if execution is not None:
131
+ err = getattr(execution, "error", None)
132
+ if err is not None:
133
+ loop.call_soon_threadsafe(
134
+ queue.put_nowait,
135
+ ExecEvent("error", f"{getattr(err, 'name', 'Error')}: {getattr(err, 'value', err)}",
136
+ {"traceback": getattr(err, "traceback", "")}),
137
+ )
138
+ results = getattr(execution, "results", []) or []
139
+ if results:
140
+ for r in results:
141
+ t = getattr(r, "text", None)
142
+ if t:
143
+ result_text += t + "\n"
144
+ loop.call_soon_threadsafe(
145
+ queue.put_nowait,
146
+ ExecEvent("result", result_text.strip()),
147
+ )
148
+ except Exception as e:
149
+ loop.call_soon_threadsafe(
150
+ queue.put_nowait, ExecEvent("error", str(e)),
151
+ )
152
+ finally:
153
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("__done__"))
154
+
155
+ task = asyncio.create_task(runner())
156
+ try:
157
+ while True:
158
+ ev = await queue.get()
159
+ if ev.type == "__done__":
160
+ break
161
+ yield ev
162
+ finally:
163
+ if not task.done():
164
+ task.cancel()
165
+
166
+ async def run_shell(self, cmd: str) -> AsyncIterator[ExecEvent]:
167
+ """Run shell command via sandbox.commands.run()."""
168
+ if self._sandbox is None:
169
+ await self.start()
170
+ sb = self._sandbox
171
+ loop = asyncio.get_running_loop()
172
+ queue: asyncio.Queue[ExecEvent] = asyncio.Queue()
173
+
174
+ def on_stdout(data) -> None:
175
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stdout", str(data)))
176
+
177
+ def on_stderr(data) -> None:
178
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("stderr", str(data)))
179
+
180
+ async def runner():
181
+ try:
182
+ def _exec():
183
+ return sb.commands.run(cmd, on_stdout=on_stdout, on_stderr=on_stderr)
184
+ result = await asyncio.to_thread(_exec)
185
+ exit_code = getattr(result, "exit_code", None)
186
+ loop.call_soon_threadsafe(
187
+ queue.put_nowait,
188
+ ExecEvent("result", "", {"exit_code": exit_code}),
189
+ )
190
+ except Exception as e:
191
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("error", str(e)))
192
+ finally:
193
+ loop.call_soon_threadsafe(queue.put_nowait, ExecEvent("__done__"))
194
+
195
+ task = asyncio.create_task(runner())
196
+ try:
197
+ while True:
198
+ ev = await queue.get()
199
+ if ev.type == "__done__":
200
+ break
201
+ yield ev
202
+ finally:
203
+ if not task.done():
204
+ task.cancel()
205
+
206
+ async def write_file(self, path: str, contents: str) -> None:
207
+ if self._sandbox is None:
208
+ await self.start()
209
+ sb = self._sandbox
210
+ await asyncio.to_thread(sb.files.write, path, contents)
211
+
212
+ async def read_file(self, path: str) -> str:
213
+ if self._sandbox is None:
214
+ await self.start()
215
+ sb = self._sandbox
216
+ return await asyncio.to_thread(sb.files.read, path)
217
+
218
+
219
+ # ----------------------------------------------------------------------------
220
+ # Convenience context manager
221
+ # ----------------------------------------------------------------------------
222
+
223
+ @asynccontextmanager
224
+ async def sandbox_session(timeout: int = 300):
225
+ ex = E2BExecutor(timeout=timeout)
226
+ try:
227
+ await ex.start()
228
+ yield ex
229
+ finally:
230
+ await ex.close()
intent.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Execution intent detection.
3
+
4
+ Decides whether a user prompt needs a real E2B sandbox or is just chat.
5
+
6
+ Strategy:
7
+ 1. Fast keyword/regex heuristics (free, deterministic)
8
+ 2. If ambiguous → fall back to LLM classification (cheap)
9
+
10
+ Output: ExecutionIntent dataclass
11
+ """
12
+
13
+ from __future__ import annotations
14
+
15
+ import json
16
+ import logging
17
+ import re
18
+ from dataclasses import dataclass
19
+ from typing import List, Optional
20
+
21
+ from . import llm_router
22
+
23
+ logger = logging.getLogger(__name__)
24
+
25
+
26
+ @dataclass
27
+ class ExecutionIntent:
28
+ needs_sandbox: bool
29
+ reason: str
30
+ confidence: float # 0..1
31
+
32
+
33
+ # ----------------------------------------------------------------------------
34
+ # Heuristic rules
35
+ # ----------------------------------------------------------------------------
36
+
37
+ # Strong execution signals
38
+ _EXEC_PATTERNS = [
39
+ r"\brun\s+(this|the|my)?\s*(code|script|python|bash|shell|command)",
40
+ r"\bexecute\s+(this|the)?\s*(code|script|python|bash|shell)",
41
+ r"\bcreate\s+(a\s+)?(file|folder|directory|script)\b",
42
+ r"\bwrite\s+(a\s+)?(file|script)\s+(named|called)",
43
+ r"\b(install|pip install|npm install|apt install)\b",
44
+ r"\b(ls|cd|cat|mkdir|rm|cp|mv|grep|chmod)\s+",
45
+ r"```(python|bash|sh|shell|javascript|js|node)\b",
46
+ r"\b(unix\s+timestamp|current\s+time)\b.*\b(file|write|create)",
47
+ r"\b(proof\.txt|test\.py|script\.py|main\.py)\b",
48
+ r"\bdebug\s+(this|my)\b",
49
+ r"\bbuild\s+(an?\s+)?(app|website|api|server|script)",
50
+ ]
51
+
52
+ # Strong chat-only signals
53
+ _CHAT_PATTERNS = [
54
+ r"^\s*(hi|hello|hey|yo|hola|sup|good\s+(morning|evening|night))\b",
55
+ r"^\s*(thanks|thank you|thx|ty)\b",
56
+ r"^\s*(what|who|when|why|how)\s+(is|are|do|does)\b.*\?$",
57
+ r"\bexplain\s+(to me)?\b(?!.*\b(run|execute|build)\b)",
58
+ r"\b(define|definition\s+of)\b",
59
+ r"\btell me about\b",
60
+ r"\bdifference between\b",
61
+ ]
62
+
63
+ _EXEC_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _EXEC_PATTERNS]
64
+ _CHAT_RE = [re.compile(p, re.IGNORECASE | re.MULTILINE) for p in _CHAT_PATTERNS]
65
+
66
+
67
+ def heuristic_detect(prompt: str) -> Optional[ExecutionIntent]:
68
+ """Return strong-signal intent, or None if ambiguous."""
69
+ p = (prompt or "").strip()
70
+ if not p:
71
+ return ExecutionIntent(False, "empty prompt", 1.0)
72
+
73
+ exec_hits = sum(1 for r in _EXEC_RE if r.search(p))
74
+ chat_hits = sum(1 for r in _CHAT_RE if r.search(p))
75
+
76
+ # Triple-backtick code block always implies execution intent
77
+ if "```" in p and exec_hits == 0:
78
+ # bare code block without verb → still likely wants execution
79
+ if re.search(r"```(python|bash|sh|shell|js|node)", p, re.IGNORECASE):
80
+ return ExecutionIntent(True, "code block detected", 0.85)
81
+
82
+ if exec_hits >= 1 and chat_hits == 0:
83
+ return ExecutionIntent(True, f"matched {exec_hits} execution pattern(s)", 0.9)
84
+ if chat_hits >= 1 and exec_hits == 0:
85
+ return ExecutionIntent(False, f"matched {chat_hits} chat pattern(s)", 0.9)
86
+ if exec_hits == 0 and chat_hits == 0:
87
+ # Very short prompts are usually chat
88
+ if len(p) < 30:
89
+ return ExecutionIntent(False, "short prompt, likely chat", 0.7)
90
+ return None # ambiguous → ask LLM
91
+ # Mixed signals → ask LLM
92
+ return None
93
+
94
+
95
+ # ----------------------------------------------------------------------------
96
+ # LLM fallback classifier
97
+ # ----------------------------------------------------------------------------
98
+
99
+ _CLASSIFIER_SYSTEM = """You are an intent classifier. Decide if the user's message
100
+ requires running real code or shell commands in a sandbox computer.
101
+
102
+ Reply ONLY with strict JSON, no prose:
103
+ {"needs_sandbox": true|false, "reason": "<short explanation>"}
104
+
105
+ Rules:
106
+ - needs_sandbox = true when the user wants to run code, execute shell commands,
107
+ create/modify files, install packages, debug a running program, or otherwise
108
+ observe real execution results.
109
+ - needs_sandbox = false when the user asks for explanations, greetings,
110
+ brainstorming, advice, or static code review with no run request.
111
+ """
112
+
113
+
114
+ async def llm_detect(prompt: str) -> ExecutionIntent:
115
+ messages = [
116
+ {"role": "system", "content": _CLASSIFIER_SYSTEM},
117
+ {"role": "user", "content": prompt[:2000]},
118
+ ]
119
+ try:
120
+ result = await llm_router.complete(messages, temperature=0.0, max_tokens=120)
121
+ text = result["content"].strip()
122
+ # Tolerate models that wrap JSON in code fences
123
+ text = re.sub(r"^```(?:json)?|```$", "", text.strip(), flags=re.MULTILINE).strip()
124
+ obj = json.loads(text)
125
+ return ExecutionIntent(
126
+ needs_sandbox=bool(obj.get("needs_sandbox", False)),
127
+ reason=str(obj.get("reason", "llm classifier"))[:200],
128
+ confidence=0.75,
129
+ )
130
+ except Exception as e:
131
+ logger.warning("LLM intent classifier failed: %s", e)
132
+ # Conservative default → no sandbox
133
+ return ExecutionIntent(False, f"llm fallback failed ({e})", 0.3)
134
+
135
+
136
+ async def detect(prompt: str) -> ExecutionIntent:
137
+ """Top-level: heuristic first, LLM fallback."""
138
+ h = heuristic_detect(prompt)
139
+ if h is not None:
140
+ return h
141
+ return await llm_detect(prompt)
llm_router.py ADDED
@@ -0,0 +1,399 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ LLM Router - Multi-provider with key rotation, cooldown, and failover.
3
+
4
+ Providers:
5
+ - gemini (Google Generative Language API)
6
+ - sambanova (SambaNova OpenAI-compatible)
7
+ - github_gpt4o (GitHub Models, OpenAI-compatible)
8
+
9
+ Comma-separated keys per provider via env vars:
10
+ GEMINI_KEY, SAMBANOVA_KEY, GITHUB_KEY
11
+
12
+ Rotation:
13
+ - Round-robin across keys
14
+ - Track failures per key; cool down after N consecutive failures
15
+ - Failover to next provider when all keys exhausted
16
+ """
17
+
18
+ from __future__ import annotations
19
+
20
+ import asyncio
21
+ import logging
22
+ import os
23
+ import time
24
+ from dataclasses import dataclass, field
25
+ from typing import Any, AsyncIterator, Dict, List, Optional
26
+
27
+ import httpx
28
+
29
+ logger = logging.getLogger(__name__)
30
+
31
+ # ----------------------------------------------------------------------------
32
+ # Constants
33
+ # ----------------------------------------------------------------------------
34
+
35
+ MAX_PROVIDER_RETRY = 3
36
+ MAX_FAILURES_BEFORE_COOLDOWN = 3
37
+ COOLDOWN_SECONDS = 300 # 5 min
38
+ REQUEST_TIMEOUT_SECONDS = 120.0
39
+ STREAM_TIMEOUT_SECONDS = 600.0
40
+
41
+
42
+ # ----------------------------------------------------------------------------
43
+ # Provider definitions
44
+ # ----------------------------------------------------------------------------
45
+
46
+ @dataclass
47
+ class ProviderConfig:
48
+ name: str
49
+ kind: str # 'gemini' | 'openai'
50
+ url: str
51
+ key_env: str
52
+ model: str
53
+ stream_supported: bool = True
54
+
55
+
56
+ PROVIDERS: Dict[str, ProviderConfig] = {
57
+ "gemini": ProviderConfig(
58
+ name="gemini",
59
+ kind="gemini",
60
+ url="https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent",
61
+ key_env="GEMINI_KEY",
62
+ model="gemini-2.0-flash",
63
+ stream_supported=False, # we use non-streaming generate for simplicity
64
+ ),
65
+ "sambanova": ProviderConfig(
66
+ name="sambanova",
67
+ kind="openai",
68
+ url="https://api.sambanova.ai/v1/chat/completions",
69
+ key_env="SAMBANOVA_KEY",
70
+ model="Meta-Llama-3.3-70B-Instruct",
71
+ stream_supported=True,
72
+ ),
73
+ "github_gpt4o": ProviderConfig(
74
+ name="github_gpt4o",
75
+ kind="openai",
76
+ url="https://models.inference.ai.azure.com/chat/completions",
77
+ key_env="GITHUB_KEY",
78
+ model="gpt-4o",
79
+ stream_supported=True,
80
+ ),
81
+ }
82
+
83
+
84
+ # ----------------------------------------------------------------------------
85
+ # Key pool
86
+ # ----------------------------------------------------------------------------
87
+
88
+ @dataclass
89
+ class KeyState:
90
+ key: str
91
+ fail_count: int = 0
92
+ cooldown_until: float = 0.0 # epoch seconds
93
+
94
+
95
+ class KeyPool:
96
+ """Round-robin key pool with failure tracking & cooldown."""
97
+
98
+ def __init__(self, keys: List[str]) -> None:
99
+ self._keys: List[KeyState] = [KeyState(k.strip()) for k in keys if k.strip()]
100
+ self._cursor = 0
101
+
102
+ def __bool__(self) -> bool:
103
+ return len(self._keys) > 0
104
+
105
+ def pick(self) -> Optional[KeyState]:
106
+ if not self._keys:
107
+ return None
108
+ now = time.time()
109
+ # Try every key starting from cursor
110
+ for _ in range(len(self._keys)):
111
+ ks = self._keys[self._cursor % len(self._keys)]
112
+ self._cursor += 1
113
+ if ks.cooldown_until <= now:
114
+ return ks
115
+ return None # all cooling down
116
+
117
+ @staticmethod
118
+ def mark_success(ks: KeyState) -> None:
119
+ ks.fail_count = 0
120
+ ks.cooldown_until = 0.0
121
+
122
+ @staticmethod
123
+ def mark_failure(ks: KeyState) -> None:
124
+ ks.fail_count += 1
125
+ if ks.fail_count >= MAX_FAILURES_BEFORE_COOLDOWN:
126
+ ks.cooldown_until = time.time() + COOLDOWN_SECONDS
127
+ logger.warning(
128
+ "Key cooled down for %ds (fail_count=%d)",
129
+ COOLDOWN_SECONDS, ks.fail_count,
130
+ )
131
+
132
+
133
+ # Cache pools so cooldown state persists across requests
134
+ _POOL_CACHE: Dict[str, KeyPool] = {}
135
+
136
+
137
+ def get_pool(provider: ProviderConfig) -> KeyPool:
138
+ if provider.name in _POOL_CACHE:
139
+ return _POOL_CACHE[provider.name]
140
+ raw = os.environ.get(provider.key_env, "")
141
+ keys = [k for k in raw.split(",") if k.strip()]
142
+ pool = KeyPool(keys)
143
+ _POOL_CACHE[provider.name] = pool
144
+ return pool
145
+
146
+
147
+ # ----------------------------------------------------------------------------
148
+ # Task classification → provider order
149
+ # ----------------------------------------------------------------------------
150
+
151
+ def classify_task(prompt: str) -> str:
152
+ p = (prompt or "").lower()
153
+ if any(w in p for w in ("workflow", "automation")):
154
+ return "planning"
155
+ if any(w in p for w in ("code", "python", "javascript", "function", "api", "build", "debug")):
156
+ return "engineering"
157
+ if any(w in p for w in ("why", "analyze", "explain", "reason")):
158
+ return "reasoning"
159
+ if any(w in p for w in ("translate", "summarize", "summary")):
160
+ return "language"
161
+ return "general"
162
+
163
+
164
+ def provider_order(prompt: str) -> List[str]:
165
+ task = classify_task(prompt)
166
+ if task == "engineering" or task == "reasoning":
167
+ return ["sambanova", "github_gpt4o", "gemini"]
168
+ if task == "planning":
169
+ return ["github_gpt4o", "sambanova", "gemini"]
170
+ if task == "language" or len(prompt) < 500:
171
+ return ["gemini", "sambanova", "github_gpt4o"]
172
+ return ["sambanova", "github_gpt4o", "gemini"]
173
+
174
+
175
+ # ----------------------------------------------------------------------------
176
+ # Provider callers
177
+ # ----------------------------------------------------------------------------
178
+
179
+ def _gemini_body(messages: List[Dict[str, str]]) -> Dict[str, Any]:
180
+ """Convert OpenAI-style messages → Gemini body."""
181
+ contents = []
182
+ system_parts: List[str] = []
183
+ for m in messages:
184
+ role = m.get("role")
185
+ content = m.get("content", "")
186
+ if role == "system":
187
+ system_parts.append(content)
188
+ continue
189
+ gem_role = "user" if role == "user" else "model"
190
+ contents.append({"role": gem_role, "parts": [{"text": content}]})
191
+ body: Dict[str, Any] = {"contents": contents}
192
+ if system_parts:
193
+ body["systemInstruction"] = {"parts": [{"text": "\n".join(system_parts)}]}
194
+ return body
195
+
196
+
197
+ def _extract_text(provider: ProviderConfig, data: Dict[str, Any]) -> str:
198
+ if provider.kind == "gemini":
199
+ try:
200
+ return data["candidates"][0]["content"]["parts"][0]["text"]
201
+ except (KeyError, IndexError, TypeError):
202
+ return ""
203
+ try:
204
+ return data["choices"][0]["message"]["content"] or ""
205
+ except (KeyError, IndexError, TypeError):
206
+ return ""
207
+
208
+
209
+ async def _call_once(
210
+ client: httpx.AsyncClient,
211
+ provider: ProviderConfig,
212
+ key: str,
213
+ messages: List[Dict[str, str]],
214
+ temperature: float = 0.4,
215
+ max_tokens: int = 2048,
216
+ ) -> str:
217
+ if provider.kind == "gemini":
218
+ url = f"{provider.url}?key={key}"
219
+ body = _gemini_body(messages)
220
+ body["generationConfig"] = {
221
+ "temperature": temperature,
222
+ "maxOutputTokens": max_tokens,
223
+ }
224
+ r = await client.post(url, json=body, timeout=REQUEST_TIMEOUT_SECONDS)
225
+ else:
226
+ headers = {
227
+ "Authorization": f"Bearer {key}",
228
+ "Content-Type": "application/json",
229
+ }
230
+ body = {
231
+ "model": provider.model,
232
+ "messages": messages,
233
+ "temperature": temperature,
234
+ "max_tokens": max_tokens,
235
+ }
236
+ r = await client.post(
237
+ provider.url, headers=headers, json=body, timeout=REQUEST_TIMEOUT_SECONDS
238
+ )
239
+ if r.status_code >= 400:
240
+ raise RuntimeError(f"{provider.name} HTTP {r.status_code}: {r.text[:200]}")
241
+ return _extract_text(provider, r.json())
242
+
243
+
244
+ async def complete(
245
+ messages: List[Dict[str, str]],
246
+ *,
247
+ temperature: float = 0.4,
248
+ max_tokens: int = 2048,
249
+ ) -> Dict[str, Any]:
250
+ """Non-streaming completion with provider/key failover.
251
+
252
+ Returns: {"content": str, "provider": str, "model": str}
253
+ """
254
+ prompt_text = "\n".join(m.get("content", "") for m in messages if m.get("role") == "user")
255
+ order = provider_order(prompt_text)
256
+ last_err: Optional[str] = None
257
+
258
+ async with httpx.AsyncClient() as client:
259
+ for provider_name in order:
260
+ provider = PROVIDERS[provider_name]
261
+ pool = get_pool(provider)
262
+ if not pool:
263
+ continue
264
+ for _ in range(MAX_PROVIDER_RETRY):
265
+ ks = pool.pick()
266
+ if ks is None:
267
+ break
268
+ try:
269
+ text = await _call_once(
270
+ client, provider, ks.key, messages,
271
+ temperature=temperature, max_tokens=max_tokens,
272
+ )
273
+ if not text.strip():
274
+ raise RuntimeError("empty completion")
275
+ KeyPool.mark_success(ks)
276
+ return {
277
+ "content": text,
278
+ "provider": provider.name,
279
+ "model": provider.model,
280
+ }
281
+ except Exception as e:
282
+ last_err = f"{provider.name}: {e}"
283
+ logger.warning("LLM call failed → %s", last_err)
284
+ KeyPool.mark_failure(ks)
285
+ raise RuntimeError(f"ALL_PROVIDERS_FAILED ({last_err})")
286
+
287
+
288
+ # ----------------------------------------------------------------------------
289
+ # Streaming (OpenAI-compatible providers only; Gemini falls back to chunked)
290
+ # ----------------------------------------------------------------------------
291
+
292
+ async def _stream_openai(
293
+ client: httpx.AsyncClient,
294
+ provider: ProviderConfig,
295
+ key: str,
296
+ messages: List[Dict[str, str]],
297
+ temperature: float,
298
+ max_tokens: int,
299
+ ) -> AsyncIterator[str]:
300
+ headers = {
301
+ "Authorization": f"Bearer {key}",
302
+ "Content-Type": "application/json",
303
+ "Accept": "text/event-stream",
304
+ }
305
+ body = {
306
+ "model": provider.model,
307
+ "messages": messages,
308
+ "temperature": temperature,
309
+ "max_tokens": max_tokens,
310
+ "stream": True,
311
+ }
312
+ async with client.stream(
313
+ "POST", provider.url, headers=headers, json=body,
314
+ timeout=STREAM_TIMEOUT_SECONDS,
315
+ ) as r:
316
+ if r.status_code >= 400:
317
+ err_text = (await r.aread()).decode("utf-8", "ignore")[:200]
318
+ raise RuntimeError(f"{provider.name} HTTP {r.status_code}: {err_text}")
319
+ async for line in r.aiter_lines():
320
+ if not line or not line.startswith("data:"):
321
+ continue
322
+ payload = line[5:].strip()
323
+ if payload == "[DONE]":
324
+ break
325
+ try:
326
+ import json
327
+ obj = json.loads(payload)
328
+ delta = obj["choices"][0]["delta"].get("content")
329
+ if delta:
330
+ yield delta
331
+ except Exception:
332
+ continue
333
+
334
+
335
+ async def stream_complete(
336
+ messages: List[Dict[str, str]],
337
+ *,
338
+ temperature: float = 0.4,
339
+ max_tokens: int = 2048,
340
+ ) -> AsyncIterator[Dict[str, Any]]:
341
+ """Yield {'type':'delta','content':str,'provider':str} chunks, then {'type':'done', ...}."""
342
+ prompt_text = "\n".join(m.get("content", "") for m in messages if m.get("role") == "user")
343
+ order = provider_order(prompt_text)
344
+ last_err: Optional[str] = None
345
+
346
+ async with httpx.AsyncClient() as client:
347
+ for provider_name in order:
348
+ provider = PROVIDERS[provider_name]
349
+ pool = get_pool(provider)
350
+ if not pool:
351
+ continue
352
+
353
+ for _ in range(MAX_PROVIDER_RETRY):
354
+ ks = pool.pick()
355
+ if ks is None:
356
+ break
357
+ try:
358
+ if provider.stream_supported:
359
+ got_any = False
360
+ async for delta in _stream_openai(
361
+ client, provider, ks.key, messages, temperature, max_tokens
362
+ ):
363
+ got_any = True
364
+ yield {"type": "delta", "content": delta, "provider": provider.name}
365
+ if not got_any:
366
+ raise RuntimeError("empty stream")
367
+ else:
368
+ # Gemini fallback: non-streaming, emit as a single delta
369
+ text = await _call_once(
370
+ client, provider, ks.key, messages,
371
+ temperature=temperature, max_tokens=max_tokens,
372
+ )
373
+ if not text.strip():
374
+ raise RuntimeError("empty completion")
375
+ yield {"type": "delta", "content": text, "provider": provider.name}
376
+
377
+ KeyPool.mark_success(ks)
378
+ yield {"type": "done", "provider": provider.name, "model": provider.model}
379
+ return
380
+ except Exception as e:
381
+ last_err = f"{provider.name}: {e}"
382
+ logger.warning("LLM stream failed → %s", last_err)
383
+ KeyPool.mark_failure(ks)
384
+
385
+ yield {"type": "error", "error": f"ALL_PROVIDERS_FAILED ({last_err})"}
386
+
387
+
388
+ def pool_status() -> Dict[str, Any]:
389
+ """Diagnostic info about each provider's key pool."""
390
+ out: Dict[str, Any] = {}
391
+ now = time.time()
392
+ for name, provider in PROVIDERS.items():
393
+ pool = get_pool(provider)
394
+ out[name] = {
395
+ "keys_configured": len(pool._keys),
396
+ "keys_available": sum(1 for k in pool._keys if k.cooldown_until <= now),
397
+ "model": provider.model,
398
+ }
399
+ return out
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ fastapi==0.115.5
2
+ uvicorn[standard]==0.32.1
3
+ pydantic==2.10.3
4
+ httpx==0.27.2
5
+ e2b-code-interpreter==1.0.4
6
+ python-multipart==0.0.20
tests/__init__.py ADDED
File without changes
tests/test_smoke.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Smoke tests that DO NOT hit the network.
3
+
4
+ Run with: pytest apps/backend/tests
5
+ """
6
+ from __future__ import annotations
7
+
8
+ import importlib
9
+ import os
10
+ import sys
11
+
12
+ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
13
+
14
+
15
+ def test_imports():
16
+ """Every backend module imports cleanly."""
17
+ for mod in ("backend.app", "backend.agent", "backend.intent",
18
+ "backend.llm_router", "backend.executor"):
19
+ importlib.import_module(mod)
20
+
21
+
22
+ def test_intent_heuristic_chat():
23
+ from backend.intent import heuristic_detect
24
+ r = heuristic_detect("hello there")
25
+ assert r is not None and r.needs_sandbox is False
26
+
27
+
28
+ def test_intent_heuristic_exec():
29
+ from backend.intent import heuristic_detect
30
+ r = heuristic_detect("Create proof.txt with the current unix timestamp and write it")
31
+ assert r is not None and r.needs_sandbox is True
32
+
33
+
34
+ def test_intent_code_fence():
35
+ from backend.intent import heuristic_detect
36
+ r = heuristic_detect("```python\nprint('hi')\n```")
37
+ assert r is not None and r.needs_sandbox is True
38
+
39
+
40
+ def test_extract_code_blocks():
41
+ from backend.agent import extract_code_blocks, pick_runnable
42
+ blocks = extract_code_blocks("Here is code:\n```python\nprint(1)\n```\nand shell:\n```bash\nls\n```")
43
+ assert len(blocks) == 2
44
+ chosen = pick_runnable(blocks)
45
+ assert chosen is not None
46
+ assert chosen.language in ("python", "py")
47
+
48
+
49
+ def test_provider_order_engineering():
50
+ from backend.llm_router import provider_order
51
+ order = provider_order("Write a python function to sort a list")
52
+ assert order[0] == "sambanova"
53
+
54
+
55
+ def test_provider_order_short_chat():
56
+ from backend.llm_router import provider_order
57
+ order = provider_order("hi")
58
+ assert order[0] == "gemini"
59
+
60
+
61
+ def test_pool_empty_when_no_env():
62
+ from backend.llm_router import KeyPool
63
+ p = KeyPool([])
64
+ assert not p
65
+ assert p.pick() is None
66
+
67
+
68
+ def test_pool_rotation_and_cooldown():
69
+ from backend.llm_router import KeyPool, MAX_FAILURES_BEFORE_COOLDOWN
70
+ p = KeyPool(["a", "b", "c"])
71
+ seen = set()
72
+ for _ in range(3):
73
+ ks = p.pick()
74
+ assert ks is not None
75
+ seen.add(ks.key)
76
+ assert seen == {"a", "b", "c"}
77
+ # Mark one failed enough times to cool down
78
+ ks = p.pick()
79
+ for _ in range(MAX_FAILURES_BEFORE_COOLDOWN):
80
+ KeyPool.mark_failure(ks)
81
+ assert ks.cooldown_until > 0