Update app.py
Browse files
app.py
CHANGED
|
@@ -1,30 +1,31 @@
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
| 3 |
-
import time
|
| 4 |
from typing import List, Dict, Any, Optional, Literal
|
| 5 |
|
| 6 |
import httpx
|
| 7 |
-
from fastapi import FastAPI,
|
|
|
|
|
|
|
| 8 |
from pydantic import BaseModel, Field, ValidationError
|
| 9 |
|
| 10 |
# =========================
|
| 11 |
# Config (from env vars)
|
| 12 |
# =========================
|
| 13 |
-
PYTHON_AI_URL = os.getenv("PYTHON_AI_URL", "")
|
| 14 |
-
TTS_URL
|
| 15 |
-
STT_URL
|
| 16 |
-
VISION_URL
|
| 17 |
-
MEMORY_URL
|
| 18 |
|
| 19 |
PROMPT_BUDGET_BYTES = int(os.getenv("PROMPT_BUDGET_BYTES", "12000"))
|
| 20 |
MEMORY_BUDGET_BYTES = int(os.getenv("MEMORY_BUDGET_BYTES", "6000"))
|
| 21 |
-
VIEWPORT_MAX_LINES = int(os.getenv("VIEWPORT_MAX_LINES", "60"))
|
| 22 |
|
| 23 |
REQUEST_TIMEOUT_S = float(os.getenv("REQUEST_TIMEOUT_S", "60"))
|
| 24 |
CONNECT_TIMEOUT_S = float(os.getenv("CONNECT_TIMEOUT_S", "10"))
|
| 25 |
|
| 26 |
# =========================
|
| 27 |
-
#
|
| 28 |
# =========================
|
| 29 |
class Cursor(BaseModel):
|
| 30 |
l: int = Field(..., description="line")
|
|
@@ -80,20 +81,45 @@ class CodeHelpOut(BaseModel):
|
|
| 80 |
notes: Dict[str, Any] = {}
|
| 81 |
|
| 82 |
# =========================
|
| 83 |
-
# App
|
| 84 |
# =========================
|
| 85 |
app = FastAPI(title="Brain (Router)", version="1.0")
|
| 86 |
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 91 |
|
| 92 |
# =========================
|
| 93 |
# Small Utilities
|
| 94 |
# =========================
|
| 95 |
def _truncate_bytes(s: str, budget: int) -> str:
|
| 96 |
-
"""Truncate a string to a byte budget (UTF-8 safe)."""
|
| 97 |
b = s.encode("utf-8")
|
| 98 |
if len(b) <= budget:
|
| 99 |
return s
|
|
@@ -104,12 +130,13 @@ def _shrink_lines_to_max(window: Viewport, max_lines: int) -> Viewport:
|
|
| 104 |
if len(lines) <= max_lines:
|
| 105 |
return window
|
| 106 |
keep = max_lines
|
| 107 |
-
start_line = max(window.start, window.end - keep + 1)
|
| 108 |
slice_start = max(0, len(lines) - keep)
|
| 109 |
new_text = "\n".join(lines[slice_start:])
|
| 110 |
-
return Viewport(start=
|
| 111 |
|
| 112 |
async def _safe_post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
|
|
|
|
| 113 |
try:
|
| 114 |
r = await client.post(url, json=payload)
|
| 115 |
r.raise_for_status()
|
|
@@ -118,11 +145,8 @@ async def _safe_post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
| 118 |
raise HTTPException(status_code=502, detail=f"POST {url} failed: {e}")
|
| 119 |
|
| 120 |
# =========================
|
| 121 |
-
# Priority Queue (
|
| 122 |
# =========================
|
| 123 |
-
# P0: speech/telemetry (not used yet in this minimal Brain, reserved)
|
| 124 |
-
# P1: code model + TTS (interactive)
|
| 125 |
-
# P2: thumbnails / heavy analysis (future)
|
| 126 |
TASK_Q: "asyncio.PriorityQueue[tuple[int,float,dict]]" = asyncio.PriorityQueue()
|
| 127 |
|
| 128 |
async def worker_loop():
|
|
@@ -133,24 +157,10 @@ async def worker_loop():
|
|
| 133 |
if handler:
|
| 134 |
await handler(**task.get("args", {}))
|
| 135 |
except Exception:
|
| 136 |
-
# We keep the worker resilient; detailed logs would go here.
|
| 137 |
pass
|
| 138 |
finally:
|
| 139 |
TASK_Q.task_done()
|
| 140 |
|
| 141 |
-
@app.on_event("startup")
|
| 142 |
-
async def _startup():
|
| 143 |
-
# Start a couple of workers
|
| 144 |
-
asyncio.create_task(worker_loop())
|
| 145 |
-
asyncio.create_task(worker_loop())
|
| 146 |
-
|
| 147 |
-
@app.on_event("shutdown")
|
| 148 |
-
async def _shutdown():
|
| 149 |
-
try:
|
| 150 |
-
await client.aclose()
|
| 151 |
-
except Exception:
|
| 152 |
-
pass
|
| 153 |
-
|
| 154 |
# =========================
|
| 155 |
# Health & Warmup
|
| 156 |
# =========================
|
|
@@ -167,11 +177,9 @@ async def health():
|
|
| 167 |
|
| 168 |
@app.post("/warmup")
|
| 169 |
async def warmup():
|
| 170 |
-
|
| 171 |
-
notes = {}
|
| 172 |
if PYTHON_AI_URL:
|
| 173 |
try:
|
| 174 |
-
# If your Python AI exposes /health, use it. Otherwise skip.
|
| 175 |
res = await _safe_post_json(f"{PYTHON_AI_URL}/code_help", {
|
| 176 |
"intent":"ping","file":"_warmup_.py","lang":"python",
|
| 177 |
"cursor":{"l":1,"c":1},
|
|
@@ -179,52 +187,35 @@ async def warmup():
|
|
| 179 |
"diag": [], "term":"", "mem":{"short":[],"sess":[],"proj":[]}
|
| 180 |
})
|
| 181 |
notes["python_ai"] = "ok" if res else "no-response"
|
| 182 |
-
except
|
| 183 |
-
notes["python_ai"] = f"err: {e}"
|
| 184 |
if TTS_URL:
|
| 185 |
try:
|
| 186 |
res = await _safe_post_json(f"{TTS_URL}/speak", {"text":"warming up"})
|
| 187 |
notes["tts"] = "ok" if "audio_path" in res else "no-audio"
|
| 188 |
-
except
|
| 189 |
-
notes["tts"] = f"err: {e}"
|
| 190 |
return {"ok": True, "notes": notes}
|
| 191 |
|
| 192 |
# =========================
|
| 193 |
# Core: Code Help endpoint
|
| 194 |
# =========================
|
| 195 |
def _enforce_budgets(t: Telemetry, m: Memory) -> tuple[Telemetry, Memory, int, int]:
|
| 196 |
-
# shrink viewport to max lines
|
| 197 |
t2 = Telemetry(
|
| 198 |
file=t.file, lang=t.lang, cursor=t.cursor,
|
| 199 |
viewport=_shrink_lines_to_max(t.viewport, VIEWPORT_MAX_LINES),
|
| 200 |
-
diag=t.diag[:5],
|
| 201 |
term=t.term
|
| 202 |
)
|
| 203 |
-
# compress memory bullets and apply byte budget
|
| 204 |
mem_text = " | ".join(m.short + m.sess + m.proj)
|
| 205 |
mem_text = _truncate_bytes(mem_text, MEMORY_BUDGET_BYTES)
|
| 206 |
-
# reconstruct memory by naive split (keeps one string bucketed in 'sess')
|
| 207 |
m2 = Memory(short=[], sess=[mem_text] if mem_text else [], proj=[])
|
| 208 |
-
|
| 209 |
-
# count budgets (approx: sum key strings + text fields)
|
| 210 |
used_mem = len(mem_text.encode("utf-8"))
|
| 211 |
prompt_bytes = (
|
| 212 |
len(t2.file) + len(t2.lang) +
|
| 213 |
len(t2.viewport.text) + sum(len(d.msg) for d in t2.diag) +
|
| 214 |
len(t2.term) + used_mem
|
| 215 |
)
|
| 216 |
-
if prompt_bytes > PROMPT_BUDGET_BYTES:
|
| 217 |
-
# tighten viewport again by half if still too large
|
| 218 |
-
t2 = Telemetry(
|
| 219 |
-
file=t2.file, lang=t2.lang, cursor=t2.cursor,
|
| 220 |
-
viewport=_shrink_lines_to_max(t2.viewport, max(20, VIEWPORT_MAX_LINES//2)),
|
| 221 |
-
diag=t2.diag, term=_truncate_bytes(t2.term, 1024)
|
| 222 |
-
)
|
| 223 |
-
prompt_bytes = (
|
| 224 |
-
len(t2.file) + len(t2.lang) +
|
| 225 |
-
len(t2.viewport.text) + sum(len(d.msg) for d in t2.diag) +
|
| 226 |
-
len(t2.term) + used_mem
|
| 227 |
-
)
|
| 228 |
return t2, m2, used_mem, prompt_bytes
|
| 229 |
|
| 230 |
async def _route_python_ai(payload: Dict[str, Any]) -> PythonAIOutput:
|
|
@@ -234,7 +225,6 @@ async def _route_python_ai(payload: Dict[str, Any]) -> PythonAIOutput:
|
|
| 234 |
try:
|
| 235 |
return PythonAIOutput(**res)
|
| 236 |
except ValidationError as ve:
|
| 237 |
-
# If downstream returns bad JSON, surface as 502 with details.
|
| 238 |
raise HTTPException(status_code=502, detail=f"Bad AI JSON schema: {ve}")
|
| 239 |
|
| 240 |
async def _send_tts(text: str) -> Optional[str]:
|
|
@@ -242,23 +232,18 @@ async def _send_tts(text: str) -> Optional[str]:
|
|
| 242 |
return None
|
| 243 |
try:
|
| 244 |
res = await _safe_post_json(f"{TTS_URL}/speak", {"text": text})
|
| 245 |
-
# HF Space fastapi static path helper
|
| 246 |
audio_path = res.get("audio_path")
|
| 247 |
if not audio_path:
|
| 248 |
return None
|
| 249 |
-
# Convert to absolute URL the browser can open
|
| 250 |
base = TTS_URL.rstrip("/")
|
| 251 |
name = audio_path.split("/")[-1]
|
| 252 |
return f"{base}/file/{name}"
|
| 253 |
-
except
|
| 254 |
return None
|
| 255 |
|
| 256 |
@app.post("/code_help", response_model=CodeHelpOut)
|
| 257 |
async def code_help(x: CodeHelpIn):
|
| 258 |
-
# 1) enforce budgets / shrink context
|
| 259 |
t2, m2, used_mem, used_prompt = _enforce_budgets(x.telemetry, x.memory)
|
| 260 |
-
|
| 261 |
-
# 2) build compact contract for Python AI (as agreed)
|
| 262 |
py_in = {
|
| 263 |
"intent": x.utterance,
|
| 264 |
"file": t2.file,
|
|
@@ -269,14 +254,8 @@ async def code_help(x: CodeHelpIn):
|
|
| 269 |
"term": t2.term,
|
| 270 |
"mem": {"short": m2.short, "sess": m2.sess, "proj": m2.proj}
|
| 271 |
}
|
| 272 |
-
|
| 273 |
-
# 3) call Python AI (async)
|
| 274 |
ai_out = await _route_python_ai(py_in)
|
| 275 |
-
|
| 276 |
-
# 4) send short voice summary in parallel (explanation only)
|
| 277 |
tts_url = await _send_tts(ai_out.explanation)
|
| 278 |
-
|
| 279 |
-
# 5) respond
|
| 280 |
return CodeHelpOut(
|
| 281 |
ai=ai_out,
|
| 282 |
tts_audio_url=tts_url,
|
|
|
|
| 1 |
import os
|
| 2 |
import asyncio
|
|
|
|
| 3 |
from typing import List, Dict, Any, Optional, Literal
|
| 4 |
|
| 5 |
import httpx
|
| 6 |
+
from fastapi import FastAPI, HTTPException
|
| 7 |
+
from fastapi.responses import FileResponse
|
| 8 |
+
from fastapi.staticfiles import StaticFiles
|
| 9 |
from pydantic import BaseModel, Field, ValidationError
|
| 10 |
|
| 11 |
# =========================
|
| 12 |
# Config (from env vars)
|
| 13 |
# =========================
|
| 14 |
+
PYTHON_AI_URL = os.getenv("PYTHON_AI_URL", "")
|
| 15 |
+
TTS_URL = os.getenv("TTS_URL", "")
|
| 16 |
+
STT_URL = os.getenv("STT_URL", "")
|
| 17 |
+
VISION_URL = os.getenv("VISION_URL", "")
|
| 18 |
+
MEMORY_URL = os.getenv("MEMORY_URL", "")
|
| 19 |
|
| 20 |
PROMPT_BUDGET_BYTES = int(os.getenv("PROMPT_BUDGET_BYTES", "12000"))
|
| 21 |
MEMORY_BUDGET_BYTES = int(os.getenv("MEMORY_BUDGET_BYTES", "6000"))
|
| 22 |
+
VIEWPORT_MAX_LINES = int(os.getenv("VIEWPORT_MAX_LINES", "60"))
|
| 23 |
|
| 24 |
REQUEST_TIMEOUT_S = float(os.getenv("REQUEST_TIMEOUT_S", "60"))
|
| 25 |
CONNECT_TIMEOUT_S = float(os.getenv("CONNECT_TIMEOUT_S", "10"))
|
| 26 |
|
| 27 |
# =========================
|
| 28 |
+
# Schemas
|
| 29 |
# =========================
|
| 30 |
class Cursor(BaseModel):
|
| 31 |
l: int = Field(..., description="line")
|
|
|
|
| 81 |
notes: Dict[str, Any] = {}
|
| 82 |
|
| 83 |
# =========================
|
| 84 |
+
# App + Static UI
|
| 85 |
# =========================
|
| 86 |
app = FastAPI(title="Brain (Router)", version="1.0")
|
| 87 |
|
| 88 |
+
# Serve the static UI
|
| 89 |
+
app.mount("/static", StaticFiles(directory="static"), name="static")
|
| 90 |
+
|
| 91 |
+
@app.get("/")
async def root_ui():
    """Serve the static UI page at the site root."""
    ui_page = "static/ui.html"
    return FileResponse(ui_page)
|
| 94 |
+
|
| 95 |
+
# =========================
|
| 96 |
+
# HTTP client (async)
|
| 97 |
+
# =========================
|
| 98 |
+
client: Optional[httpx.AsyncClient] = None
|
| 99 |
+
|
| 100 |
+
@app.on_event("startup")
async def _startup():
    """Create the shared HTTP client and start the background queue workers.

    The client is stored in the module-level ``client`` and is configured
    with REQUEST_TIMEOUT_S as the overall timeout and CONNECT_TIMEOUT_S as
    the connect timeout. Two ``worker_loop`` tasks are started.
    """
    global client
    client = httpx.AsyncClient(
        timeout=httpx.Timeout(REQUEST_TIMEOUT_S, connect=CONNECT_TIMEOUT_S),
        headers={"User-Agent": "BrainRouter/1.0"},
    )
    # The event loop keeps only weak references to tasks, so a task whose
    # result is discarded can be garbage-collected while still running.
    # Hold strong references on the application state for the app's lifetime.
    app.state.worker_tasks = [
        asyncio.create_task(worker_loop()) for _ in range(2)
    ]
|
| 109 |
+
|
| 110 |
+
@app.on_event("shutdown")
async def _shutdown():
    """Close the shared HTTP client when the application shuts down.

    Closing is best-effort: a failure is logged instead of raised so it
    cannot interrupt shutdown. The module-level ``client`` is reset to
    ``None`` afterwards so a closed client is never reused.
    """
    global client
    if client is None:
        return
    try:
        await client.aclose()
    except Exception:
        # Local import keeps this block self-contained; the failure is
        # recorded rather than silently discarded.
        import logging

        logging.getLogger(__name__).exception("Failed to close HTTP client")
    finally:
        client = None
|
| 118 |
|
| 119 |
# =========================
|
| 120 |
# Small Utilities
|
| 121 |
# =========================
|
| 122 |
def _truncate_bytes(s: str, budget: int) -> str:
|
|
|
|
| 123 |
b = s.encode("utf-8")
|
| 124 |
if len(b) <= budget:
|
| 125 |
return s
|
|
|
|
| 130 |
if len(lines) <= max_lines:
|
| 131 |
return window
|
| 132 |
keep = max_lines
|
|
|
|
| 133 |
slice_start = max(0, len(lines) - keep)
|
| 134 |
new_text = "\n".join(lines[slice_start:])
|
| 135 |
+
return Viewport(start=window.end - keep + 1, end=window.end, text=new_text)
|
| 136 |
|
| 137 |
async def _safe_post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
|
| 138 |
+
if client is None:
|
| 139 |
+
raise HTTPException(status_code=500, detail="HTTP client not initialized")
|
| 140 |
try:
|
| 141 |
r = await client.post(url, json=payload)
|
| 142 |
r.raise_for_status()
|
|
|
|
| 145 |
raise HTTPException(status_code=502, detail=f"POST {url} failed: {e}")
|
| 146 |
|
| 147 |
# =========================
|
| 148 |
+
# Priority Queue (reserved)
|
| 149 |
# =========================
|
|
|
|
|
|
|
|
|
|
| 150 |
TASK_Q: "asyncio.PriorityQueue[tuple[int,float,dict]]" = asyncio.PriorityQueue()
|
| 151 |
|
| 152 |
async def worker_loop():
|
|
|
|
| 157 |
if handler:
|
| 158 |
await handler(**task.get("args", {}))
|
| 159 |
except Exception:
|
|
|
|
| 160 |
pass
|
| 161 |
finally:
|
| 162 |
TASK_Q.task_done()
|
| 163 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 164 |
# =========================
|
| 165 |
# Health & Warmup
|
| 166 |
# =========================
|
|
|
|
| 177 |
|
| 178 |
@app.post("/warmup")
|
| 179 |
async def warmup():
|
| 180 |
+
notes: Dict[str, Any] = {}
|
|
|
|
| 181 |
if PYTHON_AI_URL:
|
| 182 |
try:
|
|
|
|
| 183 |
res = await _safe_post_json(f"{PYTHON_AI_URL}/code_help", {
|
| 184 |
"intent":"ping","file":"_warmup_.py","lang":"python",
|
| 185 |
"cursor":{"l":1,"c":1},
|
|
|
|
| 187 |
"diag": [], "term":"", "mem":{"short":[],"sess":[],"proj":[]}
|
| 188 |
})
|
| 189 |
notes["python_ai"] = "ok" if res else "no-response"
|
| 190 |
+
except HTTPException as e:
|
| 191 |
+
notes["python_ai"] = f"err: {e.detail}"
|
| 192 |
if TTS_URL:
|
| 193 |
try:
|
| 194 |
res = await _safe_post_json(f"{TTS_URL}/speak", {"text":"warming up"})
|
| 195 |
notes["tts"] = "ok" if "audio_path" in res else "no-audio"
|
| 196 |
+
except HTTPException as e:
|
| 197 |
+
notes["tts"] = f"err: {e.detail}"
|
| 198 |
return {"ok": True, "notes": notes}
|
| 199 |
|
| 200 |
# =========================
|
| 201 |
# Core: Code Help endpoint
|
| 202 |
# =========================
|
| 203 |
def _enforce_budgets(t: Telemetry, m: Memory) -> tuple[Telemetry, Memory, int, int]:
    """Trim telemetry and memory for the downstream model and measure them.

    The viewport is passed through ``_shrink_lines_to_max`` with
    VIEWPORT_MAX_LINES, only the first five diagnostics are kept, and all
    memory entries (short, sess, proj) are joined with " | " into a single
    string that is truncated to MEMORY_BUDGET_BYTES.

    Args:
        t: Incoming telemetry (file, lang, cursor, viewport, diag, term).
        m: Incoming memory buckets (short, sess, proj).

    Returns:
        A tuple ``(telemetry, memory, used_mem, prompt_bytes)``. ``memory``
        carries the joined text as its only ``sess`` entry (no entry when the
        text is empty). ``used_mem`` is the UTF-8 size of that text, and
        ``prompt_bytes`` is the UTF-8 size of the file name, language,
        viewport text, diagnostic messages and terminal text plus
        ``used_mem``.
    """

    def _size(text: str) -> int:
        # Measure in UTF-8 bytes, not characters, so every component uses the
        # same unit as ``used_mem`` and the *_BYTES budgets.
        return len(text.encode("utf-8"))

    t2 = Telemetry(
        file=t.file, lang=t.lang, cursor=t.cursor,
        viewport=_shrink_lines_to_max(t.viewport, VIEWPORT_MAX_LINES),
        diag=t.diag[:5],
        term=t.term,
    )

    mem_text = " | ".join(m.short + m.sess + m.proj)
    mem_text = _truncate_bytes(mem_text, MEMORY_BUDGET_BYTES)
    m2 = Memory(short=[], sess=[mem_text] if mem_text else [], proj=[])

    used_mem = _size(mem_text)
    # NOTE(review): assumes file, lang, viewport.text, diag[].msg and term
    # are all plain strings -- confirm against the schema definitions.
    prompt_bytes = (
        _size(t2.file) + _size(t2.lang) +
        _size(t2.viewport.text) + sum(_size(d.msg) for d in t2.diag) +
        _size(t2.term) + used_mem
    )
    return t2, m2, used_mem, prompt_bytes
|
| 220 |
|
| 221 |
async def _route_python_ai(payload: Dict[str, Any]) -> PythonAIOutput:
|
|
|
|
| 225 |
try:
|
| 226 |
return PythonAIOutput(**res)
|
| 227 |
except ValidationError as ve:
|
|
|
|
| 228 |
raise HTTPException(status_code=502, detail=f"Bad AI JSON schema: {ve}")
|
| 229 |
|
| 230 |
async def _send_tts(text: str) -> Optional[str]:
|
|
|
|
| 232 |
return None
|
| 233 |
try:
|
| 234 |
res = await _safe_post_json(f"{TTS_URL}/speak", {"text": text})
|
|
|
|
| 235 |
audio_path = res.get("audio_path")
|
| 236 |
if not audio_path:
|
| 237 |
return None
|
|
|
|
| 238 |
base = TTS_URL.rstrip("/")
|
| 239 |
name = audio_path.split("/")[-1]
|
| 240 |
return f"{base}/file/{name}"
|
| 241 |
+
except HTTPException:
|
| 242 |
return None
|
| 243 |
|
| 244 |
@app.post("/code_help", response_model=CodeHelpOut)
|
| 245 |
async def code_help(x: CodeHelpIn):
|
|
|
|
| 246 |
t2, m2, used_mem, used_prompt = _enforce_budgets(x.telemetry, x.memory)
|
|
|
|
|
|
|
| 247 |
py_in = {
|
| 248 |
"intent": x.utterance,
|
| 249 |
"file": t2.file,
|
|
|
|
| 254 |
"term": t2.term,
|
| 255 |
"mem": {"short": m2.short, "sess": m2.sess, "proj": m2.proj}
|
| 256 |
}
|
|
|
|
|
|
|
| 257 |
ai_out = await _route_python_ai(py_in)
|
|
|
|
|
|
|
| 258 |
tts_url = await _send_tts(ai_out.explanation)
|
|
|
|
|
|
|
| 259 |
return CodeHelpOut(
|
| 260 |
ai=ai_out,
|
| 261 |
tts_audio_url=tts_url,
|