Percy3822 committed on
Commit
e54d33d
·
verified ·
1 Parent(s): 0e3cac8

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +286 -0
app.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
import asyncio
import logging
import os
import time
from typing import List, Dict, Any, Optional, Literal

import httpx
from fastapi import FastAPI, BackgroundTasks, HTTPException
from pydantic import BaseModel, Field, ValidationError
9
+
10
# =========================
# Config (from env vars)
# =========================
# Downstream service base URLs; an empty string disables that integration.
PYTHON_AI_URL = os.getenv("PYTHON_AI_URL", "")  # e.g. https://<you>-python_ai_space.hf.space
TTS_URL = os.getenv("TTS_URL", "")  # e.g. https://<you>-tts_space.hf.space
STT_URL = os.getenv("STT_URL", "")  # (optional) future WS/HTTP
VISION_URL = os.getenv("VISION_URL", "")  # (optional) OCR/thumbnail summaries
MEMORY_URL = os.getenv("MEMORY_URL", "")  # (optional) external memory/RAG svc

# Size budgets applied by _enforce_budgets when compacting the prompt context.
PROMPT_BUDGET_BYTES = int(os.getenv("PROMPT_BUDGET_BYTES", "12000"))
MEMORY_BUDGET_BYTES = int(os.getenv("MEMORY_BUDGET_BYTES", "6000"))
VIEWPORT_MAX_LINES = int(os.getenv("VIEWPORT_MAX_LINES", "60"))  # visible ±30 default

# Timeouts (seconds) for the shared httpx client.
REQUEST_TIMEOUT_S = float(os.getenv("REQUEST_TIMEOUT_S", "60"))
CONNECT_TIMEOUT_S = float(os.getenv("CONNECT_TIMEOUT_S", "10"))
25
+
26
+ # =========================
27
+ # Models / Schemas
28
+ # =========================
29
class Cursor(BaseModel):
    """Caret position inside the active file (line/column as sent by the editor)."""
    l: int = Field(..., description="line")
    c: int = Field(..., description="col")
32
+
33
class Viewport(BaseModel):
    """A contiguous slice of the open file visible in the editor."""
    start: int  # line number of the first line in `text`
    end: int    # line number of the last line in `text`
    text: str   # the slice's raw text, newline-separated
37
+
38
class Diagnostic(BaseModel):
    """One editor diagnostic attached to a line."""
    l: int    # line number the diagnostic points at
    sev: str  # severity label (free-form string)
    msg: str  # diagnostic message text
42
+
43
class Memory(BaseModel):
    """Memory bullets in three scopes: short-term, session, and project."""
    short: List[str] = Field(default_factory=list)
    sess: List[str] = Field(default_factory=list)
    proj: List[str] = Field(default_factory=list)
47
+
48
class Telemetry(BaseModel):
    """Editor state snapshot shipped alongside each user utterance."""
    file: str            # path/name of the active file
    lang: str            # language id of the active file
    cursor: Cursor       # caret position
    viewport: Viewport   # visible slice of the file
    diag: List[Diagnostic] = Field(default_factory=list)  # editor diagnostics
    term: str = ""       # recent terminal output
55
+
56
class CodeHelpIn(BaseModel):
    """Request body for the /code_help endpoint."""
    utterance: str                 # what the user asked for
    telemetry: Telemetry           # editor state at request time
    memory: Memory = Field(default_factory=Memory)
    response_mode: Literal["patch","full"] = "patch"
61
+
62
class Need(BaseModel):
    """Extra context the downstream AI reports it still needs."""
    function: bool = False
    xrefs: List[str] = Field(default_factory=list)
    page_ids: List[str] = Field(default_factory=list)
66
+
67
class PythonAIOutput(BaseModel):
    """Validated reply contract from the Python AI service."""
    mode: Literal["patch","full","ask"]
    patch: str = ""        # unified patch text when mode == "patch"
    full_text: str = ""    # full replacement text when mode == "full"
    explanation: str = ""  # human-readable summary (also spoken via TTS)
    confidence: float = 0.5
    need: Need = Field(default_factory=Need)
74
+
75
class CodeHelpOut(BaseModel):
    """Response body for the /code_help endpoint."""
    ai: PythonAIOutput                  # validated downstream result
    tts_audio_url: Optional[str] = None # playable audio URL, if TTS succeeded
    used_memory_bytes: int              # memory bytes after budget enforcement
    used_prompt_bytes: int              # approximate prompt size actually sent
    notes: Dict[str, Any] = Field(default_factory=dict)
81
+
82
# =========================
# App & HTTP client
# =========================
app = FastAPI(title="Brain (Router)", version="1.0")

# One shared async client for all downstream calls; closed in _shutdown.
client = httpx.AsyncClient(
    timeout=httpx.Timeout(REQUEST_TIMEOUT_S, connect=CONNECT_TIMEOUT_S),
    headers={"User-Agent": "BrainRouter/1.0"}
)
91
+
92
+ # =========================
93
+ # Small Utilities
94
+ # =========================
95
+ def _truncate_bytes(s: str, budget: int) -> str:
96
+ """Truncate a string to a byte budget (UTF-8 safe)."""
97
+ b = s.encode("utf-8")
98
+ if len(b) <= budget:
99
+ return s
100
+ return b[:budget].decode("utf-8", errors="ignore")
101
+
102
def _shrink_lines_to_max(window: Viewport, max_lines: int) -> Viewport:
    """Cap a viewport at *max_lines* lines, keeping the tail of the slice.

    Returns *window* untouched when it already fits; otherwise the returned
    viewport's `start` is advanced to match the discarded head.
    """
    all_lines = window.text.splitlines()
    if len(all_lines) <= max_lines:
        return window
    # Keep the last `max_lines` lines and move `start` forward accordingly
    # (never before the original start).
    tail = all_lines[len(all_lines) - max_lines:]
    new_start = max(window.start, window.end - max_lines + 1)
    return Viewport(start=new_start, end=window.end, text="\n".join(tail))
111
+
112
async def _safe_post_json(url: str, payload: Dict[str, Any]) -> Dict[str, Any]:
    """POST *payload* as JSON to *url* and return the decoded JSON reply.

    Raises:
        HTTPException(502) on transport failures, non-2xx statuses, or a
        response body that is not valid JSON, so callers see a uniform
        gateway error.
    """
    try:
        r = await client.post(url, json=payload)
        r.raise_for_status()
        return r.json()
    except (httpx.HTTPError, ValueError) as e:
        # httpx.HTTPError covers connect/timeout/status errors; ValueError
        # (json.JSONDecodeError) covers a non-JSON body. Narrower than the
        # previous bare `except Exception`, which also hid programming errors.
        raise HTTPException(status_code=502, detail=f"POST {url} failed: {e}")
119
+
120
# =========================
# Priority Queue (P0/P1/P2)
# =========================
# P0: speech/telemetry (not used yet in this minimal Brain, reserved)
# P1: code model + TTS (interactive)
# P2: thumbnails / heavy analysis (future)
# Entries are (priority, timestamp, task-dict); lower priority is served first.
TASK_Q: "asyncio.PriorityQueue[tuple[int,float,dict]]" = asyncio.PriorityQueue()
127
+
128
async def worker_loop():
    """Drain TASK_Q forever, dispatching each task to its async handler.

    A task is a dict with an async callable under "handler" and an optional
    "args" mapping of keyword arguments. Handler failures are logged and
    swallowed so one bad task cannot kill the worker.
    """
    log = logging.getLogger(__name__)
    while True:
        _priority, _ts, task = await TASK_Q.get()
        try:
            handler = task.get("handler")
            if handler:
                await handler(**task.get("args", {}))
        except Exception:
            # Keep the worker resilient, but record the failure instead of
            # silently dropping it (the old `pass` hid every handler error).
            log.exception("task handler failed (priority=%s)", _priority)
        finally:
            TASK_Q.task_done()
140
+
141
@app.on_event("startup")
async def _startup():
    """Spawn two background workers that service TASK_Q."""
    for _ in range(2):
        asyncio.create_task(worker_loop())
146
+
147
@app.on_event("shutdown")
async def _shutdown():
    # Close the shared httpx client; best effort — shutdown must never raise.
    try:
        await client.aclose()
    except Exception:
        pass
153
+
154
+ # =========================
155
+ # Health & Warmup
156
+ # =========================
157
+ @app.get("/health")
158
+ async def health():
159
+ deps = {
160
+ "python_ai": bool(PYTHON_AI_URL),
161
+ "tts": bool(TTS_URL),
162
+ "stt": bool(STT_URL),
163
+ "vision": bool(VISION_URL),
164
+ "memory": bool(MEMORY_URL),
165
+ }
166
+ return {"ok": True, "deps": deps, "version": "1.0"}
167
+
168
+ @app.post("/warmup")
169
+ async def warmup():
170
+ """Optionally ping downstream services to avoid cold starts."""
171
+ notes = {}
172
+ if PYTHON_AI_URL:
173
+ try:
174
+ # If your Python AI exposes /health, use it. Otherwise skip.
175
+ res = await _safe_post_json(f"{PYTHON_AI_URL}/code_help", {
176
+ "intent":"ping","file":"_warmup_.py","lang":"python",
177
+ "cursor":{"l":1,"c":1},
178
+ "viewport":{"start":1,"end":1,"text":"print('warmup')\n"},
179
+ "diag": [], "term":"", "mem":{"short":[],"sess":[],"proj":[]}
180
+ })
181
+ notes["python_ai"] = "ok" if res else "no-response"
182
+ except Exception as e:
183
+ notes["python_ai"] = f"err: {e}"
184
+ if TTS_URL:
185
+ try:
186
+ res = await _safe_post_json(f"{TTS_URL}/speak", {"text":"warming up"})
187
+ notes["tts"] = "ok" if "audio_path" in res else "no-audio"
188
+ except Exception as e:
189
+ notes["tts"] = f"err: {e}"
190
+ return {"ok": True, "notes": notes}
191
+
192
+ # =========================
193
+ # Core: Code Help endpoint
194
+ # =========================
195
def _prompt_size(t: Telemetry, used_mem: int) -> int:
    # Approximate prompt footprint: character counts of every text field that
    # reaches the downstream payload, plus the memory byte count.
    # NOTE(review): len(str) counts characters, not UTF-8 bytes, so the
    # "bytes" figure is approximate for non-ASCII content — confirm intent.
    return (
        len(t.file) + len(t.lang) +
        len(t.viewport.text) + sum(len(d.msg) for d in t.diag) +
        len(t.term) + used_mem
    )

def _enforce_budgets(t: Telemetry, m: Memory) -> tuple[Telemetry, Memory, int, int]:
    """Compact telemetry and memory to fit the configured budgets.

    Returns:
        (telemetry, memory, used_memory_bytes, used_prompt_bytes) — the
        possibly-shrunk telemetry, the re-bucketed memory, the memory size
        in UTF-8 bytes, and the approximate total prompt size.
    """
    # Shrink the viewport to the configured line cap and keep at most 5 diagnostics.
    t2 = Telemetry(
        file=t.file, lang=t.lang, cursor=t.cursor,
        viewport=_shrink_lines_to_max(t.viewport, VIEWPORT_MAX_LINES),
        diag=t.diag[:5],  # cap diagnostics
        term=t.term
    )
    # Flatten all memory bullets into one " | "-joined string, clamp it to the
    # byte budget, and re-bucket the result as a single 'sess' entry.
    mem_text = " | ".join(m.short + m.sess + m.proj)
    mem_text = _truncate_bytes(mem_text, MEMORY_BUDGET_BYTES)
    m2 = Memory(short=[], sess=[mem_text] if mem_text else [], proj=[])

    used_mem = len(mem_text.encode("utf-8"))
    prompt_bytes = _prompt_size(t2, used_mem)
    if prompt_bytes > PROMPT_BUDGET_BYTES:
        # Still too large: halve the viewport (floor of 20 lines) and clamp
        # terminal output to 1 KiB, then re-measure once. No further passes.
        t2 = Telemetry(
            file=t2.file, lang=t2.lang, cursor=t2.cursor,
            viewport=_shrink_lines_to_max(t2.viewport, max(20, VIEWPORT_MAX_LINES//2)),
            diag=t2.diag, term=_truncate_bytes(t2.term, 1024)
        )
        prompt_bytes = _prompt_size(t2, used_mem)
    return t2, m2, used_mem, prompt_bytes
229
+
230
async def _route_python_ai(payload: Dict[str, Any]) -> PythonAIOutput:
    """Forward *payload* to the Python AI service and validate its reply.

    Raises HTTPException(500) when the service URL is unset, and
    HTTPException(502) when the reply does not match PythonAIOutput.
    """
    if not PYTHON_AI_URL:
        raise HTTPException(status_code=500, detail="PYTHON_AI_URL not configured")
    raw = await _safe_post_json(f"{PYTHON_AI_URL}/code_help", payload)
    try:
        parsed = PythonAIOutput(**raw)
    except ValidationError as ve:
        # Downstream returned JSON that breaks the contract → gateway error.
        raise HTTPException(status_code=502, detail=f"Bad AI JSON schema: {ve}")
    return parsed
239
+
240
async def _send_tts(text: str) -> Optional[str]:
    """Best-effort TTS: ask the TTS service to speak *text*.

    Returns an absolute audio URL the browser can open, or None when TTS is
    unconfigured, the text is empty, or anything goes wrong.
    """
    if not (TTS_URL and text):
        return None
    try:
        res = await _safe_post_json(f"{TTS_URL}/speak", {"text": text})
        audio_path = res.get("audio_path")
        if not audio_path:
            return None
        # The Space serves generated files under /file/<name>; rebuild an
        # absolute URL from the returned path's basename.
        filename = audio_path.split("/")[-1]
        return f"{TTS_URL.rstrip('/')}/file/{filename}"
    except Exception:
        # Voice output is optional — never let TTS failures break the request.
        return None
255
+
256
+ @app.post("/code_help", response_model=CodeHelpOut)
257
+ async def code_help(x: CodeHelpIn):
258
+ # 1) enforce budgets / shrink context
259
+ t2, m2, used_mem, used_prompt = _enforce_budgets(x.telemetry, x.memory)
260
+
261
+ # 2) build compact contract for Python AI (as agreed)
262
+ py_in = {
263
+ "intent": x.utterance,
264
+ "file": t2.file,
265
+ "lang": t2.lang,
266
+ "cursor": {"l": t2.cursor.l, "c": t2.cursor.c},
267
+ "viewport": {"start": t2.viewport.start, "end": t2.viewport.end, "text": t2.viewport.text},
268
+ "diag": [{"l": d.l, "sev": d.sev, "msg": d.msg} for d in t2.diag],
269
+ "term": t2.term,
270
+ "mem": {"short": m2.short, "sess": m2.sess, "proj": m2.proj}
271
+ }
272
+
273
+ # 3) call Python AI (async)
274
+ ai_out = await _route_python_ai(py_in)
275
+
276
+ # 4) send short voice summary in parallel (explanation only)
277
+ tts_url = await _send_tts(ai_out.explanation)
278
+
279
+ # 5) respond
280
+ return CodeHelpOut(
281
+ ai=ai_out,
282
+ tts_audio_url=tts_url,
283
+ used_memory_bytes=used_mem,
284
+ used_prompt_bytes=used_prompt,
285
+ notes={"response_mode": x.response_mode}
286
+ )