nxdev-org committed on
Commit
f6de22c
Β·
1 Parent(s): e0d7edf

update api

Browse files
Files changed (1) hide show
  1. api_server.py +964 -198
api_server.py CHANGED
@@ -1,54 +1,54 @@
1
  #!/usr/bin/env python3
2
  """
3
- api_server.py β€” High-Performance OpenAI-Compatible MQTT Proxy (Multi-Worker Edition)
4
 
5
- Features:
6
- - Full OpenAI Chat Completion API support.
7
- - Multi-Worker Discovery: Lists every active browser tab as a unique model.
8
- - Intelligent Routing: Routes requests to specific workers or load-balances across ready ones.
9
- - Enhanced Session Isolation: Handles per-tab worker sessions (Zen v9.5).
10
  """
11
 
12
- import json
13
- import time
14
- import uuid
15
- import asyncio
16
- import logging
17
- import os
18
- from typing import Optional, List, Dict, Any, Union, Literal, AsyncGenerator
19
  from contextlib import asynccontextmanager
20
 
21
- from fastapi.responses import HTMLResponse
22
  from fastapi import FastAPI, HTTPException, Request
23
- from fastapi.responses import StreamingResponse, JSONResponse
24
  from fastapi.middleware.cors import CORSMiddleware
25
  from pydantic import BaseModel
26
  import paho.mqtt.client as mqtt
27
  from paho.mqtt.client import CallbackAPIVersion
28
  import uvicorn
29
 
30
- # ============================================================
31
  # CONFIGURATION
32
- # ============================================================
 
33
  class Config:
34
- BROKER_HOST = os.getenv("MQTT_BROKER_HOST", "nxdev-org-mqtt-broker.hf.space")
35
- BROKER_PORT = int(os.getenv("MQTT_BROKER_PORT", "443"))
36
- USE_TLS = os.getenv("MQTT_USE_TLS", "true").lower() in ("1", "true", "yes")
37
- WS_PATH = os.getenv("MQTT_WS_PATH", "/mqtt")
38
-
39
- API_HOST = "0.0.0.0"
40
- API_PORT = 8001
41
-
42
- TIMEOUT_SEC = 120.0
43
- SESSION_EXPIRY = 30.0 # Workers must heartbeat every 2s, 30s is generous
 
 
44
 
45
  config = Config()
46
- logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(message)s")
47
- logger = logging.getLogger("openai-proxy")
 
 
 
48
 
49
- # ============================================================
50
- # MODELS
51
- # ============================================================
52
 
53
  class ChatMessage(BaseModel):
54
  role: str
@@ -76,258 +76,1024 @@ class ChatCompletionChunk(BaseModel):
76
  model: str
77
  choices: List[ChoiceChunk]
78
 
79
- # ============================================================
80
- # MQTT BRIDGE ENGINE
81
- # ============================================================
82
 
83
- class OpenAIProxyEngine:
84
  def __init__(self):
85
  self.client_id = f"proxy-{uuid.uuid4().hex[:8]}"
86
- self.workers: Dict[str, Dict] = {} # sid -> {model, status, last_seen, host}
87
  self._queues: Dict[str, asyncio.Queue] = {}
88
  self._loop: Optional[asyncio.AbstractEventLoop] = None
89
-
90
- # Paho MQTT Setup
 
 
 
 
 
 
 
 
 
 
 
91
  self.mqtt = mqtt.Client(
92
  callback_api_version=CallbackAPIVersion.VERSION2,
93
  client_id=self.client_id,
94
- transport="websockets"
95
  )
96
  if config.USE_TLS:
97
  self.mqtt.tls_set()
98
- self.mqtt.ws_set_options(path=config.WS_PATH, headers={"Sec-WebSocket-Protocol": "mqtt"})
99
-
100
- self.mqtt.on_connect = self._on_connect
101
- self.mqtt.on_message = self._on_message
 
 
 
 
102
 
103
- def set_loop(self, loop):
104
- self._loop = loop
105
 
106
  def _on_connect(self, client, userdata, flags, rc, props=None):
107
  if rc == 0:
108
- logger.info("βœ… Proxy connected to MQTT broker")
 
 
109
  client.subscribe("arena-ai/+/response")
110
  client.subscribe("arena-ai/global/heartbeat")
111
- client.publish("arena-ai/global/discovery", "ping")
112
  else:
113
- logger.error(f"❌ MQTT Connection failed: {rc}")
 
 
 
 
 
114
 
115
  def _on_message(self, client, userdata, msg):
116
  try:
117
- topic = msg.topic
118
  payload = json.loads(msg.payload.decode())
119
 
120
  if topic == "arena-ai/global/heartbeat":
121
  sid = payload.get("id")
122
  if sid:
123
- self.workers[sid] = {
124
- "last_seen": time.time(),
125
- "model": payload.get("model", "AI-Worker"),
126
- "status": payload.get("status", "ready"),
127
- "host": payload.get("host", "unknown")
128
- }
 
 
129
  return
130
 
131
  if topic.endswith("/response"):
132
  rid = payload.get("id")
133
- if rid in self._queues and self._loop:
 
134
  self._loop.call_soon_threadsafe(self._queues[rid].put_nowait, payload)
135
- except Exception as e:
136
- logger.error(f"Error processing MQTT message: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
137
 
138
- def get_active_workers(self):
139
  now = time.time()
140
- active = {}
141
- for sid, info in list(self.workers.items()):
142
- if now - info["last_seen"] < config.SESSION_EXPIRY:
143
- active[sid] = info
144
- else:
145
- del self.workers[sid]
146
- return active
147
 
148
  async def chat(self, req: ChatCompletionRequest) -> AsyncGenerator[Dict, None]:
 
 
 
149
  active = self.get_active_workers()
150
- target_sid = None
151
-
152
- # 1. Try exact SID match
153
- if req.model in active:
154
- target_sid = req.model
155
- # 2. Try "Model:SID" format match
156
- elif ":" in req.model:
157
- parts = req.model.split(":")
158
- potential_sid = parts[-1]
159
- if potential_sid in active:
160
- target_sid = potential_sid
161
-
162
- # 3. Fallback: Find worker by model name
163
- if not target_sid:
164
- candidates = [sid for sid, info in active.items() if info["model"] == req.model and info["status"] == "ready"]
165
- if candidates:
166
- target_sid = candidates[0]
167
-
168
- # 4. Final Fallback: First ready worker
169
- if not target_sid:
170
- ready = [sid for sid, info in active.items() if info["status"] == "ready"]
171
- if not ready:
172
- logger.error("❌ No active Zen workers available")
173
- raise HTTPException(status_code=503, detail="No active Zen Bridge workers found")
174
- target_sid = ready[0]
175
-
176
- req_id = f"req-{uuid.uuid4().hex[:12]}"
177
- q = asyncio.Queue()
178
- self._queues[req_id] = q
179
-
180
- mqtt_payload = {
181
- "id": req_id,
182
- "messages": [m.model_dump() for m in req.messages],
183
- "stream": req.stream,
184
- "temperature": req.temperature
185
- }
186
 
187
- logger.info(f"πŸ“€ [OpenAI] Start {req_id} -> Worker {target_sid} ({active[target_sid]['model']})")
 
 
 
 
 
 
 
 
 
 
 
188
 
189
  try:
190
- self.mqtt.publish(f"arena-ai/{target_sid}/request", json.dumps(mqtt_payload), qos=1)
191
-
192
- start = time.time()
193
- chunk_count = 0
194
  while True:
195
- if time.time() - start > config.TIMEOUT_SEC:
196
- logger.warning(f"⏰ {req_id} timed out")
197
- raise asyncio.TimeoutError()
198
-
199
- chunk = await q.get()
200
- chunk_count += 1
 
 
201
  yield chunk
202
-
203
  choices = chunk.get("choices", [])
204
- is_done = False
205
- if choices and choices[0].get("finish_reason"):
206
- is_done = True
207
- elif chunk.get("object") == "chat.completion":
208
- is_done = True
209
-
210
- if is_done:
211
- while not q.empty():
212
- extra = q.get_nowait()
213
- yield extra
214
- chunk_count += 1
215
- logger.info(f"βœ… [OpenAI] End {req_id} ({chunk_count} chunks)")
216
  break
 
 
 
 
 
 
217
  finally:
218
- if req_id in self._queues:
219
- del self._queues[req_id]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
 
221
- # ============================================================
222
- # API SERVER
223
- # ============================================================
 
 
 
 
 
 
224
 
225
- engine = OpenAIProxyEngine()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
226
 
227
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Connect the MQTT bridge on startup and tear it down on shutdown.

    FastAPI runs everything before ``yield`` at startup and everything
    after it at shutdown.
    """
    # The paho callbacks run in a background network thread; the engine
    # needs the server's event loop to hand results back safely via
    # call_soon_threadsafe.
    engine.set_loop(asyncio.get_running_loop())
    engine.mqtt.connect(config.BROKER_HOST, config.BROKER_PORT)
    engine.mqtt.loop_start()  # spawns paho's network thread
    try:
        yield
    finally:
        # Guarantee the network thread is stopped and the broker session
        # closed even if shutdown was triggered by an unhandled error.
        engine.mqtt.loop_stop()
        engine.mqtt.disconnect()
235
 
236
- app = FastAPI(title="Zen OpenAI Proxy", lifespan=lifespan)
237
-
238
- app.add_middleware(
239
- CORSMiddleware,
240
- allow_origins=["*"],
241
- allow_methods=["*"],
242
- allow_headers=["*"],
243
- )
244
 
 
245
 
246
  @app.get("/", response_class=HTMLResponse)
247
- async def index():
248
- return "High-Performance OpenAI-Compatible MQTT Proxy (Multi-Worker Edition)"
 
 
 
 
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
 
251
  @app.get("/v1/models")
252
- async def models():
253
  active = engine.get_active_workers()
254
- data = []
255
-
256
- # Add a generic "auto" model
257
- data.append({"id": "auto", "object": "model", "owned_by": "zen-bridge"})
258
-
259
  for sid, info in active.items():
260
- # Represent each session as a model: "ModelName:SID"
261
- model_id = f"{info['model']}:{sid}"
262
  data.append({
263
- "id": model_id,
264
  "object": "model",
265
- "owned_by": "zen-bridge",
266
- "meta": {
267
- "sid": sid,
268
- "status": info["status"],
269
- "host": info["host"]
270
- }
271
  })
272
  return {"object": "list", "data": data}
273
 
274
  @app.post("/v1/chat/completions")
275
- async def chat(req: ChatCompletionRequest, request: Request):
276
  chat_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
277
- created = int(time.time())
278
 
279
  if req.stream:
280
- async def stream_gen():
281
  try:
282
  async for chunk in engine.chat(req):
283
  if await request.is_disconnected():
284
  break
285
-
286
  choices = chunk.get("choices", [])
287
- if not choices: continue
288
- delta_data = choices[0].get("delta", {}) or choices[0].get("message", {})
289
-
290
  resp = ChatCompletionChunk(
291
  id=chat_id, created=created, model=req.model,
292
  choices=[ChoiceChunk(
293
  delta=ChoiceDelta(
294
- content=delta_data.get("content"),
295
- reasoning_content=delta_data.get("reasoning_content")
296
  ),
297
- finish_reason=choices[0].get("finish_reason")
298
- )]
299
  )
300
  yield f"data: {resp.model_dump_json(exclude_none=True)}\n\n"
301
-
302
  if not await request.is_disconnected():
303
  yield "data: [DONE]\n\n"
304
- except Exception as e:
305
- logger.error(f"Stream Error: {e}")
306
- yield f"data: {json.dumps({'error': str(e)})}\n\n"
307
-
308
- return StreamingResponse(stream_gen(), media_type="text/event-stream")
309
-
310
- else:
311
- full_content = ""
312
- full_reasoning = ""
313
- async for chunk in engine.chat(req):
314
- choices = chunk.get("choices", [])
315
- if not choices: continue
316
- delta_data = choices[0].get("delta", {}) or choices[0].get("message", {})
317
- full_content += delta_data.get("content", "") or ""
318
- full_reasoning += delta_data.get("reasoning_content", "") or ""
319
-
320
- return {
321
- "id": chat_id, "object": "chat.completion", "created": created, "model": req.model,
322
- "choices": [{
323
- "message": {
324
- "role": "assistant",
325
- "content": full_content,
326
- "reasoning_content": full_reasoning if full_reasoning else None
327
- },
328
- "finish_reason": "stop", "index": 0
329
- }]
330
- }
 
 
 
 
 
 
 
 
 
331
 
332
  if __name__ == "__main__":
333
- uvicorn.run(app, host=config.API_HOST, port=config.API_PORT)
 
1
  #!/usr/bin/env python3
2
  """
3
+ api_server.py β€” OpenAI-Compatible MQTT Proxy v3.0
4
 
5
+ β€’ Modern chat UI with markdown, code highlighting, thinking blocks
6
+ β€’ Admin debug console with live connection / worker / stats monitoring
7
+ β€’ Pressure-aware load balancing across browser-tab workers
8
+ β€’ Robust SSE streaming with proper chunk buffering
 
9
  """
10
 
11
+ import json, time, uuid, asyncio, logging, os
12
+ from collections import deque
13
+ from typing import Optional, List, Dict, AsyncGenerator
 
 
 
 
14
  from contextlib import asynccontextmanager
15
 
 
16
  from fastapi import FastAPI, HTTPException, Request
17
+ from fastapi.responses import HTMLResponse, StreamingResponse, JSONResponse
18
  from fastapi.middleware.cors import CORSMiddleware
19
  from pydantic import BaseModel
20
  import paho.mqtt.client as mqtt
21
  from paho.mqtt.client import CallbackAPIVersion
22
  import uvicorn
23
 
24
+ # ════════════════════════════════════════════════════════════
25
  # CONFIGURATION
26
+ # ════════════════════════════════════════════════════════════
27
+
28
class Config:
    """Runtime configuration, resolved once at import time from environment.

    The ``MQTT_*`` variables take precedence over the legacy ``BROKER_*``
    names where both exist.
    """

    BROKER_HOST = os.getenv("MQTT_BROKER_HOST", os.getenv("BROKER_HOST", "127.0.0.1"))
    BROKER_PORT = int(os.getenv("MQTT_BROKER_PORT", os.getenv("BROKER_PORT", "1883")))
    USE_TLS = os.getenv("MQTT_USE_TLS", "false").lower() in ("1", "true", "yes")
    WS_PATH = os.getenv("MQTT_WS_PATH", "/mqtt")
    # Transport default reuses BROKER_PORT (already parsed above) instead of
    # re-reading the environment: HTTP-ish ports imply a websocket listener,
    # anything else is assumed raw TCP.  An explicit MQTT_TRANSPORT wins.
    WS_TRANSPORT = os.getenv(
        "MQTT_TRANSPORT",
        "websockets" if BROKER_PORT in (80, 443, 7860) else "tcp",
    )

    API_HOST = "0.0.0.0"
    API_PORT = 8001

    TIMEOUT_SEC = 180.0    # max seconds to wait for a worker to finish one request
    SESSION_EXPIRY = 45.0  # worker dropped if no heartbeat within this window
    DEBUG_MODE = os.getenv("DEBUG_MODE", "false").lower() in ("1", "true", "yes")
41
 
42
  config = Config()
43
+ logging.basicConfig(
44
+ level=logging.DEBUG if config.DEBUG_MODE else logging.INFO,
45
+ format="%(asctime)s [%(levelname)s] %(message)s",
46
+ )
47
+ logger = logging.getLogger("zen-proxy")
48
 
49
+ # ════════════════════════════════════════════════════════════
50
+ # PYDANTIC MODELS
51
+ # ════════════════════════════════════════════════════════════
52
 
53
  class ChatMessage(BaseModel):
54
  role: str
 
76
  model: str
77
  choices: List[ChoiceChunk]
78
 
79
+ # ════════════════════════════════════════════════════════════
80
+ # MQTT PROXY ENGINE
81
+ # ════════════════════════════════════════════════════════════
82
 
83
class ProxyEngine:
    """Bridges OpenAI-style chat requests onto MQTT browser-tab workers.

    Workers announce themselves on ``arena-ai/global/heartbeat``; requests go
    to ``arena-ai/<sid>/request`` and streamed chunks come back on
    ``arena-ai/<sid>/response``.  Paho callbacks run in paho's network
    thread; results are handed to the asyncio loop via
    ``call_soon_threadsafe``.
    """

    def __init__(self):
        self.client_id = f"proxy-{uuid.uuid4().hex[:8]}"
        # sid -> {last_seen, model, status, pressure}; written by the paho
        # thread (heartbeats), pruned/read from the asyncio side.
        # NOTE(review): cross-thread dict access relies on the GIL — confirm
        # no compound read-modify-write races matter here.
        self.workers: Dict[str, Dict] = {}
        self._queues: Dict[str, asyncio.Queue] = {}   # request id -> chunk queue
        self._loop: Optional[asyncio.AbstractEventLoop] = None
        self.connected = False
        self.activity_log: deque = deque(maxlen=200)  # ring buffer for the admin UI

        self.stats = dict(
            start_time=time.time(),
            total_requests=0,
            active_streams=0,
            completed=0,
            failed=0,
            total_chunks=0,
            heartbeats_rx=0,
        )

        self.mqtt = mqtt.Client(
            callback_api_version=CallbackAPIVersion.VERSION2,
            client_id=self.client_id,
            transport=config.WS_TRANSPORT,
        )
        if config.USE_TLS:
            self.mqtt.tls_set()
        if config.WS_TRANSPORT == "websockets":
            self.mqtt.ws_set_options(
                path=config.WS_PATH,
                headers={"Sec-WebSocket-Protocol": "mqtt"},
            )
        self.mqtt.on_connect = self._on_connect
        self.mqtt.on_message = self._on_message
        self.mqtt.on_disconnect = self._on_disconnect

    # ── MQTT callbacks (run in paho thread) ──────────────────

    def _on_connect(self, client, userdata, flags, rc, props=None):
        """Subscribe to worker topics once the broker accepts us."""
        if rc == 0:
            self.connected = True
            logger.info("✅ MQTT connected (%s:%s %s)",
                        config.BROKER_HOST, config.BROKER_PORT, config.WS_TRANSPORT)
            client.subscribe("arena-ai/+/response")
            client.subscribe("arena-ai/global/heartbeat")
            self._log("system", "mqtt/connect", "Connected to broker")
        else:
            logger.error("❌ MQTT connect failed rc=%s", rc)

    def _on_disconnect(self, client, userdata, flags, rc, props=None):
        """Flag the broker link down; paho's loop handles reconnection."""
        self.connected = False
        logger.warning("⚠️ MQTT disconnected rc=%s — will auto-reconnect", rc)
        self._log("system", "mqtt/disconnect", f"Disconnected rc={rc}")

    def _on_message(self, client, userdata, msg):
        """Route heartbeats into the worker table and response chunks into
        their per-request queue on the asyncio loop."""
        try:
            topic = msg.topic
            payload = json.loads(msg.payload.decode())

            if topic == "arena-ai/global/heartbeat":
                sid = payload.get("id")
                if sid:
                    # Look pressure up once; it feeds both the table and log.
                    pressure = payload.get("pressure", 0)
                    self.workers[sid] = dict(
                        last_seen=time.time(),
                        model=payload.get("model", "AI-Worker"),
                        status=payload.get("status", "ready"),
                        pressure=pressure,
                    )
                    self.stats["heartbeats_rx"] += 1
                    self._log("heartbeat", topic, f"{sid} p={pressure}")
                return

            if topic.endswith("/response"):
                rid = payload.get("id")
                if rid and rid in self._queues and self._loop:
                    self.stats["total_chunks"] += 1
                    # put_nowait is safe: queues are unbounded.
                    self._loop.call_soon_threadsafe(self._queues[rid].put_nowait, payload)
                    self._log("response", topic, f"{rid}")
        except Exception as exc:
            # Never let a malformed payload kill the paho thread.
            logger.error("Message parse error: %s", exc)

    # ── helpers ──────────────────────────────────────────────

    def _log(self, kind: str, topic: str, summary: str):
        """Append one entry to the bounded activity log (admin console feed)."""
        self.activity_log.append(dict(
            ts=time.time(),
            time=time.strftime("%H:%M:%S"),
            kind=kind,
            topic=topic,
            summary=summary,
        ))

    def set_loop(self, loop):
        """Record the server's event loop for thread-safe queue handoff."""
        self._loop = loop

    def get_active_workers(self) -> Dict[str, Dict]:
        """Prune workers whose heartbeat lapsed; return a snapshot of the rest."""
        now = time.time()
        expired = [s for s, i in self.workers.items()
                   if now - i["last_seen"] >= config.SESSION_EXPIRY]
        for s in expired:
            del self.workers[s]
        return dict(self.workers)

    # ── core chat generator ──────────────────────────────────

    async def chat(self, req: ChatCompletionRequest) -> AsyncGenerator[Dict, None]:
        """Route *req* to a worker and yield its response chunks.

        Routing order: explicit ``model:sid`` suffix, then least-pressure
        among ready workers whose model matches (or ``auto``).

        Raises:
            HTTPException: 503 if no worker is available, 504 on overall
                timeout, 502 on any other failure.
        """
        self.stats["total_requests"] += 1
        self.stats["active_streams"] += 1

        active = self.get_active_workers()
        target = None

        # 1) direct model:sid routing
        if ":" in req.model:
            sid = req.model.rsplit(":", 1)[-1]
            if sid in active:
                target = sid

        # 2) least-pressure routing among ready, model-matching workers
        if not target:
            cands = [(s, i) for s, i in active.items()
                     if (req.model in i["model"] or req.model == "auto")
                     and i["status"] == "ready"]
            if cands:
                # .get guards against a heartbeat payload missing "pressure".
                cands.sort(key=lambda x: x[1].get("pressure", 0))
                target = cands[0][0]

        if not target:
            # Raised before the try/finally below, so undo the stream count here.
            self.stats["active_streams"] -= 1
            self.stats["failed"] += 1
            raise HTTPException(503, "No active workers. Open a Zen Bridge tab.")

        rid = f"req-{uuid.uuid4().hex[:12]}"
        q: asyncio.Queue = asyncio.Queue()
        self._queues[rid] = q

        mqtt_payload = dict(
            id=rid,
            messages=[m.model_dump() for m in req.messages],
            stream=req.stream,
            temperature=req.temperature,
        )
        logger.info("📤 %s → %s (%s)", rid, active[target]["model"], target)
        self._log("request", f"arena-ai/{target}/request", rid)

        try:
            self.mqtt.publish(
                f"arena-ai/{target}/request", json.dumps(mqtt_payload), qos=1
            )
            deadline = time.time() + config.TIMEOUT_SEC
            while True:
                remaining = deadline - time.time()
                if remaining <= 0:
                    self.stats["failed"] += 1
                    raise HTTPException(504, "Worker response timeout")
                try:
                    # Wake at least every 30s so the deadline is re-checked.
                    chunk = await asyncio.wait_for(q.get(), timeout=min(remaining, 30))
                except asyncio.TimeoutError:
                    continue
                yield chunk

                choices = chunk.get("choices", [])
                # Either a streamed finish_reason or a full non-stream
                # completion object marks the end of the response.
                if (choices and choices[0].get("finish_reason")) \
                        or chunk.get("object") == "chat.completion":
                    self.stats["completed"] += 1
                    break
        except HTTPException:
            raise
        except Exception as exc:
            self.stats["failed"] += 1
            logger.error("Chat error: %s", exc)
            raise HTTPException(502, str(exc))
        finally:
            self.stats["active_streams"] -= 1
            self._queues.pop(rid, None)
256
+
257
+
258
+ engine = ProxyEngine()
259
+
260
+ # ════════════════════════════════════════════════════════════
261
+ # HTML β€” Landing Page
262
+ # ════════════════════════════════════════════════════════════
263
+
264
+ LANDING_HTML = """<!DOCTYPE html>
265
+ <html lang="en"><head><meta charset="UTF-8"><meta name="viewport" content="width=device-width,initial-scale=1">
266
+ <title>Zen AI Proxy</title>
267
+ <style>
268
+ *{box-sizing:border-box;margin:0;padding:0}
269
+ body{font-family:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;
270
+ background:#050505;color:#e4e4e7;display:flex;align-items:center;justify-content:center;min-height:100vh}
271
+ .wrap{text-align:center;max-width:480px;width:100%;padding:20px}
272
+ h1{font-size:2rem;margin-bottom:6px}
273
+ .sub{color:#71717a;margin-bottom:36px;font-size:.95rem}
274
+ .cards{display:flex;gap:16px;justify-content:center;flex-wrap:wrap}
275
+ a.c{display:block;background:#111;border:1px solid #222;border-radius:14px;
276
+ padding:32px 44px;text-decoration:none;color:#e4e4e7;transition:.2s}
277
+ a.c:hover{border-color:#3b82f6;background:#0a0f1a;transform:translateY(-2px)}
278
+ .ci{font-size:2rem;margin-bottom:10px}
279
+ .ct{font-weight:600;font-size:1.05rem}
280
+ .cd{color:#71717a;font-size:.8rem;margin-top:4px}
281
+ #st{margin-top:36px;font-size:.8rem;color:#52525b}
282
+ .d{display:inline-block;width:8px;height:8px;border-radius:50%;margin-right:6px;vertical-align:middle}
283
+ .d.on{background:#22c55e;box-shadow:0 0 8px #22c55e}
284
+ .d.off{background:#ef4444;box-shadow:0 0 8px #ef4444}
285
+ </style></head><body>
286
+ <div class="wrap">
287
+ <h1>⚑ Zen AI Proxy</h1>
288
+ <p class="sub">OpenAI-Compatible MQTT Bridge</p>
289
+ <div class="cards">
290
+ <a class="c" href="/chat"><div class="ci">πŸ’¬</div><div class="ct">Chat</div><div class="cd">AI chat interface</div></a>
291
+ <a class="c" href="/admin"><div class="ci">πŸ”§</div><div class="ct">Admin</div><div class="cd">Debug &amp; monitoring</div></a>
292
+ </div>
293
+ <div id="st">checking…</div>
294
+ </div>
295
+ <script>
296
+ fetch('/admin/api/stats').then(r=>r.json()).then(d=>{
297
+ const on=d.connection.connected;
298
+ document.getElementById('st').innerHTML=
299
+ `<span class="d ${on?'on':'off'}"></span>${on?'Connected':'Disconnected'} Β· `+
300
+ `${d.workers.count} worker(s) Β· ${d.stats.total_requests} requests`;
301
+ }).catch(()=>{document.getElementById('st').innerHTML='<span class="d off"></span>API unreachable'});
302
+ </script></body></html>"""
303
+
304
+ # ════════════════════════════════════════════════════════════
305
+ # HTML β€” Modern Chat UI
306
+ # ════════════════════════════════════════════════════════════
307
+
308
+ CHAT_HTML = r"""<!DOCTYPE html>
309
+ <html lang="en"><head><meta charset="UTF-8">
310
+ <meta name="viewport" content="width=device-width,initial-scale=1">
311
+ <title>Zen AI Chat</title>
312
+
313
+ <!-- markdown + highlight + sanitize -->
314
+ <script src="https://cdn.jsdelivr.net/npm/marked/marked.min.js"></script>
315
+ <link href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/github-dark-dimmed.min.css" rel="stylesheet">
316
+ <script src="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/highlight.min.js"></script>
317
+ <script src="https://cdn.jsdelivr.net/npm/dompurify@3/dist/purify.min.js"></script>
318
+
319
+ <style>
320
+ /* ── reset & vars ─────────────────────────────── */
321
+ *{box-sizing:border-box;margin:0;padding:0}
322
+ :root{
323
+ --bg0:#0a0a0a;--bg1:#111113;--bg2:#1a1a1d;--bg3:#232326;
324
+ --tx:#e4e4e7;--tx2:#a1a1aa;--tx3:#52525b;
325
+ --accent:#3b82f6;--accent2:#2563eb;
326
+ --border:#27272a;--green:#22c55e;--red:#ef4444;--amber:#ca8a04;
327
+ --radius:12px;--maxW:780px;
328
+ --font:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;
329
+ --mono:'SF Mono',SFMono-Regular,Consolas,'Liberation Mono',Menlo,monospace;
330
+ }
331
+
332
+ [data-theme="light"] {
333
+ --bg0:#ffffff;--bg1:#f4f4f5;--bg2:#e4e4e7;--bg3:#d4d4d8;
334
+ --tx:#18181b;--tx2:#3f3f46;--tx3:#71717a;
335
+ --accent:#2563eb;--accent2:#1d4ed8;
336
+ --border:#d4d4d8;--green:#16a34a;--red:#dc2626;--amber:#a16207;
337
+ }
338
+ [data-theme="light"] .msg-body code{background:#e4e4e7}
339
+ [data-theme="light"] .msg-body pre{background:#f4f4f5;border-color:#d4d4d8}
340
+ [data-theme="light"] .code-top{background:#e4e4e7;border-color:#d4d4d8}
341
+ [data-theme="light"] .msg-body strong{color:#18181b}
342
+ [data-theme="light"] .msg-body h1,
343
+ [data-theme="light"] .msg-body h2,
344
+ [data-theme="light"] .msg-body h3{color:#18181b}
345
+ html,body{height:100%}
346
+ body{font-family:var(--font);background:var(--bg0);color:var(--tx);display:flex;flex-direction:column;overflow:hidden}
347
+
348
+ /* ── header ───────────────────────────────────── */
349
+ .hdr{display:flex;align-items:center;justify-content:space-between;
350
+ padding:10px 20px;background:var(--bg1);border-bottom:1px solid var(--border);flex-shrink:0;gap:12px}
351
+ .hdr-l{display:flex;align-items:center;gap:12px}
352
+ .logo{font-weight:700;font-size:1.05rem;white-space:nowrap}
353
+ .logo span{color:var(--accent)}
354
+ select.ms{background:var(--bg2);color:var(--tx);border:1px solid var(--border);
355
+ border-radius:8px;padding:6px 10px;font-size:.82rem;outline:none;max-width:220px}
356
+ .hdr-r{display:flex;gap:6px}
357
+ .ib{background:transparent;border:1px solid var(--border);color:var(--tx2);border-radius:8px;
358
+ padding:6px 12px;font-size:.78rem;cursor:pointer;transition:.15s;text-decoration:none;white-space:nowrap}
359
+ .ib:hover{background:var(--bg2);color:var(--tx)}
360
+ #connDot{display:inline-block;width:7px;height:7px;border-radius:50%;margin-right:5px;vertical-align:middle;
361
+ background:var(--red);box-shadow:0 0 6px var(--red)}
362
+ #connDot.on{background:var(--green);box-shadow:0 0 6px var(--green)}
363
+
364
+ /* ── chat area ────────────────────────────────── */
365
+ .chat{flex:1;overflow-y:auto;padding:24px 20px 12px}
366
+ .msgs{max-width:var(--maxW);margin:0 auto}
367
+ .msg{margin-bottom:28px;animation:fadeUp .25s ease}
368
+ @keyframes fadeUp{from{opacity:0;transform:translateY(6px)}to{opacity:1;transform:none}}
369
+ .msg-hdr{display:flex;align-items:center;gap:8px;margin-bottom:6px}
370
+ .role{font-weight:600;font-size:.85rem}
371
+ .role-u{color:var(--accent)}.role-a{color:var(--green)}
372
+ .msg-body{font-size:.92rem;line-height:1.75;color:var(--tx);overflow-wrap:break-word}
373
+
374
+ /* markdown prose */
375
+ .msg-body p{margin-bottom:10px}.msg-body p:last-child{margin-bottom:0}
376
+ .msg-body ul,.msg-body ol{margin:8px 0 8px 20px}
377
+ .msg-body li{margin-bottom:4px}
378
+ .msg-body blockquote{border-left:3px solid var(--border);padding-left:14px;color:var(--tx2);margin:10px 0}
379
+ .msg-body a{color:var(--accent)}
380
+ .msg-body strong{color:#fff}
381
+ .msg-body h1,.msg-body h2,.msg-body h3{margin:18px 0 8px;color:#fff}
382
+ .msg-body table{border-collapse:collapse;margin:10px 0;width:100%}
383
+ .msg-body th,.msg-body td{border:1px solid var(--border);padding:6px 10px;font-size:.85rem;text-align:left}
384
+ .msg-body th{background:var(--bg2)}
385
+
386
+ /* code */
387
+ .msg-body code{background:var(--bg2);padding:2px 6px;border-radius:4px;font-size:.83em;font-family:var(--mono)}
388
+ .msg-body pre{position:relative;background:var(--bg1);border:1px solid var(--border);border-radius:8px;
389
+ margin:12px 0;overflow:hidden}
390
+ .msg-body pre code{display:block;padding:16px;background:transparent;font-size:.82rem;overflow-x:auto;line-height:1.55}
391
+ .code-top{display:flex;justify-content:space-between;align-items:center;
392
+ padding:6px 12px;background:var(--bg2);border-bottom:1px solid var(--border);font-size:.72rem;color:var(--tx3)}
393
+ .cp-btn{background:none;border:1px solid var(--border);color:var(--tx3);border-radius:4px;
394
+ padding:2px 8px;font-size:.72rem;cursor:pointer;transition:.15s}
395
+ .cp-btn:hover{color:var(--tx);border-color:var(--tx3)}
396
+
397
+ /* thinking block */
398
+ .think{border-left:3px solid var(--amber);background:rgba(202,138,4,.06);
399
+ padding:10px 14px;margin:6px 0 10px;border-radius:0 8px 8px 0}
400
+ .think-tog{display:flex;align-items:center;gap:6px;cursor:pointer;font-size:.78rem;
401
+ color:var(--amber);user-select:none}
402
+ .think-tog svg{transition:transform .2s}
403
+ .think-tog.open svg{transform:rotate(90deg)}
404
+ .think-body{margin-top:8px;font-size:.82rem;color:var(--tx2);line-height:1.6;white-space:pre-wrap;
405
+ max-height:300px;overflow-y:auto;display:none}
406
+ .think-tog.open+.think-body{display:block}
407
+
408
+ /* welcome */
409
+ .welcome{text-align:center;padding:80px 20px;color:var(--tx3)}
410
+ .welcome h2{font-size:1.4rem;color:var(--tx2);margin-bottom:6px;font-weight:600}
411
+ .welcome p{font-size:.9rem}
412
+
413
+ /* loading dots */
414
+ .ld span{display:inline-block;animation:bk 1.4s infinite both;font-size:1.2rem;color:var(--tx3)}
415
+ .ld span:nth-child(2){animation-delay:.2s}.ld span:nth-child(3){animation-delay:.4s}
416
+ @keyframes bk{0%,80%,100%{opacity:.15}40%{opacity:1}}
417
+
418
+ /* error */
419
+ .err{color:var(--red);font-size:.88rem;padding:10px 14px;background:rgba(239,68,68,.08);
420
+ border:1px solid rgba(239,68,68,.2);border-radius:8px;margin-top:6px}
421
+
422
+ /* ── input area ───────────────────────────────── */
423
+ .inp{padding:12px 20px 20px;background:var(--bg0);border-top:1px solid var(--border);flex-shrink:0}
424
+ .inp-c{max-width:var(--maxW);margin:0 auto}
425
+ .inp-w{display:flex;align-items:flex-end;background:var(--bg1);border:1px solid var(--border);
426
+ border-radius:var(--radius);padding:10px 14px;transition:border-color .2s}
427
+ .inp-w:focus-within{border-color:var(--accent)}
428
+ textarea{flex:1;background:transparent;border:none;color:var(--tx);font-size:.92rem;line-height:1.5;
429
+ resize:none;outline:none;font-family:var(--font);max-height:180px;min-height:24px}
430
+ textarea::placeholder{color:var(--tx3)}
431
+ .send{background:var(--accent);color:#fff;border:none;border-radius:8px;width:36px;height:36px;
432
+ display:flex;align-items:center;justify-content:center;cursor:pointer;margin-left:8px;
433
+ flex-shrink:0;transition:.15s}
434
+ .send:hover{background:var(--accent2)}.send:disabled{opacity:.4;cursor:not-allowed}
435
+ .hint{text-align:center;margin-top:6px;font-size:.72rem;color:var(--tx3)}
436
+
437
+ /* scrollbar */
438
+ ::-webkit-scrollbar{width:5px}::-webkit-scrollbar-track{background:transparent}
439
+ ::-webkit-scrollbar-thumb{background:#333;border-radius:3px}
440
+
441
+ @media(max-width:600px){
442
+ .hdr{padding:8px 12px;gap:8px}.chat{padding:14px 10px}.inp{padding:8px 10px 14px}
443
+ .hdr-r .ib:not(:first-child){display:none}select.ms{max-width:140px}
444
+ }
445
+ </style>
446
+
447
+
448
+ <!-- Highlight.js CSS theme -->
449
+ <link rel="stylesheet"
450
+ href="https://cdnjs.cloudflare.com/ajax/libs/highlight.js/11.9.0/styles/default.min.css">
451
+
452
+ </head>
453
+ <body>
454
+
455
+ <!-- HEADER -->
456
+ <div class="hdr">
457
+ <div class="hdr-l">
458
+ <div class="logo">⚑ <span>Zen AI</span></div>
459
+ <select class="ms" id="mSel"><option value="auto">loading…</option></select>
460
+ <span id="connDot" title="Broker"></span>
461
+ </div>
462
+ <div class="hdr-r">
463
+ <button class="ib" onclick="clearChat()">πŸ—‘ Clear</button>
464
+ <a class="ib" href="/admin" target="_blank">πŸ”§ Admin</a>
465
+ <button class="ib" id="themeBtn" onclick="toggleTheme()">πŸŒ™</button>
466
+
467
+ </div>
468
+ </div>
469
+
470
+ <!-- MESSAGES -->
471
+ <div class="chat" id="chatScroll">
472
+ <div class="msgs" id="msgs">
473
+ <div class="welcome" id="wel"><h2>Zen AI Chat</h2><p>Select a model and start a conversation</p></div>
474
+ </div>
475
+ </div>
476
+
477
+ <!-- INPUT -->
478
+ <div class="inp">
479
+ <div class="inp-c">
480
+ <div class="inp-w">
481
+ <textarea id="ta" rows="1" placeholder="Message Zen AI…"></textarea>
482
+ <button class="send" id="sendBtn" title="Send">
483
+ <svg width="16" height="16" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round">
484
+ <path d="M14.5 1.5L6.5 9.5M14.5 1.5L10 14.5 6.5 9.5 1.5 6 14.5 1.5z"/>
485
+ </svg>
486
+ </button>
487
+ </div>
488
+ <div class="hint">Enter to send Β· Shift + Enter for new line</div>
489
+ </div>
490
+ </div>
491
+
492
+ <script>
493
+ const API = window.location.origin;
494
+ let history = [];
495
+ let busy = false;
496
+
497
+ /* ── theme toggle ───────────────────────────── */
498
+ function toggleTheme(){
499
+ const t = document.documentElement.getAttribute('data-theme') === 'light' ? '' : 'light';
500
+ document.documentElement.setAttribute('data-theme', t);
501
+ document.getElementById('themeBtn').textContent = t === 'light' ? 'β˜€οΈ' : 'πŸŒ™';
502
+ localStorage.setItem('zen-theme', t);
503
+ }
504
+ (function(){
505
+ const saved = localStorage.getItem('zen-theme');
506
+ if(saved === 'light'){
507
+ document.documentElement.setAttribute('data-theme','light');
508
+ document.addEventListener('DOMContentLoaded',()=>{
509
+ const b=document.getElementById('themeBtn'); if(b) b.textContent='β˜€οΈ';
510
+ });
511
+ }
512
+ })();
513
+
514
+ /* ── marked config ──────────────────────────── */
515
+ /* ── marked config (works with marked v12+) ─────── */
516
+ const renderer = new marked.Renderer();
517
+
518
+ renderer.code = function({ text, lang, escaped }) {
519
+ let hi;
520
+ const language = lang || 'code';
521
+ try {
522
+ hi = lang && hljs.getLanguage(lang)
523
+ ? hljs.highlight(text, { language: lang }).value
524
+ : hljs.highlightAuto(text).value;
525
+ } catch (e) {
526
+ // Escape HTML manually on fallback
527
+ hi = text.replace(/&/g,'&amp;').replace(/</g,'&lt;').replace(/>/g,'&gt;');
528
+ }
529
+ const safeLabel = language.replace(/</g, '&lt;');
530
+ return `<pre><div class="code-top"><span>${safeLabel}</span>`
531
+ + `<button class="cp-btn" data-copy>Copy</button></div>`
532
+ + `<code class="hljs">${hi}</code></pre>`;
533
+ };
534
+
535
+ marked.setOptions({ renderer, breaks: true, gfm: true });
536
+
537
+ function render(md) {
538
+ const html = marked.parse(md || '');
539
+ return DOMPurify.sanitize(html, {
540
+ ADD_TAGS: ['button'],
541
+ ADD_ATTR: ['data-copy', 'class']
542
+ });
543
+ }
544
+
545
+ /* ── copy-code via event delegation (survives DOMPurify) ── */
546
+ document.addEventListener('click', function(e) {
547
+ const btn = e.target.closest('[data-copy]');
548
+ if (!btn) return;
549
+ const code = btn.closest('pre').querySelector('code');
550
+ navigator.clipboard.writeText(code.textContent);
551
+ btn.textContent = 'Copied!';
552
+ setTimeout(() => btn.textContent = 'Copy', 1500);
553
+ });
554
+
555
+
556
+ /* ── helpers ─────────────────────────────────── */
557
+ const $=s=>document.getElementById(s);
558
+ const ta=$('ta'), msgs=$('msgs'), chatScroll=$('chatScroll');
559
+
560
+ ta.addEventListener('input',()=>{ta.style.height='auto';ta.style.height=Math.min(ta.scrollHeight,180)+'px';});
561
+ ta.addEventListener('keydown',e=>{if(e.key==='Enter'&&!e.shiftKey){e.preventDefault();send();}});
562
+ $('sendBtn').addEventListener('click',send);
563
+
564
+ function scroll(){chatScroll.scrollTop=chatScroll.scrollHeight;}
565
+
566
+ function cpCode(btn){
567
+ const code=btn.closest('pre').querySelector('code');
568
+ navigator.clipboard.writeText(code.textContent);
569
+ btn.textContent='Copied!';setTimeout(()=>btn.textContent='Copy',1500);
570
+ }
571
 
572
+ /* ── fetch models ────────────────────────────── */
573
+ async function fetchModels(){
574
+ try{
575
+ const r=await fetch(API+'/v1/models');const d=await r.json();
576
+ const s=$('mSel');const cur=s.value;s.innerHTML='';
577
+ d.data.forEach(m=>{const o=document.createElement('option');o.value=m.id;o.textContent=m.id;s.appendChild(o);});
578
+ if([...s.options].find(o=>o.value===cur))s.value=cur;
579
+ }catch(e){console.warn('model fetch failed',e);}
580
+ }
581
 
582
+ /* ── connection dot ──────────────────────────── */
583
+ async function checkConn(){
584
+ try{const r=await fetch(API+'/admin/api/stats');const d=await r.json();
585
+ $('connDot').className=d.connection.connected?'on':'';
586
+ }catch(e){$('connDot').className='';}
587
+ }
588
+
589
+ /* ── create message element ──────────────────── */
590
+ function mkMsg(role){
591
+ const w=document.createElement('div');w.className='msg';
592
+ const hdr=document.createElement('div');hdr.className='msg-hdr';
593
+ const r=document.createElement('span');r.className=`role role-${role[0]}`;
594
+ r.textContent=role==='user'?'You':'Assistant';
595
+ hdr.appendChild(r);
596
+ const body=document.createElement('div');body.className='msg-body';
597
+ w.appendChild(hdr);w.appendChild(body);
598
+ return{el:w,body};
599
+ }
600
+
601
+ /* ── clear ───────────────────────────────────── */
602
+ function clearChat(){
603
+ history=[];msgs.innerHTML='<div class="welcome" id="wel"><h2>Zen AI Chat</h2><p>Select a model and start a conversation</p></div>';
604
+ }
605
+
606
+ /* ── send message ────────────────────────────── */
607
+ async function send(){
608
+ if(busy)return;
609
+ const txt=ta.value.trim();if(!txt)return;
610
+ const wel=$('wel');if(wel)wel.remove();
611
+
612
+ // user msg
613
+ const u=mkMsg('user');u.body.textContent=txt;msgs.appendChild(u.el);
614
+ history.push({role:'user',content:txt});
615
+ ta.value='';ta.style.height='auto';scroll();
616
+
617
+ // assistant placeholder
618
+ const a=mkMsg('assistant');
619
+ a.body.innerHTML='<span class="ld"><span>●</span><span>●</span><span>●</span></span>';
620
+ msgs.appendChild(a.el);scroll();
621
+
622
+ busy=true;$('sendBtn').disabled=true;
623
+ const model=$('mSel').value||'auto';
624
+
625
+ try{
626
+ const res=await fetch(API+'/v1/chat/completions',{
627
+ method:'POST',headers:{'Content-Type':'application/json'},
628
+ body:JSON.stringify({model,messages:history,stream:true})
629
+ });
630
+ if(!res.ok){const e=await res.json().catch(()=>({}));throw new Error(e.detail||res.statusText);}
631
+
632
+ const reader=res.body.getReader(), dec=new TextDecoder();
633
+ let buf='',content='',thinking='',thinkEl=null,contentEl=null;
634
+
635
+ while(true){
636
+ const{done,value}=await reader.read();
637
+ if(done)break;
638
+ buf+=dec.decode(value,{stream:true});
639
+ const lines=buf.split('\n');buf=lines.pop()||'';
640
+
641
+ for(const ln of lines){
642
+ if(!ln.startsWith('data: '))continue;
643
+ const raw=ln.slice(6).trim();
644
+ if(raw==='[DONE]')continue;
645
+ let j;try{j=JSON.parse(raw);}catch(e){continue;}
646
+ if(j.error)throw new Error(j.error);
647
+ const d=j.choices&&j.choices[0]&&j.choices[0].delta;
648
+ if(!d)continue;
649
+
650
+ /* thinking */
651
+ if(d.reasoning_content){
652
+ thinking+=d.reasoning_content;
653
+ if(!thinkEl){
654
+ a.body.innerHTML='';
655
+ thinkEl=document.createElement('div');thinkEl.className='think';
656
+ thinkEl.innerHTML=`<div class="think-tog open" onclick="this.classList.toggle('open')">
657
+ <svg width="12" height="12" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2"><path d="M9 18l6-6-6-6"/></svg>
658
+ Thinking…</div><div class="think-body"></div>`;
659
+ a.body.appendChild(thinkEl);
660
+ }
661
+ //thinkEl.querySelector('.think-body').textContent=render(thinking);
662
+ //thinkEl.querySelector('.think-body').innerHTML=render(thinking);
663
+ thinkEl.querySelector('.think-body').innerHTML = render(thinking);
664
+ }
665
+
666
+ /* content */
667
+ if(d.content){
668
+ content+=d.content;
669
+ if(!contentEl){contentEl=document.createElement('div');contentEl.className='rendered';a.body.appendChild(contentEl);}
670
+ contentEl.innerHTML=render(content);
671
+ }
672
+ scroll();
673
+ }
674
+ }
675
+ // If nothing was rendered (empty response)
676
+ if(!content&&!thinking){a.body.innerHTML='<span style="color:var(--tx3)">(empty response)</span>';}
677
+ history.push({role:'assistant',content});
678
+ }catch(e){
679
+ a.body.innerHTML=`<div class="err">⚠ ${e.message}</div>`;
680
+ }finally{
681
+ busy=false;$('sendBtn').disabled=false;scroll();ta.focus();
682
+ }
683
+ }
684
+
685
+ /* ── init ────────────────────────────────────── */
686
+ fetchModels();checkConn();
687
+ setInterval(fetchModels,15000);
688
+ setInterval(checkConn,8000);
689
+ ta.focus();
690
+ </script>
691
+ </body></html>"""
692
+
693
+ # ════════════════════════════════════════════════════════════
694
+ # HTML β€” Admin Debug Console
695
+ # ════════════════════════════════════════════════════════════
696
+
697
+ ADMIN_HTML = r"""<!DOCTYPE html>
698
+ <html lang="en"><head><meta charset="UTF-8">
699
+ <meta name="viewport" content="width=device-width,initial-scale=1">
700
+ <title>Zen Admin Console</title>
701
+ <style>
702
+ *{box-sizing:border-box;margin:0;padding:0}
703
+ :root{--bg:#050505;--c1:#0e0e10;--c2:#161618;--c3:#1e1e21;--tx:#d4d4d8;--tx2:#a1a1aa;--tx3:#52525b;
704
+ --bdr:#222;--accent:#3b82f6;--green:#22c55e;--red:#ef4444;--amber:#eab308;
705
+ --font:-apple-system,BlinkMacSystemFont,'Segoe UI',Roboto,sans-serif;
706
+ --mono:'SF Mono',Consolas,'Liberation Mono',monospace;}
707
+ body{font-family:var(--font);background:var(--bg);color:var(--tx);padding:0;min-height:100vh}
708
+
709
+ /* header */
710
+ .top{background:var(--c1);border-bottom:1px solid var(--bdr);padding:14px 24px;
711
+ display:flex;align-items:center;justify-content:space-between;flex-wrap:wrap;gap:10px}
712
+ .top h1{font-size:1rem;font-weight:700;white-space:nowrap}
713
+ .top h1 span{color:var(--accent)}
714
+ .top-r{display:flex;align-items:center;gap:14px;font-size:.8rem;color:var(--tx2)}
715
+ .dot{display:inline-block;width:9px;height:9px;border-radius:50%;vertical-align:middle;margin-right:5px}
716
+ .dot.on{background:var(--green);box-shadow:0 0 8px var(--green)}
717
+ .dot.off{background:var(--red);box-shadow:0 0 8px var(--red)}
718
+ .lnk{color:var(--accent);text-decoration:none;font-size:.8rem}
719
+ .lnk:hover{text-decoration:underline}
720
+
721
+ /* layout */
722
+ .grid{display:grid;grid-template-columns:1fr 1fr;gap:14px;padding:20px 24px;max-width:1200px;margin:0 auto}
723
+ .full{grid-column:1/-1}
724
+ @media(max-width:700px){.grid{grid-template-columns:1fr;padding:12px}}
725
+
726
+ /* card */
727
+ .card{background:var(--c1);border:1px solid var(--bdr);border-radius:12px;overflow:hidden}
728
+ .card-h{padding:12px 16px;border-bottom:1px solid var(--bdr);display:flex;align-items:center;justify-content:space-between}
729
+ .card-t{font-size:.7rem;text-transform:uppercase;letter-spacing:.06em;color:var(--tx3);font-weight:600}
730
+ .card-b{padding:16px}
731
+
732
+ /* kv rows */
733
+ .kv{display:flex;justify-content:space-between;padding:6px 0;border-bottom:1px solid #111;font-size:.83rem}
734
+ .kv:last-child{border:none}
735
+ .kv .k{color:var(--tx3)}.kv .v{color:var(--tx);font-family:var(--mono);font-size:.8rem}
736
+
737
+ /* stat boxes */
738
+ .stats{display:grid;grid-template-columns:repeat(auto-fit,minmax(120px,1fr));gap:10px}
739
+ .stat{background:var(--c2);border-radius:8px;padding:14px;text-align:center}
740
+ .stat .n{font-size:1.5rem;font-weight:700;color:#fff;font-family:var(--mono)}
741
+ .stat .l{font-size:.68rem;text-transform:uppercase;color:var(--tx3);margin-top:4px;letter-spacing:.04em}
742
+
743
+ /* table */
744
+ table{width:100%;border-collapse:collapse}
745
+ th,td{padding:10px 12px;text-align:left;border-bottom:1px solid #111;font-size:.82rem}
746
+ th{color:var(--tx3);font-size:.68rem;text-transform:uppercase;letter-spacing:.05em;font-weight:600}
747
+ td{color:var(--tx2)}
748
+ code{background:var(--c3);color:var(--accent);padding:2px 6px;border-radius:4px;font-size:.78rem;font-family:var(--mono)}
749
+ .badge{display:inline-block;padding:2px 8px;border-radius:10px;font-size:.7rem;font-weight:600}
750
+ .badge-ok{background:rgba(34,197,94,.12);color:var(--green)}
751
+ .badge-busy{background:rgba(234,179,8,.12);color:var(--amber)}
752
+ .badge-off{background:rgba(239,68,68,.12);color:var(--red)}
753
+ .empty{text-align:center;padding:32px;color:var(--tx3);font-size:.85rem}
754
+
755
+ /* log */
756
+ .log{max-height:350px;overflow-y:auto;font-family:var(--mono);font-size:.78rem}
757
+ .log-row{display:flex;gap:10px;padding:5px 0;border-bottom:1px solid #0c0c0c}
758
+ .log-row:hover{background:var(--c2)}
759
+ .log-t{color:var(--tx3);flex-shrink:0;width:64px}
760
+ .log-k{border-radius:4px;padding:1px 6px;font-size:.7rem;flex-shrink:0}
761
+ .log-k.heartbeat{background:rgba(34,197,94,.1);color:var(--green)}
762
+ .log-k.response{background:rgba(59,130,246,.1);color:var(--accent)}
763
+ .log-k.request{background:rgba(168,85,247,.1);color:#a855f7}
764
+ .log-k.system{background:rgba(234,179,8,.1);color:var(--amber)}
765
+ .log-topic{color:var(--tx3);flex:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
766
+ .log-sum{color:var(--tx2);flex:1;overflow:hidden;text-overflow:ellipsis;white-space:nowrap}
767
+
768
+ /* filter bar */
769
+ .fbar{display:flex;gap:6px;margin-bottom:10px;flex-wrap:wrap}
770
+ .fbtn{background:var(--c2);border:1px solid var(--bdr);color:var(--tx3);border-radius:6px;
771
+ padding:4px 10px;font-size:.72rem;cursor:pointer;transition:.15s}
772
+ .fbtn:hover,.fbtn.act{border-color:var(--accent);color:var(--accent)}
773
+
774
+ /* refresh indicator */
775
+ .pulse{display:inline-block;width:6px;height:6px;border-radius:50%;background:var(--accent);
776
+ animation:pulse 2s infinite}
777
+ @keyframes pulse{0%,100%{opacity:.3}50%{opacity:1}}
778
+ </style></head>
779
+ <body>
780
+
781
+ <div class="top">
782
+ <h1>πŸ”§ <span>Zen</span> Admin Console</h1>
783
+ <div class="top-r">
784
+ <span><span class="dot off" id="dot"></span><span id="connLabel">Connecting…</span></span>
785
+ <span><span class="pulse"></span> Live</span>
786
+ <a class="lnk" href="/chat">← Chat</a>
787
+ <a class="lnk" href="/">Home</a>
788
+ </div>
789
+ </div>
790
+
791
+ <div class="grid">
792
+
793
+ <!-- CONNECTION -->
794
+ <div class="card">
795
+ <div class="card-h"><span class="card-t">Connection</span></div>
796
+ <div class="card-b" id="connCard">β€”</div>
797
+ </div>
798
+
799
+ <!-- CONFIG -->
800
+ <div class="card">
801
+ <div class="card-h"><span class="card-t">Configuration</span></div>
802
+ <div class="card-b" id="cfgCard">β€”</div>
803
+ </div>
804
+
805
+ <!-- STATS -->
806
+ <div class="card full">
807
+ <div class="card-h"><span class="card-t">Engine Statistics</span><span id="uptime" style="font-size:.75rem;color:var(--tx3)"></span></div>
808
+ <div class="card-b"><div class="stats" id="statsGrid">β€”</div></div>
809
+ </div>
810
+
811
+ <!-- WORKERS -->
812
+ <div class="card full">
813
+ <div class="card-h"><span class="card-t">Workers</span><span id="wCount" style="font-size:.75rem;color:var(--tx3)">0</span></div>
814
+ <div class="card-b" style="padding:0"><div id="wTable" class="empty">No workers connected</div></div>
815
+ </div>
816
+
817
+ <!-- ACTIVITY LOG -->
818
+ <div class="card full">
819
+ <div class="card-h"><span class="card-t">Network Activity</span>
820
+ <button class="fbtn" onclick="clearLog()" style="font-size:.68rem">Clear</button>
821
+ </div>
822
+ <div class="card-b">
823
+ <div class="fbar" id="filters"></div>
824
+ <div class="log" id="logBox"><div class="empty">Waiting for activity…</div></div>
825
+ </div>
826
+ </div>
827
+
828
+ <!-- PENDING QUEUES -->
829
+ <div class="card full">
830
+ <div class="card-h"><span class="card-t">Pending Request Queues</span><span id="qCount" style="font-size:.75rem;color:var(--tx3)">0</span></div>
831
+ <div class="card-b"><div id="qInfo" class="empty" style="padding:16px">None</div></div>
832
+ </div>
833
+
834
+ </div>
835
+
836
+ <script>
837
+ const API=window.location.origin;
838
+ let filter='all';
839
+ let logData=[];
840
+
841
+ function fmt(s){const h=Math.floor(s/3600),m=Math.floor(s%3600/60),ss=Math.floor(s%60);return `${h}h ${m}m ${ss}s`;}
842
+
843
+ function badge(status){
844
+ if(status==='ready')return '<span class="badge badge-ok">ready</span>';
845
+ if(status==='busy')return '<span class="badge badge-busy">busy</span>';
846
+ return '<span class="badge badge-off">'+status+'</span>';
847
+ }
848
+
849
+ function renderConn(c){
850
+ return `<div class="kv"><span class="k">Broker</span><span class="v">${c.broker}</span></div>
851
+ <div class="kv"><span class="k">Transport</span><span class="v">${c.transport}</span></div>
852
+ <div class="kv"><span class="k">Client ID</span><span class="v"><code>${c.client_id}</code></span></div>
853
+ <div class="kv"><span class="k">Status</span><span class="v">${c.connected?'<span style="color:var(--green)">Connected</span>':'<span style="color:var(--red)">Disconnected</span>'}</span></div>
854
+ <div class="kv"><span class="k">Uptime</span><span class="v">${fmt(c.uptime_seconds)}</span></div>`;
855
+ }
856
+
857
+ function renderCfg(c){
858
+ return `<div class="kv"><span class="k">Timeout</span><span class="v">${c.timeout_sec}s</span></div>
859
+ <div class="kv"><span class="k">Session Expiry</span><span class="v">${c.session_expiry}s</span></div>
860
+ <div class="kv"><span class="k">Debug Mode</span><span class="v">${c.debug_mode?'ON':'OFF'}</span></div>`;
861
+ }
862
+
863
+ function renderStats(s){
864
+ return `<div class="stat"><div class="n">${s.total_requests}</div><div class="l">Total Requests</div></div>
865
+ <div class="stat"><div class="n" style="color:var(--accent)">${s.active_streams}</div><div class="l">Active Streams</div></div>
866
+ <div class="stat"><div class="n" style="color:var(--green)">${s.completed}</div><div class="l">Completed</div></div>
867
+ <div class="stat"><div class="n" style="color:var(--red)">${s.failed}</div><div class="l">Failed</div></div>
868
+ <div class="stat"><div class="n">${s.total_chunks}</div><div class="l">Chunks Processed</div></div>
869
+ <div class="stat"><div class="n">${s.heartbeats_rx}</div><div class="l">Heartbeats RX</div></div>
870
+ <div class="stat"><div class="n">${s.pending_queues}</div><div class="l">Pending Queues</div></div>`;
871
+ }
872
+
873
+ function renderWorkers(w){
874
+ if(!w.length)return '<div class="empty">No workers connected. Open a Zen Bridge browser tab.</div>';
875
+ let h='<table><thead><tr><th>Model</th><th>Session ID</th><th>Status</th><th>Pressure</th><th>Last Seen</th></tr></thead><tbody>';
876
+ w.forEach(r=>{
877
+ h+=`<tr><td><strong>${r.model}</strong></td><td><code>${r.sid}</code></td>
878
+ <td>${badge(r.status)}</td><td>${r.pressure.toFixed(1)}</td>
879
+ <td>${r.last_seen_ago.toFixed(0)}s ago</td></tr>`;
880
+ });
881
+ return h+'</tbody></table>';
882
+ }
883
+
884
+ function renderLog(items){
885
+ const f=filter==='all'?items:items.filter(i=>i.kind===filter);
886
+ if(!f.length)return '<div class="empty">No activity</div>';
887
+ return f.map(i=>`<div class="log-row">
888
+ <span class="log-t">${i.time}</span>
889
+ <span class="log-k ${i.kind}">${i.kind}</span>
890
+ <span class="log-topic">${i.topic}</span>
891
+ <span class="log-sum">${i.summary}</span>
892
+ </div>`).join('');
893
+ }
894
+
895
+ function setFilter(f,btn){
896
+ filter=f;
897
+ document.querySelectorAll('.fbtn[data-f]').forEach(b=>b.classList.remove('act'));
898
+ btn.classList.add('act');
899
+ document.getElementById('logBox').innerHTML=renderLog(logData);
900
+ }
901
+ function clearLog(){logData=[];document.getElementById('logBox').innerHTML='<div class="empty">Cleared</div>';}
902
+
903
+ function buildFilters(){
904
+ const kinds=['all','heartbeat','response','request','system'];
905
+ document.getElementById('filters').innerHTML=kinds.map(k=>
906
+ `<button class="fbtn${k==='all'?' act':''}" data-f="${k}" onclick="setFilter('${k}',this)">${k}</button>`
907
+ ).join('');
908
+ }
909
+
910
+ async function refresh(){
911
+ try{
912
+ const r=await fetch(API+'/admin/api/stats');const d=await r.json();
913
+ // connection
914
+ const dot=document.getElementById('dot');
915
+ dot.className=d.connection.connected?'dot on':'dot off';
916
+ document.getElementById('connLabel').textContent=d.connection.connected?'Connected':'Disconnected';
917
+ document.getElementById('connCard').innerHTML=renderConn(d.connection);
918
+ document.getElementById('cfgCard').innerHTML=renderCfg(d.config);
919
+ // stats
920
+ document.getElementById('uptime').textContent='Uptime: '+fmt(d.connection.uptime_seconds);
921
+ document.getElementById('statsGrid').innerHTML=renderStats(d.stats);
922
+ // workers
923
+ document.getElementById('wCount').textContent=d.workers.count+' active';
924
+ document.getElementById('wTable').innerHTML=renderWorkers(d.workers.details);
925
+ // log
926
+ if(d.recent_activity.length){
927
+ logData=d.recent_activity;
928
+ document.getElementById('logBox').innerHTML=renderLog(logData);
929
+ }
930
+ // queues
931
+ document.getElementById('qCount').textContent=d.stats.pending_queues;
932
+ document.getElementById('qInfo').textContent=d.stats.pending_queues>0
933
+ ? `${d.stats.pending_queues} request(s) waiting for worker response`
934
+ : 'No pending requests';
935
+ }catch(e){
936
+ document.getElementById('connLabel').textContent='API Error';
937
+ document.getElementById('dot').className='dot off';
938
+ }
939
+ }
940
+
941
+ buildFilters();
942
+ refresh();
943
+ setInterval(refresh,2500);
944
+ </script>
945
+ </body></html>"""
946
+
947
+ # ════════════════════════════════════════════════════════════
948
+ # FASTAPI APPLICATION
949
+ # ════════════════════════════════════════════════════════════
950
 
951
@asynccontextmanager
async def lifespan(app: FastAPI):
    """App lifespan: bring the MQTT bridge up before serving, tear it down after.

    Startup: hand the running asyncio loop to the engine (so MQTT callbacks can
    schedule coroutines onto it), connect to the broker, and start paho's
    background network thread. Shutdown (after ``yield``): stop the network
    thread and disconnect from the broker.
    """
    engine.set_loop(asyncio.get_running_loop())
    engine.mqtt.connect(config.BROKER_HOST, config.BROKER_PORT)
    engine.mqtt.loop_start()  # paho network loop runs in its own thread
    logger.info("🚀 Zen Proxy starting on %s:%s", config.API_HOST, config.API_PORT)
    yield
    engine.mqtt.loop_stop()
    engine.mqtt.disconnect()
960
 
961
# ASGI application. CORS is wide open on purpose: the proxy is meant to be
# reachable from arbitrary browser origins (the bundled chat UI as well as
# third-party OpenAI-compatible clients).
app = FastAPI(title="Zen AI Proxy v3.0", lifespan=lifespan)
app.add_middleware(CORSMiddleware, allow_origins=["*"], allow_methods=["*"], allow_headers=["*"])
 
 
 
 
 
 
963
 
964
+ # ── pages ────────────────────────────────────────────────────
965
 
966
@app.get("/", response_class=HTMLResponse)
async def landing():
    """Serve the static landing page."""
    return HTMLResponse(content=LANDING_HTML)
969
+
970
@app.get("/chat", response_class=HTMLResponse)
async def chat_page():
    """Serve the static chat UI."""
    return HTMLResponse(content=CHAT_HTML)
973
 
974
@app.get("/admin", response_class=HTMLResponse)
async def admin_page():
    """Serve the static admin debug console."""
    return HTMLResponse(content=ADMIN_HTML)
977
+
978
+ # ── admin stats API ──────────────────────────────────────────
979
+
980
@app.get("/admin/api/stats")
async def admin_stats():
    """Return a JSON snapshot of proxy state for the admin console.

    Polled by the /admin page (~every 2.5s) and by the chat page's connection
    dot. Includes broker connection info, the active worker registry, engine
    counters, the most recent activity-log entries (newest first, capped at
    100), and the effective configuration.
    """
    active = engine.get_active_workers()
    now = time.time()
    # The Config class visible at the top of this file defines USE_TLS/WS_PATH
    # but not WS_TRANSPORT or DEBUG_MODE; use getattr with fallbacks so this
    # endpoint cannot raise AttributeError if those fields are absent.
    transport = getattr(
        config, "WS_TRANSPORT",
        "websockets (tls)" if getattr(config, "USE_TLS", False) else "websockets",
    )
    debug_mode = getattr(config, "DEBUG_MODE", False)
    return {
        "connection": {
            "connected": engine.connected,
            "broker": f"{config.BROKER_HOST}:{config.BROKER_PORT}",
            "transport": transport,
            "client_id": engine.client_id,
            "uptime_seconds": int(now - engine.stats["start_time"]),
        },
        "workers": {
            "count": len(active),
            "details": [
                dict(sid=sid, model=i["model"], status=i["status"],
                     pressure=i["pressure"],
                     last_seen_ago=round(now - i["last_seen"], 1))
                for sid, i in active.items()
            ],
        },
        "stats": {
            "total_requests": engine.stats["total_requests"],
            "active_streams": engine.stats["active_streams"],
            "completed": engine.stats["completed"],
            "failed": engine.stats["failed"],
            "total_chunks": engine.stats["total_chunks"],
            "heartbeats_rx": engine.stats["heartbeats_rx"],
            "pending_queues": len(engine._queues),
        },
        # Newest-first view of the bounded activity log, capped at 100 rows.
        "recent_activity": list(reversed(list(engine.activity_log)))[:100],
        "config": {
            "timeout_sec": config.TIMEOUT_SEC,
            "session_expiry": config.SESSION_EXPIRY,
            "debug_mode": debug_mode,
        },
    }
1017
+
1018
+ # ── OpenAI-compatible API ────────────────────────────────────
1019
 
1020
@app.get("/v1/models")
async def list_models():
    """OpenAI-compatible model listing.

    Always exposes the "auto" pseudo-model (load-balanced routing), plus one
    entry per live worker tab, named ``<model>:<session-id>`` so clients can
    pin a request to a specific worker.
    """
    registry = engine.get_active_workers()
    entries = [{"id": "auto", "object": "model", "owned_by": "zen", "created": 1700000000}]
    entries.extend(
        {
            "id": f"{info['model']}:{sid}",
            "object": "model",
            "owned_by": "zen",
            "created": 1700000000,
        }
        for sid, info in registry.items()
    )
    return {"object": "list", "data": entries}
1032
 
1033
@app.post("/v1/chat/completions")
async def chat_completions(req: ChatCompletionRequest, request: Request):
    """OpenAI-compatible chat completions endpoint (streaming and non-streaming).

    Proxies the request through ``engine.chat`` (the MQTT worker bridge) and
    adapts whatever chunk shape the worker emits into OpenAI's SSE chunk /
    completion-object formats.
    """
    chat_id = f"chatcmpl-{uuid.uuid4().hex[:12]}"
    created = int(time.time())

    if req.stream:
        async def generate():
            try:
                async for chunk in engine.chat(req):
                    # Stop pulling from the worker as soon as the client is gone.
                    if await request.is_disconnected():
                        break

                    choices = chunk.get("choices", [])
                    if not choices:
                        continue
                    # Workers may emit streaming-style ("delta") or full-message
                    # ("message") choices; normalize both to a delta dict.
                    raw_delta = choices[0].get("delta") or choices[0].get("message") or {}
                    resp = ChatCompletionChunk(
                        id=chat_id, created=created, model=req.model,
                        choices=[ChoiceChunk(
                            delta=ChoiceDelta(
                                content=raw_delta.get("content"),
                                reasoning_content=raw_delta.get("reasoning_content"),
                            ),
                            finish_reason=choices[0].get("finish_reason"),
                        )],
                    )
                    yield f"data: {resp.model_dump_json(exclude_none=True)}\n\n"

                if not await request.is_disconnected():
                    yield "data: [DONE]\n\n"
            except HTTPException as he:
                # The SSE stream is already open, so errors must be surfaced
                # in-band rather than as an HTTP status code.
                yield f"data: {json.dumps({'error': he.detail})}\n\n"
            except Exception as exc:
                logger.error("Stream error: %s", exc)
                yield f"data: {json.dumps({'error': str(exc)})}\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")

    # non-streaming: drain the worker's chunk stream and aggregate the text
    full_content = ""
    full_reasoning = ""
    async for chunk in engine.chat(req):
        choices = chunk.get("choices", [])
        if not choices:
            continue
        delta = choices[0].get("delta") or choices[0].get("message") or {}
        full_content += delta.get("content", "") or ""
        full_reasoning += delta.get("reasoning_content", "") or ""

    return {
        "id": chat_id,
        "object": "chat.completion",
        "created": created,
        "model": req.model,
        "choices": [{
            "message": {
                "role": "assistant",
                "content": full_content,
                # reasoning_content is a non-standard extension; include it
                # only when the worker actually produced reasoning text.
                **({"reasoning_content": full_reasoning} if full_reasoning else {}),
            },
            "finish_reason": "stop",
            "index": 0,
        }],
    }
1095
+
1096
+ # ════════════════════════════════════════════════════════════
1097
 
1098
if __name__ == "__main__":
    # Direct (development) entry point; any ASGI server can host `app` instead.
    uvicorn.run(app, host=config.API_HOST, port=config.API_PORT)