SiddhJagani committed on
Commit
4c797fd
·
verified ·
1 Parent(s): 5b95a7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +263 -100
app.py CHANGED
@@ -9,6 +9,7 @@ os.system("npm install puter-js")
9
  BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
10
  BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
11
  BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")
 
12
  LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")
13
 
14
  # ---------------------------------------------------------------------
@@ -170,134 +171,298 @@ async def chat(request: Request, authorization: str = Header(None)):
170
 
171
 
172
 
 
173
 
 
 
 
 
174
 
175
 
 
 
 
 
 
 
 
 
 
 
176
  # ---------------------------------------------------------------------
177
- # /v2/chat/completions → Puter.js → OpenAI compatible
178
  # ---------------------------------------------------------------------
179
- @api.post("/v2/chat/completions")
180
- async def puter_chat(request: Request, authorization: str = Header(None)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  check_key(authorization)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  payload = await request.json()
184
- model = payload.get("model")
185
- messages = payload.get("messages", [])
186
- temperature = payload.get("temperature", 1.0)
187
- max_tokens = payload.get("max_tokens")
188
- stream = payload.get("stream", False)
189
-
190
- # Convert OpenAI-style messages → single string prompt for Puter.js
191
- prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
192
-
193
- # Node helper input
194
- node_payload = json.dumps({
195
- "prompt": prompt,
196
- "model": model,
197
- "temperature": temperature,
198
- "max_tokens": max_tokens,
199
- "stream": False # streaming handled later
200
- })
201
-
202
- # ------------------------------------------------------------------
203
- # Non-streaming
204
- # ------------------------------------------------------------------
205
- if not stream:
206
- proc = subprocess.Popen(
207
- ["node", "puter_helper.js"],
208
- stdin=subprocess.PIPE,
209
- stdout=subprocess.PIPE,
210
- stderr=subprocess.PIPE,
211
- text=True
212
- )
213
 
214
- stdout, stderr = proc.communicate(node_payload)
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
- if stderr:
217
- raise HTTPException(status_code=500, detail=f"Node error: {stderr}")
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  try:
220
- node_out = json.loads(stdout)
221
- except:
222
- raise HTTPException(status_code=500, detail=f"Bad Node output: {stdout}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- if not node_out.get("ok"):
225
- raise HTTPException(status_code=502, detail=node_out.get("error"))
 
 
 
 
 
 
 
 
 
226
 
227
- final_text = node_out["result"]
 
 
 
 
 
 
228
 
 
 
 
 
229
  return {
230
- "id": "chatcmpl-puter",
231
- "object": "chat.completion",
232
- "model": model,
 
233
  "choices": [
234
  {
235
  "index": 0,
236
- "message": {
237
- "role": "assistant",
238
- "content": final_text
239
- },
240
- "finish_reason": "stop"
241
  }
242
- ]
243
  }
244
 
245
- # ------------------------------------------------------------------
246
- # Streaming path: /v2/chat/completions?stream=true
247
- # ------------------------------------------------------------------
248
- async def stream_generator():
249
- # Because Puter.js Node helper is not streaming yet,
250
- # we emulate SSE streaming by splitting text gradually.
251
- proc = subprocess.Popen(
252
- ["node", "puter_helper.js"],
253
- stdin=subprocess.PIPE,
254
- stdout=subprocess.PIPE,
255
- stderr=subprocess.PIPE,
256
- text=True
257
- )
258
 
259
- stdout, stderr = proc.communicate(node_payload)
 
260
 
261
- if stderr:
262
- yield f"data: {{\"error\": \"{stderr}\"}}\n\n"
263
- yield "data: [DONE]\n\n"
264
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  try:
267
- node_out = json.loads(stdout)
268
- except:
269
- yield f"data: {{\"error\": \"Bad Node output\"}}\n\n"
270
- yield "data: [DONE]\n\n"
271
- return
272
-
273
- if not node_out.get("ok"):
274
- yield f"data: {{\"error\": \"{node_out.get('error')}\"}}\n\n"
275
- yield "data: [DONE]\n\n"
276
- return
277
-
278
- full_text = node_out["result"]
279
-
280
- # Send word-by-word as streaming chunks
281
- for word in full_text.split():
282
- chunk = {
283
- "id": "chatcmpl-puter-stream",
284
- "object": "chat.completion.chunk",
285
- "model": model,
286
  "choices": [
287
- {
288
- "index": 0,
289
- "delta": {"content": word + " "},
290
- "finish_reason": None,
291
- }
292
- ]
293
  }
294
- yield f"data: {json.dumps(chunk)}\n\n"
295
- await asyncio.sleep(0.02)
296
-
297
- yield "data: [DONE]\n\n"
298
 
 
 
 
299
  return StreamingResponse(
300
- stream_generator(),
301
  media_type="text/event-stream",
302
  headers={"Access-Control-Allow-Origin": "*"},
303
  )
@@ -308,8 +473,6 @@ async def puter_chat(request: Request, authorization: str = Header(None)):
308
 
309
 
310
 
311
-
312
-
313
 
314
  # ---------------------------------------------------------------------
315
  # Minimal Gradio UI (to make HF Space start)
 
9
  BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
10
  BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
11
  BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")
12
+ BYTEZ_AUTH_2 = os.getenv("BYTEZ_API_2")
13
  LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")
14
 
15
  # ---------------------------------------------------------------------
 
171
 
172
 
173
 
174
+ Below is a complete, copy-and-paste ready FastAPI backend that
175
 
176
+ keeps your existing /v1/... endpoints untouched
177
+ adds a brand-new /v2/chat/completions endpoint
178
+ streams exactly like OpenAI – only delta.content is sent, the final usage chunk is stripped out
179
+ works on Hugging Face Spaces (Gradio mount) and locally (uvicorn)
180
 
181
 
182
+ 1. Full app.py (or main.py)
183
+ ```python
+ import os
184
+ import json
185
+ import time
186
+ import httpx
187
+ import uvicorn
188
+ import gradio as gr
189
+ from fastapi import FastAPI, Request, Header, HTTPException
190
+ from fastapi.responses import JSONResponse, StreamingResponse
191
+
192
  # ---------------------------------------------------------------------
193
+ # Configuration
194
  # ---------------------------------------------------------------------
195
+ BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
196
+ BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
197
+ BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY") # your Bytez key
198
+ LOCAL_API_KEY = os.getenv("LOCAL_API_KEY") # optional local guard
199
+
200
+ # ---------------------------------------------------------------------
201
+ # FastAPI app
202
+ # ---------------------------------------------------------------------
203
+ api = FastAPI(title="Bytez → OpenAI Proxy (v1 + v2)")
204
+
205
+ def check_key(auth: str | None):
206
+ """Validate the Bearer token (optional local key)."""
207
+ if not auth or not auth.startswith("Bearer "):
208
+ raise HTTPException(status_code=401, detail="Missing or invalid API key")
209
+ user_key = auth.split("Bearer ")[1].strip()
210
+ if LOCAL_API_KEY and user_key != LOCAL_API_KEY:
211
+ raise HTTPException(status_code=403, detail="Unauthorized API key")
212
+
213
+ # ---------------------------------------------------------------------
214
+ # Root / health
215
+ # ---------------------------------------------------------------------
216
+ @api.get("/")
217
+ def root():
218
+ return {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
219
+
220
+ # ---------------------------------------------------------------------
221
+ # -------------------------- /v1 ------------------------------------
222
+ # ---------------------------------------------------------------------
223
+
224
+ @api.get("/v1/models")
225
+ async def v1_models(authorization: str = Header(None)):
226
  check_key(authorization)
227
+ if not BYTEZ_AUTH:
228
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
229
+
230
+ async with httpx.AsyncClient(timeout=30) as c:
231
+ r = await c.get(BYTEZ_MODELS_URL, headers={"Authorization": BYTEZ_AUTH})
232
+
233
+ try:
234
+ data = r.json()
235
+ except json.JSONDecodeError:
236
+ raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
237
+
238
+ # Transform Bytez → OpenAI list
239
+ models_list = [
240
+ {"id": m.get("id") or m.get("name"), "object": "model"}
241
+ for m in (data if isinstance(data, list) else data.get("data", []))
242
+ ]
243
+ return JSONResponse(
244
+ {"object": "list", "data": models_list},
245
+ headers={"Access-Control-Allow-Origin": "*"}
246
+ )
247
+
248
+
249
+ @api.post("/v1/chat/completions")
250
+ async def v1_chat(request: Request, authorization: str = Header(None)):
251
+ """Exactly the same implementation you already had – untouched."""
252
+ check_key(authorization)
253
+ if not BYTEZ_AUTH:
254
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
255
 
256
  payload = await request.json()
257
+ stream = payload.get("stream", False)
258
+ headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
+ # ---------- streaming helper ----------
261
+ async def v1_event_stream():
262
+ async with httpx.AsyncClient(timeout=120) as client:
263
+ async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
264
+ async for line in upstream.aiter_lines():
265
+ line = line.strip()
266
+ if not line:
267
+ continue
268
+ json_str = line[6:] if line.startswith("data: ") else line
269
+ try:
270
+ chunk = json.loads(json_str)
271
+ except json.JSONDecodeError:
272
+ continue
273
 
274
+ if json_str == "[DONE]":
275
+ yield "data: [DONE]\n\n"
276
+ break
277
 
278
+ # ----- adapt Bytez chunk to OpenAI -----
279
+ content = ""
280
+ if "token" in chunk:
281
+ content = chunk["token"]
282
+ elif "choices" in chunk and chunk["choices"]:
283
+ delta = chunk["choices"][0].get("delta", {})
284
+ content = delta.get("content", "")
285
+ elif "text" in chunk:
286
+ content = chunk["text"]
287
+ else:
288
+ content = str(chunk)
289
+
290
+ openai_chunk = {
291
+ "id": "chatcmpl-proxy-stream",
292
+ "object": "chat.completion.chunk",
293
+ "created": int(time.time()),
294
+ "model": payload.get("model", "unknown"),
295
+ "choices": [
296
+ {
297
+ "index": 0,
298
+ "delta": {"role": "assistant", "content": content},
299
+ "finish_reason": None,
300
+ }
301
+ ],
302
+ }
303
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
304
+ yield "data: [DONE]\n\n"
305
+
306
+ # ---------- non-stream ----------
307
+ if not stream:
308
+ async with httpx.AsyncClient(timeout=120) as c:
309
+ r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
310
  try:
311
+ data = r.json()
312
+ except json.JSONDecodeError:
313
+ raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
314
+
315
+ if "choices" not in data:
316
+ content = data.get("output") or data.get("response") or data.get("message") or str(data)
317
+ data = {
318
+ "id": "chatcmpl-proxy",
319
+ "object": "chat.completion",
320
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": content}}],
321
+ }
322
+ return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
323
+
324
+ return StreamingResponse(
325
+ v1_event_stream(),
326
+ media_type="text/event-stream",
327
+ headers={"Access-Control-Allow-Origin": "*"},
328
+ )
329
+
330
+ # ---------------------------------------------------------------------
331
+ # -------------------------- /v2 ------------------------------------
332
+ # ---------------------------------------------------------------------
333
 
334
+ @api.post("/v2/chat/completions")
335
+ async def v2_chat_completions(request: Request, authorization: str = Header(None)):
336
+ """
337
+ **v2** – clean OpenAI-compatible streaming.
338
+ * Only `delta.content` is sent.
339
+ * The final usage-statistics chunk is stripped.
340
+ * `[DONE]` is sent exactly once.
341
+ """
342
+ check_key(authorization)
343
+ if not BYTEZ_AUTH_2:
344
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
345
 
346
+ payload = await request.json()
347
+ stream = payload.get("stream", False)
348
+
349
+ upstream_headers = {
350
+ "Authorization": BYTEZ_AUTH_2,
351
+ "Content-Type": "application/json",
352
+ }
353
 
354
+ # -----------------------------------------------------------------
355
+ # Helper: turn any Bytez chunk into a **minimal** OpenAI delta chunk
356
+ # -----------------------------------------------------------------
357
+ def make_openai_delta(content: str):
358
  return {
359
+ "id": f"chatcmpl-v2-{int(time.time())}",
360
+ "object": "chat.completion.chunk",
361
+ "created": int(time.time()),
362
+ "model": payload.get("model", "unknown"),
363
  "choices": [
364
  {
365
  "index": 0,
366
+ "delta": {"content": content},
367
+ "finish_reason": None,
 
 
 
368
  }
369
+ ],
370
  }
371
 
372
+ # -----------------------------------------------------------------
373
+ # Streaming generator – **filters out the final usage object**
374
+ # -----------------------------------------------------------------
375
+ async def clean_stream():
376
+ async with httpx.AsyncClient(timeout=180) as client:
377
+ async with client.stream(
378
+ "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
379
+ ) as upstream:
380
+
381
+ async for line in upstream.aiter_lines():
382
+ line = line.strip()
383
+ if not line:
384
+ continue
385
 
386
+ # Bytez may send "data: {...}" or raw JSON
387
+ json_str = line[6:] if line.startswith("data: ") else line
388
 
389
+ # -----------------------------------------------------------------
390
+ # 1. Skip the final usage chunk (it contains `usage` field)
391
+ # -----------------------------------------------------------------
392
+ if "usage" in json_str.lower():
393
+ continue
394
+
395
+ # -----------------------------------------------------------------
396
+ # 2. [DONE] signal
397
+ # -----------------------------------------------------------------
398
+ if json_str == "[DONE]":
399
+ yield "data: [DONE]\n\n"
400
+ return
401
+
402
+ try:
403
+ chunk = json.loads(json_str)
404
+ except json.JSONDecodeError:
405
+ continue
406
 
407
+ # -----------------------------------------------------------------
408
+ # 3. Extract the text token
409
+ # -----------------------------------------------------------------
410
+ text = ""
411
+ if isinstance(chunk, dict):
412
+ # most common patterns
413
+ if "token" in chunk:
414
+ text = chunk["token"]
415
+ elif "choices" in chunk and chunk["choices"]:
416
+ delta = chunk["choices"][0].get("delta", {})
417
+ text = delta.get("content", "")
418
+ elif "text" in chunk:
419
+ text = chunk["text"]
420
+ # fallback – stringify whole chunk (rare)
421
+ else:
422
+ text = str(chunk)
423
+
424
+ # -----------------------------------------------------------------
425
+ # 4. Yield clean OpenAI delta
426
+ # -----------------------------------------------------------------
427
+ if text:
428
+ yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
429
+
430
+ # If upstream never sent [DONE], send it ourselves
431
+ yield "data: [DONE]\n\n"
432
+
433
+ # -----------------------------------------------------------------
434
+ # Non-streaming path (identical to v1, but we keep it for completeness)
435
+ # -----------------------------------------------------------------
436
+ if not stream:
437
+ async with httpx.AsyncClient(timeout=120) as c:
438
+ r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
439
  try:
440
+ data = r.json()
441
+ except json.JSONDecodeError:
442
+ raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
443
+
444
+ # Normalise to OpenAI shape
445
+ if "choices" not in data:
446
+ content = (
447
+ data.get("output")
448
+ or data.get("response")
449
+ or data.get("message")
450
+ or str(data)
451
+ )
452
+ data = {
453
+ "id": "chatcmpl-v2",
454
+ "object": "chat.completion",
 
 
 
 
455
  "choices": [
456
+ {"index": 0, "message": {"role": "assistant", "content": content}}
457
+ ],
 
 
 
 
458
  }
459
+ return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
 
 
 
460
 
461
+ # -----------------------------------------------------------------
462
+ # Return clean SSE stream
463
+ # -----------------------------------------------------------------
464
  return StreamingResponse(
465
+ clean_stream(),
466
  media_type="text/event-stream",
467
  headers={"Access-Control-Allow-Origin": "*"},
468
  )
 
473
 
474
 
475
 
 
 
476
 
477
  # ---------------------------------------------------------------------
478
  # Minimal Gradio UI (to make HF Space start)