Spaces:

SiddhJagani
/

Backend

Paused

App Files Community

SiddhJagani committed on Nov 15, 2025

Commit

3ae3c70

verified ·

1 Parent(s): 96b3c51

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -25

app.py CHANGED Viewed

@@ -12,17 +12,16 @@ from fastapi.responses import JSONResponse, StreamingResponse
 # ---------------------------------------------------------------------
 BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
 BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
-BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")          # for /v1
-BYTEZ_AUTH_2 = os.getenv("BYTEZ_API_2")          # for /v2 (optional, can be same)
-LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")       # optional local guard
 # ---------------------------------------------------------------------
 # FastAPI app
 # ---------------------------------------------------------------------
-api = FastAPI(title="Bytez -> OpenAI Proxy (v1 + v2)")
 def check_key(auth: str | None):
-    """Validate Bearer token."""
     if not auth or not auth.startswith("Bearer "):
         raise HTTPException(status_code=401, detail="Missing or invalid API key")
     user_key = auth.split("Bearer ")[1].strip()
@@ -37,8 +36,9 @@ def root():
     return {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
 # ---------------------------------------------------------------------
-# /v1/models
 # ---------------------------------------------------------------------
 @api.get("/v1/models")
 async def v1_models(authorization: str = Header(None)):
     check_key(authorization)
@@ -53,6 +53,7 @@ async def v1_models(authorization: str = Header(None)):
     except json.JSONDecodeError:
         raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
     models_list = [
         {"id": m.get("id") or m.get("name"), "object": "model"}
         for m in (data if isinstance(data, list) else data.get("data", []))
@@ -62,11 +63,10 @@ async def v1_models(authorization: str = Header(None)):
         headers={"Access-Control-Allow-Origin": "*"}
     )
-# ---------------------------------------------------------------------
-# /v1/chat/completions (your original – untouched)
-# ---------------------------------------------------------------------
 @api.post("/v1/chat/completions")
 async def v1_chat(request: Request, authorization: str = Header(None)):
     check_key(authorization)
     if not BYTEZ_AUTH:
         raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
@@ -75,6 +75,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
     stream = payload.get("stream", False)
     headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}
     async def v1_event_stream():
         async with httpx.AsyncClient(timeout=120) as client:
             async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
@@ -92,6 +93,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
                         yield "data: [DONE]\n\n"
                         break
                     content = ""
                     if "token" in chunk:
                         content = chunk["token"]
@@ -119,6 +121,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
                     yield f"data: {json.dumps(openai_chunk)}\n\n"
         yield "data: [DONE]\n\n"
     if not stream:
         async with httpx.AsyncClient(timeout=120) as c:
             r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
@@ -126,6 +129,7 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
             data = r.json()
         except json.JSONDecodeError:
             raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
         if "choices" not in data:
             content = data.get("output") or data.get("response") or data.get("message") or str(data)
             data = {
@@ -135,45 +139,80 @@ async def v1_chat(request: Request, authorization: str = Header(None)):
             }
         return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
-    return StreamingResponse(v1_event_stream(), media_type="text/event-stream", headers={"Access-Control-Allow-Origin": "*"})
 # ---------------------------------------------------------------------
-# /v2/chat/completions – CLEAN STREAMING (no usage chunk)
 # ---------------------------------------------------------------------
 @api.post("/v2/chat/completions")
 async def v2_chat_completions(request: Request, authorization: str = Header(None)):
     check_key(authorization)
-    auth_to_use = BYTEZ_AUTH_2 or BYTEZ_AUTH
-    if not auth_to_use:
-        raise HTTPException(status_code=500, detail="BYTEZ_API_KEY or BYTEZ_API_2 not configured")
     payload = await request.json()
     stream = payload.get("stream", False)
-    upstream_headers = {"Authorization": auth_to_use, "Content-Type": "application/json"}
     def make_openai_delta(content: str):
         return {
             "id": f"chatcmpl-v2-{int(time.time())}",
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": payload.get("model", "unknown"),
-            "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}],
         }
     async def clean_stream():
         async with httpx.AsyncClient(timeout=180) as client:
-            async with client.stream("POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload) as upstream:
                 async for line in upstream.aiter_lines():
                     line = line.strip()
                     if not line:
                         continue
                     json_str = line[6:] if line.startswith("data: ") else line
-                    # Skip final usage chunk
                     if "usage" in json_str.lower():
                         continue
                     if json_str == "[DONE]":
                         yield "data: [DONE]\n\n"
                         return
@@ -183,8 +222,12 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
                     except json.JSONDecodeError:
                         continue
                     text = ""
                     if isinstance(chunk, dict):
                         if "token" in chunk:
                             text = chunk["token"]
                         elif "choices" in chunk and chunk["choices"]:
@@ -192,14 +235,22 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
                             text = delta.get("content", "")
                         elif "text" in chunk:
                             text = chunk["text"]
                         else:
                             text = str(chunk)
                     if text:
                         yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
         yield "data: [DONE]\n\n"
     if not stream:
         async with httpx.AsyncClient(timeout=120) as c:
             r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
@@ -207,32 +258,48 @@ async def v2_chat_completions(request: Request, authorization: str = Header(None
             data = r.json()
         except json.JSONDecodeError:
             raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
         if "choices" not in data:
-            content = data.get("output") or data.get("response") or data.get("message") or str(data)
             data = {
                 "id": "chatcmpl-v2",
                 "object": "chat.completion",
-                "choices": [{"index": 0, "message": {"role": "assistant", "content": content}}],
             }
         return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
-    return StreamingResponse(clean_stream(), media_type="text/event-stream", headers={"Access-Control-Allow-Origin": "*"})
 # ---------------------------------------------------------------------
-# Gradio UI (required for HF Space)
 # ---------------------------------------------------------------------
 with gr.Blocks() as ui:
     gr.Markdown(
-        "### Bytez -> OpenAI Proxy\n"
         "- `/v1/models`  \n"
-        "- `/v1/chat/completions` (original)  \n"
         "- **`/v2/chat/completions`** – clean streaming, no usage chunk"
     )
 demo = gr.mount_gradio_app(api, ui, path="/")
 # ---------------------------------------------------------------------
-# Local dev
 # ---------------------------------------------------------------------
 if __name__ == "__main__":
     uvicorn.run(demo, host="0.0.0.0", port=7860)

 # ---------------------------------------------------------------------
 BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
 BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
+BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")          # your Bytez key
+LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")      # optional local guard
 # ---------------------------------------------------------------------
 # FastAPI app
 # ---------------------------------------------------------------------
+api = FastAPI(title="Bytez → OpenAI Proxy (v1 + v2)")
 def check_key(auth: str | None):
+    """Validate the Bearer token (optional local key)."""
     if not auth or not auth.startswith("Bearer "):
         raise HTTPException(status_code=401, detail="Missing or invalid API key")
     user_key = auth.split("Bearer ")[1].strip()
     return {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
 # ---------------------------------------------------------------------
+# --------------------------  /v1  ------------------------------------
 # ---------------------------------------------------------------------
 @api.get("/v1/models")
 async def v1_models(authorization: str = Header(None)):
     check_key(authorization)
     except json.JSONDecodeError:
         raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
+    # Transform Bytez → OpenAI list
     models_list = [
         {"id": m.get("id") or m.get("name"), "object": "model"}
         for m in (data if isinstance(data, list) else data.get("data", []))
         headers={"Access-Control-Allow-Origin": "*"}
     )
 @api.post("/v1/chat/completions")
 async def v1_chat(request: Request, authorization: str = Header(None)):
+    """Exactly the same implementation you already had – untouched."""
     check_key(authorization)
     if not BYTEZ_AUTH:
         raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
     stream = payload.get("stream", False)
     headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}
+    # ---------- streaming helper ----------
     async def v1_event_stream():
         async with httpx.AsyncClient(timeout=120) as client:
             async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
                         yield "data: [DONE]\n\n"
                         break
+                    # ----- adapt Bytez chunk to OpenAI -----
                     content = ""
                     if "token" in chunk:
                         content = chunk["token"]
                     yield f"data: {json.dumps(openai_chunk)}\n\n"
         yield "data: [DONE]\n\n"
+    # ---------- non-stream ----------
     if not stream:
         async with httpx.AsyncClient(timeout=120) as c:
             r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
             data = r.json()
         except json.JSONDecodeError:
             raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
         if "choices" not in data:
             content = data.get("output") or data.get("response") or data.get("message") or str(data)
             data = {
             }
         return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
+    return StreamingResponse(
+        v1_event_stream(),
+        media_type="text/event-stream",
+        headers={"Access-Control-Allow-Origin": "*"},
+    )
 # ---------------------------------------------------------------------
+# --------------------------  /v2  ------------------------------------
 # ---------------------------------------------------------------------
 @api.post("/v2/chat/completions")
 async def v2_chat_completions(request: Request, authorization: str = Header(None)):
+    """
+    **v2** – clean OpenAI-compatible streaming.
+    * Only `delta.content` is sent.
+    * The final usage-statistics chunk is stripped.
+    * `[DONE]` is sent exactly once.
+    """
     check_key(authorization)
+    if not BYTEZ_AUTH:
+        raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
     payload = await request.json()
     stream = payload.get("stream", False)
+    upstream_headers = {
+        "Authorization": BYTEZ_AUTH,
+        "Content-Type": "application/json",
+    }
+    # -----------------------------------------------------------------
+    # Helper: turn any Bytez chunk into a **minimal** OpenAI delta chunk
+    # -----------------------------------------------------------------
     def make_openai_delta(content: str):
         return {
             "id": f"chatcmpl-v2-{int(time.time())}",
             "object": "chat.completion.chunk",
             "created": int(time.time()),
             "model": payload.get("model", "unknown"),
+            "choices": [
+                {
+                    "index": 0,
+                    "delta": {"content": content},
+                    "finish_reason": None,
+                }
+            ],
         }
+    # -----------------------------------------------------------------
+    # Streaming generator – **filters out the final usage object**
+    # -----------------------------------------------------------------
     async def clean_stream():
         async with httpx.AsyncClient(timeout=180) as client:
+            async with client.stream(
+                "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
+            ) as upstream:
                 async for line in upstream.aiter_lines():
                     line = line.strip()
                     if not line:
                         continue
+                    # Bytez may send "data: {...}" or raw JSON
                     json_str = line[6:] if line.startswith("data: ") else line
+                    # -----------------------------------------------------------------
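+                    # 1. Skip the final usage chunk (it contains a `usage` field).
+                    #    NOTE(review): the test below is a case-insensitive substring
+                    #    match on the raw line, so any chunk whose text merely contains
+                    #    the word "usage" is dropped as well — confirm this is intended.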
+                    # -----------------------------------------------------------------
                     if "usage" in json_str.lower():
                         continue
+                    # -----------------------------------------------------------------
+                    # 2. [DONE] signal
+                    # -----------------------------------------------------------------
                     if json_str == "[DONE]":
                         yield "data: [DONE]\n\n"
                         return
                     except json.JSONDecodeError:
                         continue
+                    # -----------------------------------------------------------------
+                    # 3. Extract the text token
+                    # -----------------------------------------------------------------
                     text = ""
                     if isinstance(chunk, dict):
+                        # most common patterns
                         if "token" in chunk:
                             text = chunk["token"]
                         elif "choices" in chunk and chunk["choices"]:
                             text = delta.get("content", "")
                         elif "text" in chunk:
                             text = chunk["text"]
+                        # fallback – stringify whole chunk (rare)
                         else:
                             text = str(chunk)
+                    # -----------------------------------------------------------------
+                    # 4. Yield clean OpenAI delta
+                    # -----------------------------------------------------------------
                     if text:
                         yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
+        # If upstream never sent [DONE], send it ourselves
         yield "data: [DONE]\n\n"
+    # -----------------------------------------------------------------
+    # Non-streaming path (identical to v1, but we keep it for completeness)
+    # -----------------------------------------------------------------
     if not stream:
         async with httpx.AsyncClient(timeout=120) as c:
             r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
             data = r.json()
         except json.JSONDecodeError:
             raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
+        # Normalise to OpenAI shape
         if "choices" not in data:
+            content = (
+                data.get("output")
+                or data.get("response")
+                or data.get("message")
+                or str(data)
+            )
             data = {
                 "id": "chatcmpl-v2",
                 "object": "chat.completion",
+                "choices": [
+                    {"index": 0, "message": {"role": "assistant", "content": content}}
+                ],
             }
         return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
+    # -----------------------------------------------------------------
+    # Return clean SSE stream
+    # -----------------------------------------------------------------
+    return StreamingResponse(
+        clean_stream(),
+        media_type="text/event-stream",
+        headers={"Access-Control-Allow-Origin": "*"},
+    )
 # ---------------------------------------------------------------------
+# Minimal Gradio UI (required for HF Space to start)
 # ---------------------------------------------------------------------
 with gr.Blocks() as ui:
     gr.Markdown(
+        "### Bytez → OpenAI Proxy (v1 + **v2**)\n"
         "- `/v1/models`  \n"
+        "- `/v1/chat/completions` (unchanged)  \n"
         "- **`/v2/chat/completions`** – clean streaming, no usage chunk"
     )
 demo = gr.mount_gradio_app(api, ui, path="/")
 # ---------------------------------------------------------------------
+# Local dev entrypoint
 # ---------------------------------------------------------------------
 if __name__ == "__main__":
     uvicorn.run(demo, host="0.0.0.0", port=7860)