SiddhJagani committed on
Commit
4c797fd
·
verified ·
1 Parent(s): 5b95a7c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +263 -100
app.py CHANGED
@@ -9,6 +9,7 @@ os.system("npm install puter-js")
9
  BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
10
  BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
11
  BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")
 
12
  LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")
13
 
14
  # ---------------------------------------------------------------------
@@ -170,134 +171,298 @@ async def chat(request: Request, authorization: str = Header(None)):
170
 
171
 
172
 
 
173
 
 
 
 
 
174
 
175
 
 
 
 
 
 
 
 
 
 
 
176
  # ---------------------------------------------------------------------
177
- # /v2/chat/completions → Puter.js → OpenAI compatible
178
  # ---------------------------------------------------------------------
179
- @api.post("/v2/chat/completions")
180
- async def puter_chat(request: Request, authorization: str = Header(None)):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
181
  check_key(authorization)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
 
183
  payload = await request.json()
184
- model = payload.get("model")
185
- messages = payload.get("messages", [])
186
- temperature = payload.get("temperature", 1.0)
187
- max_tokens = payload.get("max_tokens")
188
- stream = payload.get("stream", False)
189
-
190
- # Convert OpenAI-style messages → single string prompt for Puter.js
191
- prompt = "\n".join([f"{m['role']}: {m['content']}" for m in messages])
192
-
193
- # Node helper input
194
- node_payload = json.dumps({
195
- "prompt": prompt,
196
- "model": model,
197
- "temperature": temperature,
198
- "max_tokens": max_tokens,
199
- "stream": False # streaming handled later
200
- })
201
-
202
- # ------------------------------------------------------------------
203
- # Non-streaming
204
- # ------------------------------------------------------------------
205
- if not stream:
206
- proc = subprocess.Popen(
207
- ["node", "puter_helper.js"],
208
- stdin=subprocess.PIPE,
209
- stdout=subprocess.PIPE,
210
- stderr=subprocess.PIPE,
211
- text=True
212
- )
213
 
214
- stdout, stderr = proc.communicate(node_payload)
 
 
 
 
 
 
 
 
 
 
 
 
215
 
216
- if stderr:
217
- raise HTTPException(status_code=500, detail=f"Node error: {stderr}")
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  try:
220
- node_out = json.loads(stdout)
221
- except:
222
- raise HTTPException(status_code=500, detail=f"Bad Node output: {stdout}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
223
 
224
- if not node_out.get("ok"):
225
- raise HTTPException(status_code=502, detail=node_out.get("error"))
 
 
 
 
 
 
 
 
 
226
 
227
- final_text = node_out["result"]
 
 
 
 
 
 
228
 
 
 
 
 
229
  return {
230
- "id": "chatcmpl-puter",
231
- "object": "chat.completion",
232
- "model": model,
 
233
  "choices": [
234
  {
235
  "index": 0,
236
- "message": {
237
- "role": "assistant",
238
- "content": final_text
239
- },
240
- "finish_reason": "stop"
241
  }
242
- ]
243
  }
244
 
245
- # ------------------------------------------------------------------
246
- # Streaming path: /v2/chat/completions?stream=true
247
- # ------------------------------------------------------------------
248
- async def stream_generator():
249
- # Because Puter.js Node helper is not streaming yet,
250
- # we emulate SSE streaming by splitting text gradually.
251
- proc = subprocess.Popen(
252
- ["node", "puter_helper.js"],
253
- stdin=subprocess.PIPE,
254
- stdout=subprocess.PIPE,
255
- stderr=subprocess.PIPE,
256
- text=True
257
- )
258
 
259
- stdout, stderr = proc.communicate(node_payload)
 
260
 
261
- if stderr:
262
- yield f"data: {{\"error\": \"{stderr}\"}}\n\n"
263
- yield "data: [DONE]\n\n"
264
- return
 
 
 
 
 
 
 
 
 
 
 
 
 
265
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
266
  try:
267
- node_out = json.loads(stdout)
268
- except:
269
- yield f"data: {{\"error\": \"Bad Node output\"}}\n\n"
270
- yield "data: [DONE]\n\n"
271
- return
272
-
273
- if not node_out.get("ok"):
274
- yield f"data: {{\"error\": \"{node_out.get('error')}\"}}\n\n"
275
- yield "data: [DONE]\n\n"
276
- return
277
-
278
- full_text = node_out["result"]
279
-
280
- # Send word-by-word as streaming chunks
281
- for word in full_text.split():
282
- chunk = {
283
- "id": "chatcmpl-puter-stream",
284
- "object": "chat.completion.chunk",
285
- "model": model,
286
  "choices": [
287
- {
288
- "index": 0,
289
- "delta": {"content": word + " "},
290
- "finish_reason": None,
291
- }
292
- ]
293
  }
294
- yield f"data: {json.dumps(chunk)}\n\n"
295
- await asyncio.sleep(0.02)
296
-
297
- yield "data: [DONE]\n\n"
298
 
 
 
 
299
  return StreamingResponse(
300
- stream_generator(),
301
  media_type="text/event-stream",
302
  headers={"Access-Control-Allow-Origin": "*"},
303
  )
@@ -308,8 +473,6 @@ async def puter_chat(request: Request, authorization: str = Header(None)):
308
 
309
 
310
 
311
-
312
-
313
 
314
  # ---------------------------------------------------------------------
315
  # Minimal Gradio UI (to make HF Space start)
 
9
  BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
10
  BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
11
  BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY")
12
+ BYTEZ_AUTH_2 = os.getenv("BYTEZ_API_2")
13
  LOCAL_API_KEY = os.getenv("LOCAL_API_KEY")
14
 
15
  # ---------------------------------------------------------------------
 
171
 
172
 
173
 
174
+ Below is a complete, copy-and-paste ready FastAPI backend that
175
 
176
+ keeps your existing /v1/... endpoints untouched
177
+ adds a brand-new /v2/chat/completions endpoint
178
+ streams exactly like OpenAI – only delta.content is sent, the final usage chunk is stripped out
179
+ works on Hugging Face Spaces (Gradio mount) and locally (uvicorn)
180
 
181
 
182
+ 1. Full app.py (or main.py)
183
+ ```python
+ import os
184
+ import json
185
+ import time
186
+ import httpx
187
+ import uvicorn
188
+ import gradio as gr
189
+ from fastapi import FastAPI, Request, Header, HTTPException
190
+ from fastapi.responses import JSONResponse, StreamingResponse
191
+
192
  # ---------------------------------------------------------------------
193
+ # Configuration
194
  # ---------------------------------------------------------------------
195
+ BYTEZ_CHAT_URL = "https://api.bytez.com/models/v2/openai/v1/chat/completions"
196
+ BYTEZ_MODELS_URL = "https://api.bytez.com/models/v2/list/models"
197
+ BYTEZ_AUTH = os.getenv("BYTEZ_API_KEY") # your Bytez key
198
+ LOCAL_API_KEY = os.getenv("LOCAL_API_KEY") # optional local guard
199
+
200
+ # ---------------------------------------------------------------------
201
+ # FastAPI app
202
+ # ---------------------------------------------------------------------
203
+ api = FastAPI(title="Bytez → OpenAI Proxy (v1 + v2)")
204
+
205
+ def check_key(auth: str | None):
206
+ """Validate the Bearer token (optional local key)."""
207
+ if not auth or not auth.startswith("Bearer "):
208
+ raise HTTPException(status_code=401, detail="Missing or invalid API key")
209
+ user_key = auth.split("Bearer ")[1].strip()
210
+ if LOCAL_API_KEY and user_key != LOCAL_API_KEY:
211
+ raise HTTPException(status_code=403, detail="Unauthorized API key")
212
+
213
+ # ---------------------------------------------------------------------
214
+ # Root / health
215
+ # ---------------------------------------------------------------------
216
+ @api.get("/")
217
+ def root():
218
+ return {"status": "ok", "message": "Bytez proxy (v1+v2) running"}
219
+
220
+ # ---------------------------------------------------------------------
221
+ # -------------------------- /v1 ------------------------------------
222
+ # ---------------------------------------------------------------------
223
+
224
+ @api.get("/v1/models")
225
+ async def v1_models(authorization: str = Header(None)):
226
  check_key(authorization)
227
+ if not BYTEZ_AUTH:
228
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
229
+
230
+ async with httpx.AsyncClient(timeout=30) as c:
231
+ r = await c.get(BYTEZ_MODELS_URL, headers={"Authorization": BYTEZ_AUTH})
232
+
233
+ try:
234
+ data = r.json()
235
+ except json.JSONDecodeError:
236
+ raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
237
+
238
+ # Transform Bytez → OpenAI list
239
+ models_list = [
240
+ {"id": m.get("id") or m.get("name"), "object": "model"}
241
+ for m in (data if isinstance(data, list) else data.get("data", []))
242
+ ]
243
+ return JSONResponse(
244
+ {"object": "list", "data": models_list},
245
+ headers={"Access-Control-Allow-Origin": "*"}
246
+ )
247
+
248
+
249
+ @api.post("/v1/chat/completions")
250
+ async def v1_chat(request: Request, authorization: str = Header(None)):
251
+ """Exactly the same implementation you already had – untouched."""
252
+ check_key(authorization)
253
+ if not BYTEZ_AUTH:
254
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
255
 
256
  payload = await request.json()
257
+ stream = payload.get("stream", False)
258
+ headers = {"Authorization": BYTEZ_AUTH, "Content-Type": "application/json"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
259
 
260
+ # ---------- streaming helper ----------
261
+ async def v1_event_stream():
262
+ async with httpx.AsyncClient(timeout=120) as client:
263
+ async with client.stream("POST", BYTEZ_CHAT_URL, headers=headers, json=payload) as upstream:
264
+ async for line in upstream.aiter_lines():
265
+ line = line.strip()
266
+ if not line:
267
+ continue
268
+ json_str = line[6:] if line.startswith("data: ") else line
269
+ try:
270
+ chunk = json.loads(json_str)
271
+ except json.JSONDecodeError:
272
+ continue
273
 
274
+ if json_str == "[DONE]":
275
+ yield "data: [DONE]\n\n"
276
+ break
277
 
278
+ # ----- adapt Bytez chunk to OpenAI -----
279
+ content = ""
280
+ if "token" in chunk:
281
+ content = chunk["token"]
282
+ elif "choices" in chunk and chunk["choices"]:
283
+ delta = chunk["choices"][0].get("delta", {})
284
+ content = delta.get("content", "")
285
+ elif "text" in chunk:
286
+ content = chunk["text"]
287
+ else:
288
+ content = str(chunk)
289
+
290
+ openai_chunk = {
291
+ "id": "chatcmpl-proxy-stream",
292
+ "object": "chat.completion.chunk",
293
+ "created": int(time.time()),
294
+ "model": payload.get("model", "unknown"),
295
+ "choices": [
296
+ {
297
+ "index": 0,
298
+ "delta": {"role": "assistant", "content": content},
299
+ "finish_reason": None,
300
+ }
301
+ ],
302
+ }
303
+ yield f"data: {json.dumps(openai_chunk)}\n\n"
304
+ yield "data: [DONE]\n\n"
305
+
306
+ # ---------- non-stream ----------
307
+ if not stream:
308
+ async with httpx.AsyncClient(timeout=120) as c:
309
+ r = await c.post(BYTEZ_CHAT_URL, headers=headers, json=payload)
310
  try:
311
+ data = r.json()
312
+ except json.JSONDecodeError:
313
+ raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
314
+
315
+ if "choices" not in data:
316
+ content = data.get("output") or data.get("response") or data.get("message") or str(data)
317
+ data = {
318
+ "id": "chatcmpl-proxy",
319
+ "object": "chat.completion",
320
+ "choices": [{"index": 0, "message": {"role": "assistant", "content": content}}],
321
+ }
322
+ return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
323
+
324
+ return StreamingResponse(
325
+ v1_event_stream(),
326
+ media_type="text/event-stream",
327
+ headers={"Access-Control-Allow-Origin": "*"},
328
+ )
329
+
330
+ # ---------------------------------------------------------------------
331
+ # -------------------------- /v2 ------------------------------------
332
+ # ---------------------------------------------------------------------
333
 
334
+ @api.post("/v2/chat/completions")
335
+ async def v2_chat_completions(request: Request, authorization: str = Header(None)):
336
+ """
337
+ **v2** – clean OpenAI-compatible streaming.
338
+ * Only `delta.content` is sent.
339
+ * The final usage-statistics chunk is stripped.
340
+ * `[DONE]` is sent exactly once.
341
+ """
342
+ check_key(authorization)
343
+ if not BYTEZ_AUTH_2:
344
+ raise HTTPException(status_code=500, detail="Server BYTEZ_API_KEY not configured")
345
 
346
+ payload = await request.json()
347
+ stream = payload.get("stream", False)
348
+
349
+ upstream_headers = {
350
+ "Authorization": BYTEZ_AUTH_2,
351
+ "Content-Type": "application/json",
352
+ }
353
 
354
+ # -----------------------------------------------------------------
355
+ # Helper: turn any Bytez chunk into a **minimal** OpenAI delta chunk
356
+ # -----------------------------------------------------------------
357
+ def make_openai_delta(content: str):
358
  return {
359
+ "id": f"chatcmpl-v2-{int(time.time())}",
360
+ "object": "chat.completion.chunk",
361
+ "created": int(time.time()),
362
+ "model": payload.get("model", "unknown"),
363
  "choices": [
364
  {
365
  "index": 0,
366
+ "delta": {"content": content},
367
+ "finish_reason": None,
 
 
 
368
  }
369
+ ],
370
  }
371
 
372
+ # -----------------------------------------------------------------
373
+ # Streaming generator – **filters out the final usage object**
374
+ # -----------------------------------------------------------------
375
+ async def clean_stream():
376
+ async with httpx.AsyncClient(timeout=180) as client:
377
+ async with client.stream(
378
+ "POST", BYTEZ_CHAT_URL, headers=upstream_headers, json=payload
379
+ ) as upstream:
380
+
381
+ async for line in upstream.aiter_lines():
382
+ line = line.strip()
383
+ if not line:
384
+ continue
385
 
386
+ # Bytez may send "data: {...}" or raw JSON
387
+ json_str = line[6:] if line.startswith("data: ") else line
388
 
389
+ # -----------------------------------------------------------------
390
+ # 1. Skip the final usage chunk (it contains `usage` field)
391
+ # -----------------------------------------------------------------
392
+ if "usage" in json_str.lower():
393
+ continue
394
+
395
+ # -----------------------------------------------------------------
396
+ # 2. [DONE] signal
397
+ # -----------------------------------------------------------------
398
+ if json_str == "[DONE]":
399
+ yield "data: [DONE]\n\n"
400
+ return
401
+
402
+ try:
403
+ chunk = json.loads(json_str)
404
+ except json.JSONDecodeError:
405
+ continue
406
 
407
+ # -----------------------------------------------------------------
408
+ # 3. Extract the text token
409
+ # -----------------------------------------------------------------
410
+ text = ""
411
+ if isinstance(chunk, dict):
412
+ # most common patterns
413
+ if "token" in chunk:
414
+ text = chunk["token"]
415
+ elif "choices" in chunk and chunk["choices"]:
416
+ delta = chunk["choices"][0].get("delta", {})
417
+ text = delta.get("content", "")
418
+ elif "text" in chunk:
419
+ text = chunk["text"]
420
+ # fallback – stringify whole chunk (rare)
421
+ else:
422
+ text = str(chunk)
423
+
424
+ # -----------------------------------------------------------------
425
+ # 4. Yield clean OpenAI delta
426
+ # -----------------------------------------------------------------
427
+ if text:
428
+ yield f"data: {json.dumps(make_openai_delta(text))}\n\n"
429
+
430
+ # If upstream never sent [DONE], send it ourselves
431
+ yield "data: [DONE]\n\n"
432
+
433
+ # -----------------------------------------------------------------
434
+ # Non-streaming path (identical to v1, but we keep it for completeness)
435
+ # -----------------------------------------------------------------
436
+ if not stream:
437
+ async with httpx.AsyncClient(timeout=120) as c:
438
+ r = await c.post(BYTEZ_CHAT_URL, headers=upstream_headers, json=payload)
439
  try:
440
+ data = r.json()
441
+ except json.JSONDecodeError:
442
+ raise HTTPException(status_code=502, detail="Upstream returned invalid JSON")
443
+
444
+ # Normalise to OpenAI shape
445
+ if "choices" not in data:
446
+ content = (
447
+ data.get("output")
448
+ or data.get("response")
449
+ or data.get("message")
450
+ or str(data)
451
+ )
452
+ data = {
453
+ "id": "chatcmpl-v2",
454
+ "object": "chat.completion",
 
 
 
 
455
  "choices": [
456
+ {"index": 0, "message": {"role": "assistant", "content": content}}
457
+ ],
 
 
 
 
458
  }
459
+ return JSONResponse(data, headers={"Access-Control-Allow-Origin": "*"})
 
 
 
460
 
461
+ # -----------------------------------------------------------------
462
+ # Return clean SSE stream
463
+ # -----------------------------------------------------------------
464
  return StreamingResponse(
465
+ clean_stream(),
466
  media_type="text/event-stream",
467
  headers={"Access-Control-Allow-Origin": "*"},
468
  )
 
473
 
474
 
475
 
 
 
476
 
477
  # ---------------------------------------------------------------------
478
  # Minimal Gradio UI (to make HF Space start)