Deepseek_Test

Paused

App Files Community

Hivra committed on Apr 27, 2025

Commit

6d886e2

verified ·

1 Parent(s): 6ab9ff8

Update app/main.py

Browse files

Files changed (1) hide show

app/main.py +92 -33

app/main.py CHANGED Viewed

@@ -1,33 +1,92 @@
-from fastapi import FastAPI
-from pydantic import BaseModel
-from gradio_client import Client
-# Configure your Gradio Space ID and default endpoint
-SPACE_ID = "prithivMLmods/SAMBANOVA"
-DEFAULT_API = "/chat"
-client = Client(SPACE_ID)
-def chat_with_gradio(message: str, api_name: str = DEFAULT_API):
-    """
-    Send a chat message to the Gradio API and return the response.
-    """
-    try:
-        return client.predict(message=message, api_name=api_name)
-    except Exception as e:
-        return {"error": str(e)}
-app = FastAPI()
-class ChatRequest(BaseModel):
-    message: str
-    api_name: str = DEFAULT_API
-@app.post("/chat")
-async def chat_endpoint(req: ChatRequest):
-    return {"reply": chat_with_gradio(req.message, req.api_name)}
-if __name__ == "__main__":
-    import uvicorn
-    print(f"Starting server on http://0.0.0.0:7860 using {SPACE_ID}{DEFAULT_API}")
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import StreamingResponse, JSONResponse
+from pydantic import BaseModel
+from gradio_client import Client
+import time
+import json
+# Configure your Gradio Space ID and default endpoint
+SPACE_ID = "prithivMLmods/SAMBANOVA"
+DEFAULT_API = "/chat"
+client = Client(SPACE_ID)
+def chat_with_gradio(message: str, api_name: str = DEFAULT_API):
+    """
+    Send a chat message to the Gradio API and return the response.
+    """
+    try:
+        return client.predict(message=message, api_name=api_name)
+    except Exception as e:
+        raise RuntimeError(f"Gradio API error: {e}")
+class ChatRequest(BaseModel):
+    message: str
+    api_name: str = DEFAULT_API
+app = FastAPI()
+@app.post("/chat")
+async def chat_endpoint(req: ChatRequest):
+    """Forward chat requests to the Gradio API."""
+    try:
+        reply = chat_with_gradio(req.message, req.api_name)
+        return {"reply": reply}
+    except RuntimeError as e:
+        raise HTTPException(status_code=502, detail=str(e))
+@app.post("/v1/chat/completions")
+async def openai_chat_completions(request: Request):
+    """
+    OpenAI-compatible chat completions endpoint that forwards to Gradio.
+    Supports both streaming and non-streaming.
+    """
+    body = await request.json()
+    messages = body.get("messages")
+    model = body.get("model")
+    stream = body.get("stream", False)
+    if not messages or not isinstance(messages, list):
+        raise HTTPException(status_code=400, detail="`messages` must be a list of dicts.")
+    user_msg = messages[-1].get("content", "")
+    # Call Gradio
+    try:
+        reply = chat_with_gradio(user_msg, DEFAULT_API)
+    except RuntimeError as e:
+        raise HTTPException(status_code=502, detail=str(e))
+    # Build usage (simple token count by words)
+    prompt_tokens = sum(len(m.get("content", "").split()) for m in messages)
+    completion_tokens = len(str(reply).split())
+    usage = {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": prompt_tokens + completion_tokens}
+    if stream:
+        # Stream word by word as OpenAI SSE
+        def event_generator():
+            for word in str(reply).split():
+                chunk = {"choices": [{"delta": {"content": word+" "}, "index": 0, "finish_reason": None}]}
+                yield f"data: {json.dumps(chunk)}\n\n"
+                time.sleep(0.05)
+            # send done
+            done = {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}
+            yield f"data: {json.dumps(done)}\n\n"
+        return StreamingResponse(event_generator(), media_type="text/event-stream")
+    else:
+        response = {
+            "id": f"chatcmpl-{int(time.time())}",
+            "object": "chat.completion",
+            "created": int(time.time()),
+            "model": model,
+            "choices": [{"index": 0, "message": {"role": "assistant", "content": reply}, "finish_reason": "stop"}],
+            "usage": usage
+        }
+        return JSONResponse(response)
+if __name__ == "__main__":
+    import uvicorn
+    print(f"Starting server on http://0.0.0.0:7860 using {SPACE_ID}{DEFAULT_API} and OpenAI-compatible endpoint /v1/chat/completions")
+    uvicorn.run(app, host="0.0.0.0", port=7860)