Deepseek_Test

Paused

App Files Community

Hivra committed on Apr 27, 2025

Commit

072a239

verified ·

1 Parent(s): d5f83a2

Update app/main.py

Browse files

Files changed (1) hide show

app/main.py +26 -77

app/main.py CHANGED Viewed

@@ -1,100 +1,51 @@
-import os
-from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException, Request, Depends, Header
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
-from gradio_client import Client, utils
-import httpx
 import time
 import json
-# Load environment variables
-load_dotenv()
-# Configuration
-SPACE_ID = os.getenv("SPACE_ID", "prithivMLmods/SAMBANOVA")
-DEFAULT_API = os.getenv("DEFAULT_API", "/chat")
-GRADIO_TIMEOUT = int(os.getenv("GRADIO_TIMEOUT", "60"))
-API_KEY = os.getenv("API_KEY")
-if not API_KEY:
-    raise RuntimeError("Missing API_KEY in environment")
-# Lazy Gradio client initialization
-global_client = None
-def get_gradio_client():
-    """Initialize or return cached Gradio client, retrying on rate limits or timeouts."""
-    global global_client
-    if global_client:
-        return global_client
-    # Try up to 3 times with exponential backoff
-    for attempt in range(3):
-        try:
-            client = Client(SPACE_ID)
-            # set HTTPX timeouts (connect quick, allow longer reads)
-            client.client.timeout = httpx.Timeout(connect=5.0, read=GRADIO_TIMEOUT)
-            global_client = client
-            return client
-        except utils.TooManyRequestsError:
-            if attempt < 2:
-                time.sleep(2 ** attempt)
-                continue
-            raise RuntimeError("Gradio API config rate-limited. Please try again later.")
-        except Exception as e:
-            msg = str(e)
-            if "ReadTimeout" in msg and attempt < 2:
-                # retry on read timeouts
-                time.sleep(2 ** attempt)
-                continue
-            raise RuntimeError(f"Failed to initialize Gradio client: {e}")
-    except utils.TooManyRequestsError:
-        raise RuntimeError("Gradio API config rate-limited. Please try again later.")
-    except Exception as e:
-        raise RuntimeError(f"Failed to initialize Gradio client: {e}")
 def chat_with_gradio(message: str, api_name: str = DEFAULT_API):
-    client = get_gradio_client()
     try:
         return client.predict(message=message, api_name=api_name)
     except Exception as e:
-        msg = str(e)
-        if "ReadTimeout" in msg:
-            raise RuntimeError(f"Gradio API timed out after {GRADIO_TIMEOUT}s")
         raise RuntimeError(f"Gradio API error: {e}")
-def verify_api_key(
-    x_api_key: str = Header(None),
-    authorization: str = Header(None)
-):
-    """Accepts either X-API-Key or Authorization: Bearer <key>"""
-    token = x_api_key
-    if not token and authorization:
-        scheme, _, cred = authorization.partition(' ')
-        if scheme.lower() == 'bearer':
-            token = cred
-    if token != API_KEY:
-        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
 class ChatRequest(BaseModel):
     message: str
     api_name: str = DEFAULT_API
 app = FastAPI()
-@app.post("/chat", dependencies=[Depends(verify_api_key)])
 async def chat_endpoint(req: ChatRequest):
     try:
         reply = chat_with_gradio(req.message, req.api_name)
         return {"reply": reply}
     except RuntimeError as e:
         raise HTTPException(status_code=502, detail=str(e))
-@app.post("/v1/chat/completions", dependencies=[Depends(verify_api_key)])
 async def openai_chat_completions(request: Request):
     body = await request.json()
     messages = body.get("messages")
     stream = body.get("stream", False)
     if not messages or not isinstance(messages, list):
@@ -102,25 +53,25 @@ async def openai_chat_completions(request: Request):
     user_msg = messages[-1].get("content", "")
     try:
         reply = chat_with_gradio(user_msg, DEFAULT_API)
     except RuntimeError as e:
         raise HTTPException(status_code=502, detail=str(e))
     prompt_tokens = sum(len(m.get("content", "").split()) for m in messages)
     completion_tokens = len(str(reply).split())
-    usage = {
-        "prompt_tokens": prompt_tokens,
-        "completion_tokens": completion_tokens,
-        "total_tokens": prompt_tokens + completion_tokens
-    }
     if stream:
         def event_generator():
             for word in str(reply).split():
-                chunk = {"choices": [{"delta": {"content": word + " "}, "index": 0, "finish_reason": None}]}
                 yield f"data: {json.dumps(chunk)}\n\n"
                 time.sleep(0.05)
             done = {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}
             yield f"data: {json.dumps(done)}\n\n"
         return StreamingResponse(event_generator(), media_type="text/event-stream")
@@ -129,7 +80,7 @@ async def openai_chat_completions(request: Request):
             "id": f"chatcmpl-{int(time.time())}",
             "object": "chat.completion",
             "created": int(time.time()),
-            "model": body.get("model"),
             "choices": [{"index": 0, "message": {"role": "assistant", "content": reply}, "finish_reason": "stop"}],
             "usage": usage
         }
@@ -137,7 +88,5 @@ async def openai_chat_completions(request: Request):
 if __name__ == "__main__":
     import uvicorn
-    print(
-        f"Starting server on http://0.0.0.0:7860 using Space {SPACE_ID}{DEFAULT_API}"
-    )
-    uvicorn.run(app, host="0.0.0.0", port=7860)

+from fastapi import FastAPI, HTTPException, Request
 from fastapi.responses import StreamingResponse, JSONResponse
 from pydantic import BaseModel
+from gradio_client import Client
 import time
 import json
+# Configure your Gradio Space ID and default endpoint
+SPACE_ID = "prithivMLmods/SAMBANOVA"
+DEFAULT_API = "/chat"
+client = Client(SPACE_ID)
 def chat_with_gradio(message: str, api_name: str = DEFAULT_API):
+    """
+    Send a chat message to the Gradio API and return the response.
+    """
     try:
         return client.predict(message=message, api_name=api_name)
     except Exception as e:
         raise RuntimeError(f"Gradio API error: {e}")
 class ChatRequest(BaseModel):
     message: str
     api_name: str = DEFAULT_API
 app = FastAPI()
+@app.post("/chat")
 async def chat_endpoint(req: ChatRequest):
+    """Forward chat requests to the Gradio API."""
     try:
         reply = chat_with_gradio(req.message, req.api_name)
         return {"reply": reply}
     except RuntimeError as e:
         raise HTTPException(status_code=502, detail=str(e))
+@app.post("/v1/chat/completions")
 async def openai_chat_completions(request: Request):
+    """
+    OpenAI-compatible chat completions endpoint that forwards to Gradio.
+    Supports both streaming and non-streaming.
+    """
     body = await request.json()
     messages = body.get("messages")
+    model = body.get("model")
     stream = body.get("stream", False)
     if not messages or not isinstance(messages, list):
     user_msg = messages[-1].get("content", "")
+    # Call Gradio
     try:
         reply = chat_with_gradio(user_msg, DEFAULT_API)
     except RuntimeError as e:
         raise HTTPException(status_code=502, detail=str(e))
+    # Build usage (simple token count by words)
     prompt_tokens = sum(len(m.get("content", "").split()) for m in messages)
     completion_tokens = len(str(reply).split())
+    usage = {"prompt_tokens": prompt_tokens, "completion_tokens": completion_tokens, "total_tokens": prompt_tokens + completion_tokens}
     if stream:
+        # Stream word by word as OpenAI SSE
         def event_generator():
             for word in str(reply).split():
+                chunk = {"choices": [{"delta": {"content": word+" "}, "index": 0, "finish_reason": None}]}
                 yield f"data: {json.dumps(chunk)}\n\n"
                 time.sleep(0.05)
+            # send done
             done = {"choices": [{"delta": {}, "index": 0, "finish_reason": "stop"}]}
             yield f"data: {json.dumps(done)}\n\n"
         return StreamingResponse(event_generator(), media_type="text/event-stream")
             "id": f"chatcmpl-{int(time.time())}",
             "object": "chat.completion",
             "created": int(time.time()),
+            "model": model,
             "choices": [{"index": 0, "message": {"role": "assistant", "content": reply}, "finish_reason": "stop"}],
             "usage": usage
         }
 if __name__ == "__main__":
     import uvicorn
+    print(f"Starting server on http://0.0.0.0:7860 using {SPACE_ID}{DEFAULT_API} and OpenAI-compatible endpoint /v1/chat/completions")
+    uvicorn.run(app, host="0.0.0.0", port=7860)