Spaces:

truegleai
/

o87LLM-VM

Sleeping

truegleai committed on Feb 1

Commit

36ffa96

0 Parent(s):

Deploy FastAPI proxy for LLM

Files changed (4) hide show

Dockerfile ADDED Viewed

+# Container image for the FastAPI proxy defined in main.py.
+FROM python:3.10-slim
+WORKDIR /app
+# Dependencies are copied and installed before the application source so the
+# install layer can be reused from the build cache when only the code changes.
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+COPY . .
+# Serve the `app` object from main.py on all interfaces.
+# NOTE(review): 7860 is presumably the port the hosting Space expects; confirm.
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]

README.md ADDED Viewed

+---
+title: LLM Proxy API
+emoji: 🔌
+colorFrom: green
+colorTo: blue
+sdk: docker
+pinned: false
+license: apache-2.0
+---
+# LLM Proxy API
+FastAPI proxy service for the CodeLlama model space.

main.py ADDED Viewed

+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import httpx
+import asyncio
+# ASGI application object; the Dockerfile starts it via `uvicorn main:app`.
+app = FastAPI(title="LLM Proxy API")
+# Base URL of the upstream Model Space that /query forwards requests to.
+# NOTE(review): the README calls this the "CodeLlama model space", while the
+# hostname names a deepseek-coder space; confirm which description is right.
+MODEL_SPACE_URL = "https://truegleai-deepseek-coder-api.hf.space"
+class QueryRequest(BaseModel):
+    """JSON body accepted by the POST /query endpoint."""
+    # Text forwarded to the Model Space as the first element of "data".
+    prompt: str
+    # Forwarded as the second element of "data"; defaults to 256.
+    # NOTE(review): presumably a generation-length limit; the unit (tokens vs.
+    # characters) is defined by the upstream Space, not by this file.
+    max_length: int = 256
+@app.post("/query")
+async def query_model(request: QueryRequest):
+    """Proxy requests to the Model Space."""
+    async with httpx.AsyncClient(timeout=120.0) as client:
+        try:
+            response = await client.post(
+                f"${MODEL_SPACE_URL}/run/predict",
+                json={"data": [request.prompt, request.max_length]}
+            )
+            if response.status_code == 200:
+                result = response.json()
+                return {"response": result.get("data", ["No response"])[0]}
+            else:
+                raise HTTPException(
+                    status_code=response.status_code,
+                    detail=f"Model Space error: {response.text[:200]}"
+                )
+        except httpx.TimeoutException:
+            raise HTTPException(
+                status_code=504,
+                detail="Model Space timeout (likely waking up). Try again in 30s."
+            )
+        except Exception as e:
+            raise HTTPException(status_code=500, detail=str(e))
+@app.get("/health")
+async def health():
+    return {"status": "active", "service": "LLM Proxy"}

requirements.txt ADDED Viewed

+fastapi[standard]
+uvicorn
+httpx
+pydantic