truegleai commited on
Commit
36ffa96
·
0 Parent(s):

Deploy FastAPI proxy for LLM

Browse files
Files changed (4) hide show
  1. Dockerfile +6 -0
  2. README.md +12 -0
  3. main.py +40 -0
  4. requirements.txt +4 -0
Dockerfile ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
# Slim Python base keeps the final image small.
FROM python:3.10-slim

WORKDIR /app

# Install dependencies in their own layer so Docker's cache survives
# source-code edits (requirements change far less often than code).
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy the application source after dependency install.
COPY . .

# 7860 is the port Hugging Face Spaces routes external traffic to.
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "7860"]
README.md ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ title: LLM Proxy API
3
+ emoji: 🔌
4
+ colorFrom: green
5
+ colorTo: blue
6
+ sdk: docker
7
+ pinned: false
8
+ license: apache-2.0
9
+ ---
10
+
11
+ # LLM Proxy API
12
+ FastAPI proxy service for the DeepSeek Coder model space.
main.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import httpx
import asyncio  # NOTE(review): unused in this module — candidate for removal

# FastAPI application exposing a thin proxy in front of a HF Model Space.
app = FastAPI(title="LLM Proxy API")

# Base URL of the upstream Model Space that actually runs the model.
MODEL_SPACE_URL = "https://truegleai-deepseek-coder-api.hf.space"
class QueryRequest(BaseModel):
    """Request payload for ``POST /query``.

    Carries the text prompt plus a cap on generation length.
    """

    # Prompt text forwarded verbatim to the Model Space.
    prompt: str
    # Maximum generation length; defaults to 256.
    max_length: int = 256
@app.post("/query")
async def query_model(request: QueryRequest):
    """Proxy a generation request to the upstream Model Space.

    Forwards ``(prompt, max_length)`` to the Space's Gradio
    ``/run/predict`` endpoint and unwraps the first element of the
    returned ``data`` list.

    Raises:
        HTTPException: 504 on upstream timeout (the Space is likely
            cold-starting), the upstream's own status code on a non-200
            reply, and 500 for any other failure.
    """
    async with httpx.AsyncClient(timeout=120.0) as client:
        try:
            # BUG FIX: original used f"${MODEL_SPACE_URL}/run/predict" — the
            # stray "$" (a JS template-literal leftover) produced the invalid
            # URL "$https://...", so every request failed.
            response = await client.post(
                f"{MODEL_SPACE_URL}/run/predict",
                json={"data": [request.prompt, request.max_length]},
            )
            if response.status_code == 200:
                result = response.json()
                return {"response": result.get("data", ["No response"])[0]}
            # Surface the upstream status code to the caller.
            raise HTTPException(
                status_code=response.status_code,
                detail=f"Model Space error: {response.text[:200]}",
            )
        except httpx.TimeoutException:
            raise HTTPException(
                status_code=504,
                detail="Model Space timeout (likely waking up). Try again in 30s."
            )
        except HTTPException:
            # BUG FIX: re-raise our own HTTPExceptions unchanged — the
            # original's broad `except Exception` caught the non-200 raise
            # above and masked the upstream status code as a generic 500.
            raise
        except Exception as e:
            raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health():
    """Liveness probe: reports that the proxy process itself is up."""
    return {"status": "active", "service": "LLM Proxy"}
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ fastapi[standard]
2
+ uvicorn
3
+ httpx
4
+ pydantic