# o87LLM-VM / main.py
# Last change: "Update main.py" by truegleai (commit 5c7210c, verified)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
import httpx
import os
# FastAPI application fronting a private, Gradio-hosted LLM.
app = FastAPI(title="Private LLM Proxy")
# URL of your NEW 6.7B model Space from STEP 1
# Base URL of the Gradio Space that serves the model; override with the
# MODEL_URL environment variable in deployment.
MODEL_SPACE_URL = os.getenv("MODEL_URL", "https://truegleai-deepseek-coder-6b-api.hf.space") # <<< CHANGE THIS
class QueryRequest(BaseModel):
    """Request body accepted by the /v1/completions endpoint."""

    # Text to send to the model for completion.
    prompt: str
    # Requested generation limit. NOTE(review): accepted but not forwarded
    # to the model by the current endpoint — verify before relying on it.
    max_tokens: int = 512
@app.post("/v1/completions")
async def generate_code(request: QueryRequest):
    """
    Your private API endpoint.
    Format matches common MCP expectations: {"choices": [{"text": ...}]}.

    Proxies the prompt to the Gradio Space at MODEL_SPACE_URL.

    Raises:
        HTTPException 502: the model Space returned an HTTP error status.
        HTTPException 504: the model Space did not respond within 30s.
        HTTPException 500: any other failure (e.g. unexpected payload shape).
    """
    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            # Call the Gradio Space's API.
            # NOTE(review): request.max_tokens is accepted but not forwarded;
            # the Space's /run/predict signature is unknown from here —
            # confirm whether it takes a second input before forwarding it.
            response = await client.post(
                f"{MODEL_SPACE_URL}/run/predict",
                json={"data": [request.prompt]},
            )
            # Fail fast on 4xx/5xx instead of trying to JSON-decode an
            # upstream error page (the original called .json() blindly).
            response.raise_for_status()
            result = response.json()
            # Gradio wraps outputs as {"data": [...]}: first item is the text.
            generated_text = result["data"][0]
            return {"choices": [{"text": generated_text}]}
        except httpx.TimeoutException:
            raise HTTPException(status_code=504, detail="Model server timeout")
        except httpx.HTTPStatusError as e:
            # Upstream answered with an error status: report it as a bad
            # gateway rather than a generic internal error.
            raise HTTPException(
                status_code=502,
                detail=f"Model server error: {e.response.status_code}",
            )
        except Exception as e:
            # Boundary handler: surface anything else (bad payload shape,
            # connection errors) as a 500 with the underlying message.
            raise HTTPException(status_code=500, detail=str(e))
@app.get("/health")
async def health():
    """Liveness probe: confirm the proxy process itself is up."""
    return dict(status="ok", service="Private LLM Proxy")