| from fastapi import FastAPI, HTTPException |
| from pydantic import BaseModel |
| import httpx |
| import os |
|
|
# Application object; the title shows up in the auto-generated OpenAPI docs.
app = FastAPI(title="Private LLM Proxy")


# Base URL of the upstream Hugging Face Space that serves the model.
# Override via the MODEL_URL environment variable (e.g. to point at a
# private or self-hosted deployment); the default is the public Space.
MODEL_SPACE_URL = os.getenv("MODEL_URL", "https://truegleai-deepseek-coder-6b-api.hf.space")
|
|
class QueryRequest(BaseModel):
    """Request body for POST /v1/completions (OpenAI-style subset)."""

    prompt: str  # text passed verbatim to the model Space
    max_tokens: int = 512  # NOTE(review): accepted but not forwarded upstream by generate_code as written — confirm intent
|
|
@app.post("/v1/completions")
async def generate_code(request: QueryRequest):
    """
    Proxy a completion request to the upstream model Space.

    Accepts an OpenAI-style body and returns an OpenAI-style
    ``{"choices": [{"text": ...}]}`` payload so common MCP clients
    work unchanged.

    Raises:
        HTTPException: 504 on upstream timeout, 502 when the upstream
            returns an HTTP error or a malformed payload, 500 for any
            other failure.
    """
    # NOTE(review): request.max_tokens is accepted but not forwarded —
    # the Gradio /run/predict call only carries the prompt. Confirm
    # whether the Space accepts a token-limit argument before wiring it.
    async with httpx.AsyncClient(timeout=30.0) as client:
        try:
            response = await client.post(
                f"{MODEL_SPACE_URL}/run/predict",
                json={"data": [request.prompt]},
            )
            # Surface upstream 4xx/5xx as a gateway error instead of
            # failing later inside .json() with a misleading 500.
            response.raise_for_status()
            result = response.json()
            generated_text = result["data"][0]
        except httpx.TimeoutException:
            raise HTTPException(status_code=504, detail="Model server timeout")
        except httpx.HTTPStatusError as e:
            raise HTTPException(
                status_code=502,
                detail=f"Model server error: {e.response.status_code}",
            ) from e
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # Upstream answered 200 but the payload did not have the
            # expected {"data": [text, ...]} shape (or was not JSON).
            raise HTTPException(
                status_code=502, detail="Malformed model server response"
            ) from e
        except Exception as e:
            # Last-resort catch-all so the proxy never leaks a raw traceback.
            raise HTTPException(status_code=500, detail=str(e)) from e
        return {"choices": [{"text": generated_text}]}
|
|
@app.get("/health")
async def health():
    """Liveness probe: confirms the proxy process is up and serving."""
    status_payload = {"status": "ok", "service": "Private LLM Proxy"}
    return status_payload