Spaces:
Sleeping
Sleeping
File size: 2,761 Bytes
a012807 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 | import os
from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from typing import List
import httpx
import logging
# Route INFO-and-above records through the root logger and give this module
# its own named logger.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# The Hugging Face access token comes from the environment (set as a Space
# secret). Refuse to start when it is missing or empty.
HF_TOKEN = os.getenv("HF_TOKEN")
if not HF_TOKEN:
    raise RuntimeError("HF_TOKEN environment variable not set")
app = FastAPI(title="HF Inference Proxy")

# Enable CORS for Unity.
# Credentialed CORS is switched off: wildcard origins/methods/headers must not
# be combined with allow_credentials=True (see the FastAPI CORS docs), and the
# endpoints visible in this module never read cookies or client credentials --
# upstream authentication uses the server-side HF_TOKEN only.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
class Message(BaseModel):
    """One chat message, forwarded upstream as a ``role``/``content`` pair."""

    role: str
    content: str
class InferenceRequest(BaseModel):
    """Request body accepted by ``POST /extract``."""

    # Model name; ":fastest" is appended to it before the request is forwarded.
    model: str
    # Forwarded unchanged as the "temperature" field of the upstream payload.
    temperature: float
    # Conversation to send; each entry is forwarded as a role/content pair.
    messages: List[Message]
    # Forwarded unchanged as "max_tokens"; 512 when the caller omits it.
    max_tokens: int = 512
@app.get("/")
async def root():
    """Return a static status payload that lists the proxy's endpoint."""
    endpoints = {"extract": "POST /extract"}
    return {
        "status": "running",
        "message": "HF Inference Proxy Active",
        "endpoints": endpoints,
    }
@app.post("/extract")
async def proxy_inference(request: InferenceRequest):
    """Forward a chat-completion request to the Hugging Face router.

    The incoming model name gets ``:fastest`` appended, and the payload is
    posted to the router's chat-completions endpoint with the server-side
    ``HF_TOKEN`` as bearer token.

    Args:
        request: Model name, temperature, messages and ``max_tokens`` to
            forward.

    Returns:
        The decoded JSON body of the router's 200 response.

    Raises:
        HTTPException: With the router's own status code and response text
            when the router answers with anything other than 200; with
            status 500 when the HTTP call itself fails (``httpx.HTTPError``)
            or any other error occurs.
    """
    try:
        logger.info(
            "Request: %s, %d messages", request.model, len(request.messages)
        )
        payload = {
            "model": f"{request.model}:fastest",
            "temperature": request.temperature,
            "max_tokens": request.max_tokens,
            "messages": [
                {"role": msg.role, "content": msg.content}
                for msg in request.messages
            ],
        }
        headers = {
            "Authorization": f"Bearer {HF_TOKEN}",
            "Content-Type": "application/json",
        }
        async with httpx.AsyncClient(timeout=120.0) as client:
            hf_response = await client.post(
                "https://router.huggingface.co/v1/chat/completions",
                json=payload,
                headers=headers,
            )
            if hf_response.status_code != 200:
                logger.error("HF API error: %s", hf_response.text)
                raise HTTPException(
                    status_code=hf_response.status_code,
                    detail=hf_response.text,
                )
            result = hf_response.json()
            logger.info("Request successful")
            return result
    except HTTPException:
        # The upstream-error response raised above must reach the client
        # unchanged. Without this clause the catch-all handler below would
        # swallow it and turn every upstream status into a generic 500.
        raise
    except httpx.HTTPError as e:
        logger.error("HTTP error: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Proxy error: {e}"
        ) from e
    except Exception as e:
        # Last-resort boundary: record the traceback, then report a 500.
        logger.exception("Error: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Server error: {e}"
        ) from e
@app.get("/health")
async def health():
    """Return a health payload that also reports whether HF_TOKEN is set."""
    token_present = bool(HF_TOKEN)
    return {"status": "healthy", "hf_configured": token_present}