Spaces:
Sleeping
Sleeping
| import os | |
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from pydantic import BaseModel | |
| from typing import List | |
| import httpx | |
| import logging | |
# Module-level setup: logging, the upstream credential, the FastAPI app, and CORS.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Get HF token from Space Secrets.
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    # Fail fast at import time: without a token (unset or empty) every
    # upstream call would be sent with an unusable Authorization header.
    raise RuntimeError("HF_TOKEN environment variable not set")

app = FastAPI(title="HF Inference Proxy")

# Enable CORS for Unity.
# Credentials are deliberately disabled: wildcard origins must not be combined
# with allow_credentials=True (FastAPI's CORS documentation requires explicit
# origins in that case). The handlers visible here never read cookies or
# client auth; the upstream call authenticates with the server-side HF_TOKEN.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
class Message(BaseModel):
    """One chat message, forwarded upstream as a ``role``/``content`` pair."""

    # Speaker label; passed through unchanged (no validation beyond ``str``).
    role: str
    # Text of the message; passed through unchanged.
    content: str
class InferenceRequest(BaseModel):
    """Request body describing a single chat-completion call."""

    # Model identifier supplied by the caller (required).
    model: str
    # Sampling temperature (required; no bounds are enforced here).
    temperature: float
    # Conversation to send, in order.
    messages: List[Message]
    # Completion length limit; optional, falls back to 512 when omitted.
    max_tokens: int = 512
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm that it is registered on `app` (and under which path).
async def root():
    """Return a static status payload describing the proxy.

    Returns:
        A dict with a ``status`` string, a human-readable ``message`` and an
        ``endpoints`` mapping that advertises the available routes.
    """
    advertised_endpoints = {"extract": "POST /extract"}
    return dict(
        status="running",
        message="HF Inference Proxy Active",
        endpoints=advertised_endpoints,
    )
# Path matches the "POST /extract" endpoint advertised by root(); without this
# registration the handler is never reachable through `app`.
@app.post("/extract")
async def proxy_inference(request: InferenceRequest):
    """Forward a chat-completion request to the Hugging Face router.

    The caller's model name is sent with a ``:fastest`` suffix, and the
    request is authenticated with the server-side ``HF_TOKEN``.

    Args:
        request: Validated request body (model, temperature, messages,
            max_tokens).

    Returns:
        The decoded JSON body of the upstream response, unchanged.

    Raises:
        HTTPException: With the upstream status code and body text when the
            router answers with anything other than 200; with status 500 when
            the HTTP call itself fails or any other error occurs.
    """
    logger.info(
        "Request: %s, %d messages", request.model, len(request.messages)
    )

    payload = {
        "model": f"{request.model}:fastest",
        "temperature": request.temperature,
        "max_tokens": request.max_tokens,
        "messages": [
            {"role": msg.role, "content": msg.content}
            for msg in request.messages
        ],
    }
    upstream_headers = {
        "Authorization": f"Bearer {HF_TOKEN}",
        "Content-Type": "application/json",
    }

    try:
        async with httpx.AsyncClient(timeout=120.0) as client:
            hf_response = await client.post(
                "https://router.huggingface.co/v1/chat/completions",
                json=payload,
                headers=upstream_headers,
            )

        if hf_response.status_code != 200:
            logger.error("HF API error: %s", hf_response.text)
            raise HTTPException(
                status_code=hf_response.status_code,
                detail=hf_response.text,
            )

        result = hf_response.json()
        logger.info("Request successful")
        return result
    except HTTPException:
        # Already carries the upstream status and body. Without this clause
        # the catch-all below would re-wrap it as a generic 500 and hide the
        # real upstream status (401, 404, 429, ...) from the client.
        raise
    except httpx.HTTPError as e:
        logger.error("HTTP error: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Proxy error: {e}"
        ) from e
    except Exception as e:
        # Top-level boundary for this handler: log with traceback, then
        # surface a 500 to the client.
        logger.exception("Error: %s", e)
        raise HTTPException(
            status_code=500, detail=f"Server error: {e}"
        ) from e
# NOTE(review): no route decorator is visible on this handler in this view;
# confirm that it is registered on `app` (and under which path).
async def health():
    """Report liveness and whether an upstream token is configured.

    Returns:
        A dict with ``status`` fixed to ``"healthy"`` and ``hf_configured``
        set to the truthiness of the module-level ``HF_TOKEN``.
    """
    # The module refuses to import without HF_TOKEN, so this is expected to
    # be True whenever the handler can run at all.
    token_present = True if HF_TOKEN else False
    return {"status": "healthy", "hf_configured": token_present}