"""Main FastAPI application for Hugging Face inference API.""" import logging from contextlib import asynccontextmanager from fastapi import FastAPI, HTTPException from .config import get_settings from .inference import InferenceEngine from .models import HealthResponse, InferenceRequest, InferenceResponse logging.basicConfig( level=logging.INFO, format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", ) logger = logging.getLogger(__name__) settings = get_settings() engine = InferenceEngine(settings) @asynccontextmanager async def lifespan(app: FastAPI): """Handle application startup and shutdown.""" logger.info("Starting inference API...") engine.load_model() yield logger.info("Shutting down inference API...") app = FastAPI( title="Hugging Face Inference API", description="REST API for Hugging Face model inference", version="1.0.0", lifespan=lifespan, ) @app.get("/health", response_model=HealthResponse) async def health_check() -> HealthResponse: """Check API and model health status.""" return HealthResponse( status="ok", model_loaded=engine.model_loaded, model_name=settings.model_name if engine.model_loaded else None, ) @app.post("/predict", response_model=InferenceResponse) async def predict(request: InferenceRequest) -> InferenceResponse: """Run inference on the provided input(s).""" if not engine.model_loaded: raise HTTPException(status_code=503, detail="Model not loaded") try: predictions = engine.predict(request.inputs, request.parameters) return InferenceResponse( predictions=predictions, model_name=settings.model_name, ) except Exception as e: logger.exception("Inference failed") raise HTTPException(status_code=500, detail=str(e)) @app.get("/") async def root(): """Root endpoint with API information.""" return { "name": "Hugging Face Inference API", "version": "1.0.0", "model": settings.model_name, "task": settings.task, "docs": "/docs", } if __name__ == "__main__": import uvicorn uvicorn.run( "app.main:app", host=settings.host, port=settings.port, reload=True, )