Spaces:
Runtime error
Runtime error
"""
LLM Inference Server main application using the LitServe framework.
"""
| import litserve as ls | |
| import logging | |
| import os | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from huggingface_hub import login | |
| from .routes import router, init_router | |
| from .api import InferenceApi | |
| from .utils import load_config | |
# Module-level handles for the inference worker manager and its processes.
# They are stored globally so they do not get garbage collected.
_MANAGER = None
_WORKER_PROCESSES = []

# Application configuration, loaded once when the module is imported.
config = load_config()
def setup_logging():
    """Configure basic logging and return this module's logger.

    ``logging.basicConfig`` is called with the INFO level and a
    "time - logger name - level - message" record format.

    Returns:
        The logger obtained from ``logging.getLogger(__name__)``.
    """
    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    logging.basicConfig(format=record_format, level=logging.INFO)
    module_logger = logging.getLogger(__name__)
    return module_logger
def _login_to_hugging_face_hub(logger):
    """Log into the Hugging Face Hub when an access token is available.

    The token is read from the ``InfAPITokenWrite`` environment variable.
    A missing token or a failed login is only logged; neither one stops
    application start-up.

    Args:
        logger: Logger used to report the outcome of the login attempt.
    """
    access_token = os.environ.get("InfAPITokenWrite")
    if not access_token:
        logger.warning("No Hugging Face access token found")
        return

    try:
        login(token=access_token)
    except Exception as exc:
        # Best effort: record the failure and let start-up continue.
        logger.error("Failed to login to Hugging Face Hub: %s", exc)
    else:
        logger.info("Successfully logged into Hugging Face Hub")


def create_app():
    """Create and configure the application instance.

    The function sets up logging, attempts a Hugging Face Hub login,
    builds the ``InferenceApi`` and hands it to the router, creates the
    ``LitServer``, launches its inference workers, and finally configures
    the server's FastAPI app (CORS middleware, API routes, response queue
    ID).

    Side effects:
        Rebinds the module-level ``_MANAGER`` and ``_WORKER_PROCESSES``
        with the values returned by ``launch_inference_worker``.

    Returns:
        The FastAPI app taken from ``server.app``.
    """
    # Only names that are rebound need ``global``; ``config`` is only read.
    global _WORKER_PROCESSES, _MANAGER

    logger = setup_logging()
    _login_to_hugging_face_hub(logger)

    server_config = config.get('server', {})

    # The same API instance is given to the router and to the LitServer.
    api = InferenceApi(config)
    init_router(api, config)

    server = ls.LitServer(
        api,
        timeout=server_config.get('timeout', 60),
        max_batch_size=server_config.get('max_batch_size', 1),
        track_requests=True,
    )

    # Launch inference workers (assuming a single uvicorn worker for now).
    # The returned handles are kept in module globals so they are not
    # garbage collected after this function returns.
    _MANAGER, _WORKER_PROCESSES = server.launch_inference_worker(
        num_uvicorn_servers=1
    )

    app = server.app

    # NOTE(review): wildcard origins combined with allow_credentials=True is
    # discouraged by the FastAPI CORS documentation -- consider listing the
    # allowed origins explicitly. Behaviour is left unchanged here.
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    # Mount the routes under the configured prefix.
    api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
    app.include_router(router, prefix=api_prefix)

    # Response queue ID for the app; 0 because a single worker is used.
    app.response_queue_id = 0

    return app
# Application instance created at import time so uvicorn can serve it.
app = create_app()


if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn using the configured
    # host, port and log level.
    import uvicorn

    server_settings = config["server"]
    uvicorn.run(
        app,
        host=server_settings["host"],
        port=server_settings["port"],
        log_level=config["logging"]["level"].lower(),
    )