Spaces:

TeamGenKI
/

Inference-API

Runtime error

App Files Files Community

Inference-API / main /main.py

AurelioAguirre

Added resources folder for configs

24b4bfe 12 months ago

raw

history blame

2.69 kB

	"""
	LLM Inference Server main application using LitServe framework.
	"""
	import litserve as ls
	import logging
	import os
	from fastapi.middleware.cors import CORSMiddleware
	from huggingface_hub import login
	from .routes import router, init_router
	from .api import InferenceApi
	from .utils import load_config

	# Handles produced by server.launch_inference_worker() are kept in
	# module-level names so they don't get garbage collected; create_app()
	# rebinds both of them.
	_WORKER_PROCESSES = []
	_MANAGER = None

	# Load configuration once at import time; create_app() and the
	# __main__ runner at the bottom of this module both read it.
	config = load_config()


	def setup_logging():
	    """Configure basic logging and return this module's logger.

	    Installs a basic logging configuration at INFO level using a
	    "time - logger name - level - message" record format, then returns
	    the logger named after the current module.
	    """
	    record_format = '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
	    logging.basicConfig(format=record_format, level=logging.INFO)
	    module_logger = logging.getLogger(__name__)
	    return module_logger


	def create_app():
	    """Build and return the application object served by this module.

	    In order, this function:

	    1. configures logging;
	    2. attempts a Hugging Face Hub login when the ``InfAPITokenWrite``
	       environment variable is set (a failed login is logged, not raised);
	    3. constructs ``InferenceApi`` from the module-level ``config`` and
	       passes it to ``init_router``;
	    4. creates the ``LitServer`` and launches its inference worker,
	       storing the returned handles in ``_MANAGER`` and
	       ``_WORKER_PROCESSES``;
	    5. adds CORS middleware and the API routes to ``server.app``.

	    Returns:
	        ``server.app`` with middleware, routes and ``response_queue_id``
	        set.
	    """
	    # Only the worker handles are rebound here; ``config`` is read-only.
	    global _MANAGER, _WORKER_PROCESSES

	    log = setup_logging()

	    # Log into Hugging Face Hub when a token is available.
	    hf_token = os.environ.get("InfAPITokenWrite")
	    if not hf_token:
	        log.warning("No Hugging Face access token found")
	    else:
	        try:
	            login(token=hf_token)
	            log.info("Successfully logged into Hugging Face Hub")
	        except Exception as exc:
	            # Best effort: a failed login is reported and startup continues.
	            log.error(f"Failed to login to Hugging Face Hub: {str(exc)}")

	    server_settings = config.get('server', {})

	    # The router needs the API instance before any routes are mounted.
	    inference_api = InferenceApi(config)
	    init_router(inference_api, config)

	    # Create the LitServer instance around the API.
	    lit_server_options = {
	        'timeout': server_settings.get('timeout', 60),
	        'max_batch_size': server_settings.get('max_batch_size', 1),
	        'track_requests': True,
	    }
	    lit_server = ls.LitServer(inference_api, **lit_server_options)

	    # Launch inference workers (assuming single uvicorn worker for now).
	    # The handles go into module globals so they are not garbage collected.
	    worker_handles = lit_server.launch_inference_worker(num_uvicorn_servers=1)
	    _MANAGER, _WORKER_PROCESSES = worker_handles

	    fastapi_app = lit_server.app

	    # NOTE(review): wildcard origins together with allow_credentials=True
	    # is discouraged by the Starlette CORS documentation; confirm whether
	    # credentials are really needed or list explicit origins instead.
	    cors_options = {
	        'allow_origins': ["*"],
	        'allow_credentials': True,
	        'allow_methods': ["*"],
	        'allow_headers': ["*"],
	    }
	    fastapi_app.add_middleware(CORSMiddleware, **cors_options)

	    # Mount the routes under the configured prefix.
	    llm_server_settings = config.get('llm_server', {})
	    route_prefix = llm_server_settings.get('api_prefix', '/api/v1')
	    fastapi_app.include_router(router, prefix=route_prefix)

	    # Set the response queue ID for the app: 0, since a single worker is used.
	    fastapi_app.response_queue_id = 0

	    return fastapi_app

	# Create the app instance at import time so an ASGI server such as
	# uvicorn can load ``app`` from this module.
	app = create_app()

	if __name__ == "__main__":
	    # Run the app with uvicorn when this module is executed directly.
	    import uvicorn

	    # Treat the "server" and "logging" sections as optional, the same way
	    # create_app() reads "server", instead of raising KeyError on a sparse
	    # config after the workers have already been launched. The host/port
	    # fallbacks mirror uvicorn's own defaults; the level fallback matches
	    # the INFO level used by setup_logging().
	    server_config = config.get("server", {})
	    logging_config = config.get("logging", {})
	    uvicorn.run(
	        app,
	        host=server_config.get("host", "127.0.0.1"),
	        port=server_config.get("port", 8000),
	        log_level=logging_config.get("level", "INFO").lower(),
	    )