Spaces:

TeamGenKI
/

Inference-API

Runtime error

App Files Files Community

AurelioAguirre committed on Jan 10, 2025

Commit

c6b21e3

1 Parent(s): 1bcc710

changed to uvicorn setup for HF

Browse files

Files changed (2) hide show

Dockerfile +1 -3
main/main.py +32 -14

Dockerfile CHANGED Viewed

@@ -17,6 +17,4 @@ COPY --chown=user main/ /app/main
 EXPOSE 7860
 # Command to run the application
-#CMD ["uvicorn", "main.main:app", "--host", "0.0.0.0", "--port", "7860"]
-CMD ["python", "-m", "main.main"]

 EXPOSE 7860
 # Command to run the application
+CMD ["uvicorn", "main.main:app", "--host", "0.0.0.0", "--port", "7860"]

main/main.py CHANGED Viewed

@@ -6,6 +6,7 @@ import yaml
 import logging
 import asyncio
 from pathlib import Path
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
 from .api import InferenceApi
@@ -24,8 +25,8 @@ def load_config():
     with open(config_path) as f:
         return yaml.safe_load(f)
-async def async_main():
-    """Create and configure the application instance asynchronously."""
     logger = setup_logging()
     try:
@@ -36,7 +37,7 @@ async def async_main():
         # Initialize API with config
         api = InferenceApi(config)
-        # Initialize router with the already setup API instance
         await init_router(api)
         # Create LitServer instance with config
@@ -47,8 +48,11 @@ async def async_main():
             track_requests=True
         )
         # Add CORS middleware
-        server.app.add_middleware(
             CORSMiddleware,
             allow_origins=["*"],
             allow_credentials=True,
@@ -58,22 +62,36 @@ async def async_main():
         # Add routes with configured prefix
         api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
-        server.app.include_router(router, prefix=api_prefix)
-        # Get configured port
-        port = server_config.get('port', 8001)
-        host = server_config.get('host', '0.0.0.0')
-        # Run server
-        server.run(host=host, port=port)
     except Exception as e:
-        logger.error(f"Server initialization failed: {str(e)}")
         raise
 def main():
-    """Entry point that runs the async main"""
-    asyncio.run(async_main())
 if __name__ == "__main__":
     main()

 import logging
 import asyncio
 from pathlib import Path
+from fastapi import FastAPI
 from fastapi.middleware.cors import CORSMiddleware
 from .routes import router, init_router
 from .api import InferenceApi
     with open(config_path) as f:
         return yaml.safe_load(f)
+async def init_app() -> tuple[FastAPI, InferenceApi, dict]:
+    """Initialize and configure the FastAPI application."""
     logger = setup_logging()
     try:
         # Initialize API with config
         api = InferenceApi(config)
+        # Initialize router with the API instance
         await init_router(api)
         # Create LitServer instance with config
             track_requests=True
         )
+        # Get the FastAPI app from the LitServer
+        app = server.app
         # Add CORS middleware
+        app.add_middleware(
             CORSMiddleware,
             allow_origins=["*"],
             allow_credentials=True,
         # Add routes with configured prefix
         api_prefix = config.get('llm_server', {}).get('api_prefix', '/api/v1')
+        app.include_router(router, prefix=api_prefix)
+        return app, api, config
     except Exception as e:
+        logger.error(f"Application initialization failed: {str(e)}")
         raise
+# Create the FastAPI app instance for uvicorn
+app, api_instance, config_dict = asyncio.get_event_loop().run_until_complete(init_app())
+async def run_server():
+    """Run the server directly (not through uvicorn)"""
+    server_config = config_dict.get('server', {})
+    port = server_config.get('port', 8001)
+    host = server_config.get('host', '0.0.0.0')
+    # Create LitServer instance with all required parameters
+    server = ls.LitServer(
+        api_instance,
+        timeout=server_config.get('timeout', 60),
+        max_batch_size=server_config.get('max_batch_size', 1),
+        track_requests=True
+    )
+    server.run(host=host, port=port)
 def main():
+    """Entry point that runs the server directly"""
+    asyncio.run(run_server())
 if __name__ == "__main__":
     main()