"""Launch the llama_cpp server app with request debug logging and open CORS.

Configuration is read from environment variables:

* ``MODEL_FILE`` -- path to the model file (default ``./model.gguf``)
* ``N_CTX``      -- value passed as ``Settings.n_ctx`` (default 4096)
* ``N_BATCH``    -- value passed as ``Settings.n_batch`` (default 512)
* ``HOST``       -- bind address when run as a script (default ``0.0.0.0``)
* ``PORT``       -- bind port when run as a script (default 7860)
"""

import os
import time

from fastapi import Request
from fastapi.middleware.cors import CORSMiddleware
from llama_cpp.server.app import create_app, Settings


def _env_int(name: str, default: int) -> int:
    """Return environment variable *name* parsed as an int, or *default* if unset.

    Raises:
        ValueError: if the variable is set but is not a valid integer. The
            message names the offending variable so misconfiguration is easy
            to spot in startup logs.
    """
    raw = os.environ.get(name)
    if raw is None:
        return default
    try:
        return int(raw)
    except ValueError as err:
        raise ValueError(
            f"Environment variable {name} must be an integer, got {raw!r}"
        ) from err


# Configuration from environment variables
model_path = os.environ.get("MODEL_FILE", "./model.gguf")
n_ctx = _env_int("N_CTX", 4096)
n_batch = _env_int("N_BATCH", 512)

settings = Settings(
    model=model_path,
    n_ctx=n_ctx,
    n_batch=n_batch,
)

app = create_app(settings=settings)

# Standard CORS Middleware
# NOTE(review): wildcard origins combined with allow_credentials=True lets any
# site issue credentialed requests, and the FastAPI CORS docs say wildcards
# must not be combined with credentials. Kept as-is to preserve behaviour;
# restrict the origins before exposing this beyond a trusted network.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)


# DEBUG MIDDLEWARE to catch 400s and see paths.
# Registered AFTER the CORS middleware on purpose: Starlette makes the most
# recently added middleware the outermost one, so this logger also sees CORS
# preflight (OPTIONS) requests that CORSMiddleware answers on its own.
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Print method, path, status code and duration for every HTTP request.

    Args:
        request: the incoming request.
        call_next: callable that forwards the request down the middleware
            chain and returns the response.

    Returns:
        The response produced by ``call_next``, unchanged.

    Raises:
        Exception: whatever ``call_next`` raises is logged and re-raised
            untouched so the normal error handling still applies.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # by system clock adjustments the way time.time() can.
    started = time.perf_counter()
    path = request.url.path
    method = request.method

    print(f"DEBUG: Incoming {method} {path}")

    try:
        response = await call_next(request)
    except Exception:
        # Without this, a request that blows up leaves only the "Incoming"
        # line behind and no hint that it failed.
        elapsed = time.perf_counter() - started
        print(f"DEBUG: {method} {path} - Unhandled exception - Time: {elapsed:.4f}s")
        raise

    process_time = time.perf_counter() - started
    print(f"DEBUG: {method} {path} - Status: {response.status_code} - Time: {process_time:.4f}s")
    return response


if __name__ == "__main__":
    import uvicorn

    host = os.environ.get("HOST", "0.0.0.0")
    port = _env_int("PORT", 7860)
    print(f"Starting Intelligent Node with DEBUG LOGS & CORS on {host}:{port}")
    uvicorn.run(app, host=host, port=port)