llama-3.2-api-backend / server_root.py
zenotaiz's picture
Fixed: Transitioned to native python model downloader for stability
dc9a9af verified
from llama_cpp.server.app import create_app, Settings
from fastapi.middleware.cors import CORSMiddleware
from fastapi import Request
import os
import time
# Runtime configuration, read from environment variables with local defaults.
# MODEL_FILE is handed to Settings(model=...); N_CTX and N_BATCH are parsed
# as integers and passed through as n_ctx / n_batch.
model_path = os.getenv("MODEL_FILE", "./model.gguf")
n_ctx = int(os.getenv("N_CTX", "4096"))
n_batch = int(os.getenv("N_BATCH", "512"))

# Build the llama_cpp server application from the values above.
settings = Settings(model=model_path, n_ctx=n_ctx, n_batch=n_batch)
app = create_app(settings=settings)
# DEBUG middleware: prints every request it sees and the status it produced,
# to help track down unexpected 400s and wrong paths.
@app.middleware("http")
async def log_requests(request: Request, call_next):
    """Log the method, path, status code and duration of an HTTP request.

    Args:
        request: The incoming request.
        call_next: Awaitable callable that takes the request and returns
            the response.

    Returns:
        The response returned by ``call_next``, unmodified.

    Raises:
        Exception: Anything raised by ``call_next`` is logged and re-raised
            unchanged.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed
    # (or go negative) when the system clock is adjusted mid-request.
    start_time = time.perf_counter()
    path = request.url.path
    method = request.method
    print(f"DEBUG: Incoming {method} {path}")
    try:
        response = await call_next(request)
    except Exception as exc:
        # Without this, a failing request would leave only the "Incoming"
        # line behind, which defeats the purpose of a debug log.
        process_time = time.perf_counter() - start_time
        print(
            f"DEBUG: {method} {path} - Unhandled {type(exc).__name__}: {exc}"
            f" - Time: {process_time:.4f}s"
        )
        raise
    process_time = time.perf_counter() - start_time
    print(f"DEBUG: {method} {path} - Status: {response.status_code} - Time: {process_time:.4f}s")
    return response
# CORS middleware.
# Allowed origins come from the optional CORS_ALLOW_ORIGINS environment
# variable (comma-separated); when it is unset or empty, every origin is
# allowed ("*"), as before.
# Credentialed requests are only enabled when explicit origins are configured:
# FastAPI's CORS documentation states that a wildcard origin must not be
# combined with allow_credentials=True.
_cors_origins = [
    origin.strip()
    for origin in os.environ.get("CORS_ALLOW_ORIGINS", "*").split(",")
    if origin.strip()
] or ["*"]
app.add_middleware(
    CORSMiddleware,
    allow_origins=_cors_origins,
    allow_credentials="*" not in _cors_origins,
    allow_methods=["*"],
    allow_headers=["*"],
)
if __name__ == "__main__":
    # Script entry point: serve `app` with uvicorn on the address given by
    # the HOST and PORT environment variables (defaults 0.0.0.0 and 7860).
    import uvicorn

    port = int(os.getenv("PORT", "7860"))
    host = os.getenv("HOST", "0.0.0.0")
    print(f"Starting Intelligent Node with DEBUG LOGS & CORS on {host}:{port}")
    uvicorn.run(app, port=port, host=host)