Spaces:
Running
Running
| """ | |
| Knowledge Universe API - Main FastAPI Application | |
| """ | |
| import asyncio | |
| import json | |
| import logging | |
| import os | |
| from contextlib import asynccontextmanager | |
| from fastapi import FastAPI, Request | |
| from fastapi.middleware.cors import CORSMiddleware | |
| from fastapi.responses import JSONResponse, FileResponse | |
| from fastapi.exceptions import RequestValidationError | |
| from fastapi.staticfiles import StaticFiles | |
| from prometheus_client import make_asgi_app | |
| from config.settings import get_settings | |
| from src.api.routes import router as block2_router | |
| from src.api.models import SourceFormat | |
| from src.api.middleware import RateLimitMiddleware, MetricsMiddleware | |
| from src.cache.redis_manager import RedisManager | |
| from src.cache.eviction_manager import EvictionManager | |
| from src.orchestration.background_tasks import BackgroundTaskManager | |
# Module-wide logger, named after this module's import path.
logger = logging.getLogger(__name__)

# Application settings, resolved once at import time and shared by the code below.
settings = get_settings()
| # ------------------------------------------------------------------ | |
| # 🔒 REQUEST SANITIZATION MIDDLEWARE (CRITICAL FIX) | |
| # ------------------------------------------------------------------ | |
class RequestSanitizerMiddleware:
    """
    ASGI middleware that sanitizes ``POST .../discover`` JSON bodies before
    they reach request validation.

    Entries of the top-level ``formats`` list that ``SourceFormat`` rejects
    are dropped (with a warning), so one bad enum value does not turn the
    whole request into a 422. Every other request, and any body that cannot
    be parsed or re-serialized, is forwarded byte-for-byte unchanged.
    """

    def __init__(self, app: FastAPI):
        # The next ASGI application in the stack; every request is handed to it.
        self.app = app

    async def __call__(self, scope, receive, send):
        """Handle one ASGI connection, rewriting the body only for POST .../discover."""
        if scope["type"] != "http":
            await self.app(scope, receive, send)
            return

        request = Request(scope, receive=receive)
        if request.method != "POST" or not request.url.path.endswith("/discover"):
            await self.app(scope, receive, send)
            return

        # Drain the body exactly once. Reading it consumes the ASGI receive
        # channel, so from here on the downstream app must be fed a replay.
        try:
            raw_body = await request.body()
        except Exception as e:
            # Best effort: let the downstream app deal with the broken stream.
            logger.warning(f"Could not read body on POST {request.url.path}: {e}")
            await self.app(scope, receive, send)
            return

        payload = self._sanitize(raw_body, request.url.path)
        body_replayed = False

        async def receive_override():
            # First call replays the (possibly sanitized) body; later calls
            # defer to the real channel so client disconnects are still seen.
            nonlocal body_replayed
            if body_replayed:
                return await receive()
            body_replayed = True
            return {"type": "http.request", "body": payload, "more_body": False}

        # Deliberately outside any try/except: an exception raised by the
        # downstream app must propagate, not trigger a second invocation.
        # NOTE(review): the Content-Length header in ``scope`` is not updated
        # when the body is rewritten — confirm nothing downstream relies on it.
        await self.app(scope, receive_override, send)

    @staticmethod
    def _sanitize(raw_body: bytes, path: str) -> bytes:
        """Return ``raw_body`` with invalid ``formats`` entries removed.

        The input is returned unchanged when it is not valid JSON, is not a
        JSON object, has no list-valued ``formats`` key, or cannot be
        re-serialized.
        """
        try:
            body = json.loads(raw_body)
        except ValueError as e:  # JSONDecodeError and UnicodeDecodeError
            logger.warning(f"Malformed JSON on POST {path}: {e}")
            return raw_body

        if not isinstance(body, dict) or not isinstance(body.get("formats"), list):
            return raw_body

        cleaned = []
        for f in body["formats"]:
            try:
                # Store the enum's value, not the member, so the result is
                # JSON-serializable regardless of how SourceFormat is declared.
                cleaned.append(SourceFormat(f).value)
            except Exception:
                # Broad on purpose: what the enum lookup raises for a bad
                # value depends on SourceFormat, which is defined elsewhere.
                logger.warning(f"Dropped invalid format value: '{f}'")
        body["formats"] = cleaned

        try:
            return json.dumps(body).encode()
        except (TypeError, ValueError) as e:
            logger.warning(f"Could not re-serialize sanitized body on POST {path}: {e}")
            return raw_body
| # ------------------------------------------------------------------ | |
| # LIFESPAN | |
| # ------------------------------------------------------------------ | |
@asynccontextmanager
async def lifespan(app: FastAPI):
    """
    Application lifespan: start shared services, yield while the app serves
    requests, then tear the services down.

    Startup connects Redis, initializes the key rotators, creates the
    eviction manager, starts the cleanup daemon, pre-warms the embedding
    model and (when ``settings.WARMUP_ENABLED``) schedules a cache warm-up.
    The Redis manager, eviction manager and background tasks are exposed on
    ``app.state``.
    """
    logger.info("Starting Knowledge Universe API...")

    # Report which credentials are present without logging their values.
    logger.info("=" * 60)
    logger.info("ENVIRONMENT DIAGNOSTIC")
    for var in ("GITHUB_TOKEN", "KAGGLE_USERNAME", "KAGGLE_KEY"):
        logger.info(f"{var}: {'SET ✓' if os.getenv(var) else 'MISSING ✗'}")
    logger.info("=" * 60)

    redis_manager = RedisManager()
    await redis_manager.connect()
    app.state.redis = redis_manager

    from src.utils.key_rotator import init_key_rotators
    init_key_rotators(redis_manager)

    eviction_manager = EvictionManager(redis_manager)
    app.state.eviction_manager = eviction_manager

    background_tasks = BackgroundTaskManager(redis_manager)
    cleanup_task = asyncio.create_task(background_tasks.start_cleanup_daemon())
    app.state.cleanup_task = cleanup_task

    # Pre-warm the embedding model so the first real request doesn't pay
    # the initialization cost (~500-800ms on HuggingFace Spaces).
    # This runs synchronously in the lifespan — the call has returned (or
    # failed, which is non-fatal) before any request is served.
    try:
        from src.integrations.shared_model import prewarm_model
        logger.info("Pre-warming embedding model...")
        prewarm_model()
    except Exception as e:
        logger.warning(f"Model pre-warm failed (non-fatal): {e}")

    # Keep a strong reference to the warm-up task: the event loop only holds
    # weak references, so an unreferenced task may be collected mid-run.
    warmup_task = None
    if settings.WARMUP_ENABLED:
        warmup_task = asyncio.create_task(background_tasks.warmup_cache())
    app.state.warmup_task = warmup_task

    logger.info("API ready")
    try:
        yield
    finally:
        # Runs even when shutdown is triggered by an exception/cancellation.
        pending = [t for t in (cleanup_task, warmup_task) if t is not None]
        for task in pending:
            task.cancel()
        # Give the tasks a bounded amount of time to unwind before Redis is
        # closed underneath them; never block shutdown indefinitely.
        await asyncio.wait(pending, timeout=5)
        await redis_manager.close()
        logger.info("Shutdown complete")
| # ------------------------------------------------------------------ | |
| # APP | |
| # ------------------------------------------------------------------ | |
app = FastAPI(
    lifespan=lifespan,
    title=settings.API_TITLE,
    version=settings.API_VERSION,
    description="Intelligent content aggregation and ranking for educational materials",
    # The interactive API docs are moved off their default URLs; /docs is
    # reserved for the human-readable documentation page.
    docs_url="/api-docs",
    redoc_url="/api-redoc",
)

# Middleware registration order is significant — do not reorder these calls.
# NOTE(review): Starlette runs the most recently added middleware first, so
# the sanitizer registered here ends up closest to the router — confirm that
# is the intended position.
app.add_middleware(RequestSanitizerMiddleware)
app.add_middleware(RateLimitMiddleware)
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=settings.ALLOWED_ORIGINS,
    allow_headers=["*"],
    allow_methods=["*"],
)

# Metrics are optional: the middleware and the Prometheus ASGI app under
# /metrics are only installed when enabled in settings.
if settings.ENABLE_METRICS:
    app.add_middleware(MetricsMiddleware)
    app.mount("/metrics", make_asgi_app())

app.include_router(block2_router)
| # ------------------------------------------------------------------ | |
| # 🧯 VALIDATION ERROR HANDLER (422) | |
| # ------------------------------------------------------------------ | |
@app.exception_handler(RequestValidationError)
async def validation_exception_handler(request: Request, exc: RequestValidationError):
    """
    Turn a request-validation failure into a 422 JSON response.

    The response carries the validation errors under ``details`` and echoes
    the offending request body under ``body``.
    """
    # Local import, matching the other function-scope imports in this module.
    from fastapi.encoders import jsonable_encoder

    errors = exc.errors()
    logger.warning(f"422 Validation error on {request.url}: {errors}")
    return JSONResponse(
        status_code=422,
        # The error list and the echoed body may hold values the plain JSON
        # encoder rejects (e.g. bytes or exception objects); normalize first
        # so the error handler itself cannot fail with a 500.
        content=jsonable_encoder(
            {
                "error": "Validation error",
                "details": errors,
                "body": exc.body,
            }
        ),
    )
| # ------------------------------------------------------------------ | |
| # GLOBAL ERROR HANDLER | |
| # ------------------------------------------------------------------ | |
@app.exception_handler(Exception)
async def global_exception_handler(request: Request, exc: Exception):
    """
    Catch-all handler: log the exception with its traceback and answer 500.

    The exception text is only included in the response when
    ``settings.DEBUG`` is truthy; otherwise a generic message is returned.
    """
    # Pass the exception explicitly so the traceback is logged even if this
    # handler is not running inside an active ``except`` block.
    logger.error(f"Unhandled exception: {exc}", exc_info=exc)
    return JSONResponse(
        status_code=500,
        content={
            "error": "Internal server error",
            "message": str(exc) if settings.DEBUG else "An error occurred",
            "path": str(request.url),
        },
    )
| # ------------------------------------------------------------------ | |
| # HEALTH | |
| # ------------------------------------------------------------------ | |
async def health_check(request: Request):
    """
    Report service health based on a Redis ping.

    Returns a dict with ``status`` ("healthy" or "degraded"), the API
    ``version`` and the ``redis`` connection state. A ping that raises, or a
    Redis manager missing from ``app.state``, is reported as degraded rather
    than surfacing as a 500.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file — confirm it is registered with the app.
    try:
        redis_healthy = await request.app.state.redis.ping()
    except Exception as e:
        logger.warning(f"Redis health probe failed: {e}")
        redis_healthy = False

    return {
        "status": "healthy" if redis_healthy else "degraded",
        "version": settings.API_VERSION,
        "redis": "connected" if redis_healthy else "disconnected",
    }
async def readiness_check(request: Request):
    """
    Report whether the service is ready to take traffic.

    Returns ``{"status": "ready"}`` when the Redis ping succeeds, otherwise a
    503 JSON response. A ping that raises, or a Redis manager missing from
    ``app.state``, counts as not ready instead of surfacing as a 500.
    """
    # NOTE(review): no route decorator is visible on this handler in this
    # view of the file — confirm it is registered with the app.
    try:
        redis_ready = await request.app.state.redis.ping()
    except Exception as e:
        logger.warning(f"Redis readiness probe failed: {e}")
        redis_ready = False

    if not redis_ready:
        return JSONResponse(
            status_code=503,
            content={"status": "not ready", "reason": "redis unavailable"},
        )
    return {"status": "ready"}
# Repository root: this module lives at src/api/main.py, so climb two
# directories up from the folder that contains it.
_PROJECT_ROOT = os.path.abspath(
    os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)
)
# "landing" directory directly under the project root.
_LANDING_DIR = os.path.join(_PROJECT_ROOT, "landing")
@app.get("/")
async def root():
    """
    Serve the landing page.

    Returns ``landing/index.html`` when that file exists; otherwise a small
    JSON summary (name, version, status and pointers to docs and signup).
    """
    landing = os.path.join(_LANDING_DIR, "index.html")
    # isfile rather than exists: a directory with that name must not be
    # handed to FileResponse.
    if os.path.isfile(landing):
        return FileResponse(landing, media_type="text/html")
    return {
        "name": settings.API_TITLE,
        "version": settings.API_VERSION,
        "status": "operational",
        "docs": "/docs",
        "signup": "/v1/signup",
    }
@app.get("/docs")
async def serve_docs():
    """
    Serve the human-readable API documentation page.

    Returns ``landing/docs.html`` when that file exists, otherwise a 404 JSON
    error explaining that the file is missing.
    """
    docs_path = os.path.join(_LANDING_DIR, "docs.html")
    # isfile rather than exists: a directory with that name must not be
    # handed to FileResponse.
    if os.path.isfile(docs_path):
        return FileResponse(docs_path, media_type="text/html")
    return JSONResponse(
        status_code=404,
        content={"error": "docs.html not found in landing/. Check the file exists."},
    )