Spaces:
Running
Running
"""Single-file FastAPI entrypoint for the 9XAIPal Hugging Face Space.

The Space uses the Docker SDK, so we serve everything from one plain FastAPI
app on port 7860:

* ``/api/v1/*`` -> the real 9XAIPal backend router
* ``/*`` -> the compiled React build (SPA, with client-side routing)

The React build (``dist/index.html``) references its bundles with absolute
root paths (``/assets/index-*.js``), so the SPA MUST be served from the root.
Mounting it under a sub-path (e.g. ``/ui``) makes those asset requests 404 and
the page renders blank. The API router is registered before the static mount,
so ``/api/v1`` and ``/api/docs`` still win over the catch-all.

We deliberately do NOT use Gradio as a wrapper: as of Gradio 6.0 the FastAPI
instance exposed as ``demo.app`` is no longer the app that ``launch()`` serves,
so routers/static mounts added to it silently 404. Running uvicorn on our own
FastAPI app removes that ambiguity. All config is read from environment
variables so the container works cleanly with Hugging Face Space secrets.
"""
| import asyncio | |
| import logging | |
| import os | |
| from contextlib import asynccontextmanager | |
| from pathlib import Path | |
| from typing import AsyncIterator | |
| import uvicorn | |
| from fastapi import FastAPI | |
| from fastapi.responses import JSONResponse | |
| from fastapi.staticfiles import StaticFiles | |
| from sqlalchemy import text | |
| from starlette.responses import FileResponse | |
# Configure root logging once; this entrypoint logs through "9xaipal.space".
logging.basicConfig(format="%(levelname)s: %(message)s", level=logging.INFO)
logger = logging.getLogger("9xaipal.space")

# -----------------------------------------------------------------------------
# Runtime configuration is taken from environment variables only; a local
# .env file is never read.
# -----------------------------------------------------------------------------
HOST = os.environ.get("HOST", "0.0.0.0")
# PORT wins; GRADIO_SERVER_PORT is the fallback; 7860 is the final default.
PORT = int(os.environ.get("PORT", os.environ.get("GRADIO_SERVER_PORT", "7860")))
UI_DIR = Path(os.environ.get("UI_DIR", "/code/dist")).resolve()

# Reuse the real backend's startup lifecycle so the database schema is created
# on boot. The in-container Postgres starts empty, and app/main.py applies
# schema.sql + vector setup inside its `lifespan` hook — without attaching it
# here, the tables (documents, chunks, ...) never exist and every query fails
# with `relation "documents" does not exist`. If the backend package cannot be
# imported we fall back to static-UI-only mode.
try:
    from app.core.lifecycle import lifespan as _backend_lifespan  # type: ignore
except Exception as _exc:  # pragma: no cover
    _backend_lifespan = None
    logger.warning("Backend lifespan not importable (%s); skipping DB migrations", _exc)

# Backend handles used by the health checks and the watchdog. All three are
# reset to None together if any one of the imports fails, so the health
# endpoint reports a degraded, static-UI-only state.
try:
    from app.core.celery_app import celery_app as _celery_app  # type: ignore
    from app.core.config import settings as _settings  # type: ignore
    from app.database.connection import engine as _engine  # type: ignore
except Exception as _exc:  # pragma: no cover
    _celery_app = _settings = _engine = None
    logger.warning("Backend services not importable (%s); health checks degraded", _exc)
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Run the backend lifecycle, then start the ingestion watchdog.

    The watchdog is Space-specific: it detects stuck ingestion jobs and
    force-terminates + recreates them so uploads can't hang forever.

    The function is wrapped in ``asynccontextmanager`` so that it is a real
    async context manager when handed to ``FastAPI(lifespan=...)`` instead of
    a bare async generator function.

    Args:
        app: The FastAPI application whose startup/shutdown is being managed;
            it is forwarded unchanged to the backend lifespan.

    Yields:
        None, once startup work has finished; the app serves requests while
        suspended at the yield.
    """
    if _backend_lifespan is None:
        # Static-UI-only mode: there is no backend lifecycle to run.
        yield
        return

    async with _backend_lifespan(app):
        # The watchdog is best-effort: a failure to import or start it is
        # logged and must not prevent the app from booting.
        try:
            from app.core.watchdog import start_watchdog  # type: ignore

            start_watchdog()
        except Exception as exc:
            logger.warning("Could not start ingestion watchdog: %s", exc)
        yield
app = FastAPI(
    title="9XAIPal",
    docs_url="/api/docs",
    openapi_url="/api/openapi.json",
    lifespan=lifespan,
)

# -----------------------------------------------------------------------------
# Mount the real backend API router so /api/v1/... works. If the backend package
# is not importable, degrade to serving the static React UI only.
# -----------------------------------------------------------------------------
try:
    from app.api.v1.router import api_router  # type: ignore
    from app.api.errors import register_exception_handlers  # type: ignore
    from app.core.config import settings  # type: ignore
    from app.core.paths import (  # type: ignore
        images_dir,
        extracted_dir,
        assets_dir,
        research_images_dir,
    )
    from app.core.security import (  # type: ignore
        RateLimitMiddleware,
        SecurityHeadersMiddleware,
    )
    from fastapi.middleware.cors import CORSMiddleware

    # Replicate the middleware stack from app.main so CORS / security / rate
    # limiting behave the same as the standalone backend. Middleware is applied
    # inside-out: the last add_middleware call becomes the outermost.
    app.add_middleware(SecurityHeadersMiddleware)
    app.add_middleware(
        RateLimitMiddleware,
        limit_per_minute=settings.rate_limit_per_minute,
    )
    app.add_middleware(
        CORSMiddleware,
        allow_origins=settings.cors_origin_list,
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )

    app.include_router(api_router, prefix="/api/v1")
    register_exception_handlers(app)

    # Mount storage-backed static files (figures, raw PDFs, extracted artifacts,
    # research-agent images) BEFORE the SPA catch-all so they don't fall back to
    # index.html.
    #
    # Order matters: a mount matches every path under its prefix and routes are
    # tried in registration order, so the more specific
    # "/static/images/research" must be registered before "/static/images".
    # Otherwise the broader mount claims research URLs and this one is never
    # reached.
    app.mount(
        "/static/images/research",
        StaticFiles(directory=str(research_images_dir()), check_dir=False),
        name="research-images",
    )
    app.mount(
        "/static/images",
        StaticFiles(directory=str(images_dir()), check_dir=False),
        name="images",
    )
    app.mount(
        "/static/extracted",
        StaticFiles(directory=str(extracted_dir()), check_dir=False),
        name="extracted",
    )
    app.mount(
        "/static/assets",
        StaticFiles(directory=str(assets_dir()), check_dir=False),
        name="assets",
    )
    logger.info("Backend package found; mounted API at /api/v1")
except Exception as exc:  # pragma: no cover
    logger.warning("Backend package not importable (%s); serving static UI only", exc)
# -----------------------------------------------------------------------------
# Allow embedding in the Hugging Face Space "App" tab. HF renders the Space in
# an <iframe> on huggingface.co, but the app is served from *.hf.space — a
# different origin — so the backend's default "X-Frame-Options: SAMEORIGIN"
# makes the browser blank the iframe. We drop that header and replace it with a
# CSP frame-ancestors allow-list scoped to HF. Added last → outermost, so it
# runs after SecurityHeadersMiddleware has set the header and can override it.
# -----------------------------------------------------------------------------
@app.middleware("http")
async def allow_hf_iframe(request, call_next):
    """Rewrite framing headers so the Space can be embedded by Hugging Face.

    Registered as HTTP middleware on ``app``; without the registration the
    function is never invoked and the headers are left untouched.

    Args:
        request: The incoming request, passed through to ``call_next``.
        call_next: Callable that runs the rest of the stack and returns the
            response.

    Returns:
        The downstream response with ``x-frame-options`` removed (if present)
        and ``Content-Security-Policy`` set to the HF frame-ancestors
        allow-list.
    """
    response = await call_next(request)
    if "x-frame-options" in response.headers:
        del response.headers["x-frame-options"]
    # NOTE: this assignment sets the whole Content-Security-Policy value, not
    # just the frame-ancestors directive.
    response.headers["Content-Security-Policy"] = (
        "frame-ancestors 'self' https://huggingface.co https://*.hf.space"
    )
    return response
async def _check_postgres() -> str:
    """Probe the database by running ``SELECT 1`` on the backend engine.

    Returns:
        ``"ok"`` when the statement executes, ``"unavailable: <reason>"`` when
        the backend engine was not importable or the probe raised.
    """
    if _engine is None:
        return "unavailable: backend not importable"

    try:
        async with _engine.begin() as connection:
            await connection.execute(text("SELECT 1"))
    except Exception as err:
        # Any failure (connect, execute, or closing the block) is reported as
        # a status string rather than raised to the caller.
        return f"unavailable: {err}"
    return "ok"
async def _check_redis() -> str:
    """Probe Redis by sending PING to the Celery broker URL.

    A short-lived client is created for each probe. Its connection pool is
    disconnected afterwards so repeated health checks do not accumulate open
    sockets, and both the connect and the read are bounded by a 3-second
    timeout so a stalled server cannot hang the probe.

    Returns:
        ``"ok"`` when PING succeeds, ``"unavailable: <reason>"`` when the
        backend settings were not importable or the probe raised.
    """
    if _settings is None:
        return "unavailable: backend not importable"
    try:
        import redis

        client = redis.from_url(
            _settings.effective_celery_broker_url,
            socket_connect_timeout=3,
            socket_timeout=3,
        )
        try:
            # redis-py's sync client blocks, so run the ping off the event loop.
            await asyncio.to_thread(client.ping)
        finally:
            # Release the sockets owned by this throwaway client.
            client.connection_pool.disconnect()
        return "ok"
    except Exception as exc:
        return f"unavailable: {exc}"
async def _check_ollama() -> str:
    """Ask the backend's ``ollama_reachable`` helper whether Ollama responds.

    Returns:
        ``"ok"`` when the helper returns a truthy value, ``"unreachable"``
        when it returns a falsy one, and ``"unavailable: <reason>"`` when the
        helper cannot be imported or raises.
    """
    try:
        # Imported lazily so a missing backend package degrades to a status
        # string instead of breaking module import.
        from app.llm.resolver import ollama_reachable  # type: ignore

        reachable = bool(await ollama_reachable())
    except Exception as err:
        return f"unavailable: {err}"
    return "ok" if reachable else "unreachable"
async def _check_celery_workers() -> str:
    """Ping Celery workers through the app's control/inspect API.

    Returns:
        ``"ok (N worker[s])"`` when at least one worker replies,
        ``"no workers responded"`` when the ping result is empty, and
        ``"unavailable: <reason>"`` when the Celery app was not importable or
        the ping raised.
    """
    if _celery_app is None:
        return "unavailable: backend not importable"

    try:
        inspector = _celery_app.control.inspect(timeout=5.0)
        # The ping call blocks, so it is run in a worker thread.
        replies = await asyncio.to_thread(inspector.ping)
        if not replies:
            return "no workers responded"
        count = len(replies)
        plural = "" if count == 1 else "s"
        return f"ok ({count} worker{plural})"
    except Exception as err:
        return f"unavailable: {err}"
@app.get("/healthz")
async def healthz() -> dict:
    """Container health probe used by Docker HEALTHCHECK and HF Space.

    Reports the state of Postgres, Redis, Ollama, and Celery workers. If any
    required dependency is down the response is 503 so the container can be
    restarted / healed.

    The route is registered explicitly on ``app``; without it the path would
    be handled by whatever is mounted at "/". The four probes are independent
    and each converts its own failures into a status string, so they are run
    concurrently and the endpoint takes as long as the slowest probe rather
    than the sum of all timeouts.

    Returns:
        A dict with ``status`` (``"ok"`` or ``"degraded"``) plus one entry per
        dependency when everything is healthy; otherwise a ``JSONResponse``
        carrying the same payload with HTTP status 503.
    """
    names = ("database", "redis", "ollama", "celery")
    results = await asyncio.gather(
        _check_postgres(),
        _check_redis(),
        _check_ollama(),
        _check_celery_workers(),
    )
    checks = dict(zip(names, results))

    core_ok = all(checks[name] == "ok" for name in ("database", "redis", "ollama"))
    # Celery is also required for uploads to actually process; its healthy
    # status string carries a worker count, hence the prefix match.
    celery_ok = checks["celery"].startswith("ok")
    overall = "ok" if core_ok and celery_ok else "degraded"

    response = {"status": overall, **checks}
    if overall != "ok":
        return JSONResponse(status_code=503, content=response)
    return response
# -----------------------------------------------------------------------------
# Serve the compiled React build at the ROOT with SPA fallback so client-side
# routes (deep links) resolve to index.html instead of 404. This MUST be the
# last route added: as a "/" mount it matches every path, so the API router and
# /healthz above (registered first) take precedence.
# -----------------------------------------------------------------------------
_index = UI_DIR / "index.html"
if UI_DIR.is_dir() and _index.exists():
    from starlette.exceptions import HTTPException as _StarletteHTTPException

    class SPAStaticFiles(StaticFiles):
        """Static file app that answers unknown paths with the SPA's index.html."""

        async def get_response(self, path, scope):
            """Return the requested file, or index.html when it is not found.

            A miss can surface in two ways: as a raised ``HTTPException`` with
            status 404, or as a returned response whose status is 404. Both
            are treated as "unknown client-side route". Any other HTTP error
            is re-raised unchanged.
            """
            try:
                response = await super().get_response(path, scope)
            except _StarletteHTTPException as exc:
                if exc.status_code != 404:
                    raise
                # Unknown client-side route: hand it back to the SPA router.
                return FileResponse(_index)
            if response.status_code == 404:
                # Unknown client-side route: hand it back to the SPA router.
                return FileResponse(_index)
            return response

    app.mount(
        "/",
        SPAStaticFiles(directory=str(UI_DIR), html=True, check_dir=False),
        name="ui",
    )
    logger.info("React UI mounted at / (source: %s)", UI_DIR)
else:
    logger.warning(
        "No compiled React build found at %s; the UI will be unavailable. "
        "Run `npm run build` in the frontend and copy `dist/` here.",
        UI_DIR,
    )
if __name__ == "__main__":
    # Direct execution: serve the app with uvicorn on the configured
    # host and port.
    uvicorn.run(
        app,
        port=PORT,
        host=HOST,
        log_level="info",
    )