"""Docling Studio — unified FastAPI backend.

Single service providing document management (upload, CRUD), analysis
orchestration (async Docling processing), and PDF preview — all backed
by SQLite.

Conversion engine is selected via CONVERSION_ENGINE env var:

- "local"  — Docling runs in-process as a Python library (default)
- "remote" — delegates to a Docling Serve instance via HTTP
"""
from __future__ import annotations
import logging
from collections.abc import AsyncIterator
from contextlib import asynccontextmanager
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from api.analyses import router as analyses_router
from api.documents import router as documents_router
from infra.rate_limiter import RateLimiterMiddleware
from infra.settings import settings
from persistence.database import get_connection, init_db
from services.analysis_service import AnalysisService
# Root logging configuration: timestamped, level-tagged records.
# Fix: the separator in the format string was mojibake ("β") of an em-dash.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s — %(message)s",
)
logger = logging.getLogger(__name__)
def _build_converter():
    """Instantiate the conversion adapter selected by ``CONVERSION_ENGINE``.

    Returns an in-process ``LocalConverter`` by default; when the engine is
    set to ``"remote"``, returns a ``ServeConverter`` pointed at the
    configured Docling Serve instance instead.
    """
    engine = settings.conversion_engine
    if engine != "remote":
        # Default path: Docling runs as a library inside this process.
        from infra.local_converter import LocalConverter

        logger.info("Using local Docling converter")
        return LocalConverter()

    # Remote path: conversions are delegated over HTTP.
    from infra.serve_converter import ServeConverter

    logger.info("Using remote Docling Serve at %s", settings.docling_serve_url)
    return ServeConverter(
        base_url=settings.docling_serve_url,
        api_key=settings.docling_serve_api_key,
    )
def _build_chunker():
    """Instantiate the chunker adapter — only available in local mode.

    Returns ``None`` when the conversion engine is anything other than
    ``"local"`` (chunking is not supported via Docling Serve).
    """
    if settings.conversion_engine != "local":
        return None

    from infra.local_chunker import LocalChunker

    return LocalChunker()
def _build_analysis_service() -> AnalysisService:
    """Assemble the :class:`AnalysisService` from the configured adapters.

    Wires the converter and (optional) chunker together with the timeout
    and concurrency limits taken from settings.
    """
    return AnalysisService(
        converter=_build_converter(),
        chunker=_build_chunker(),
        conversion_timeout=settings.conversion_timeout,
        max_concurrent=settings.max_concurrent_analyses,
    )
# ---------------------------------------------------------------------------
# FastAPI app
# ---------------------------------------------------------------------------
@asynccontextmanager
async def lifespan(app: FastAPI) -> AsyncIterator[None]:
    """Application lifespan: set up persistence and the analysis service.

    Runs once before the app starts serving requests; control resumes past
    the ``yield`` at shutdown (no teardown work is needed here).
    """
    await init_db()
    service = _build_analysis_service()
    app.state.analysis_service = service
    logger.info("Docling Studio backend ready (engine=%s)", settings.conversion_engine)
    yield
# Application instance; startup wiring happens in `lifespan` above.
app = FastAPI(
    title="Docling Studio",
    description="Document analysis studio powered by Docling",
    lifespan=lifespan,
)
# Browser cross-origin access, restricted to the configured origins and to
# the verbs/headers this API actually uses.
app.add_middleware(
    CORSMiddleware,
    allow_origins=settings.cors_origins,
    allow_credentials=True,
    allow_methods=["GET", "POST", "DELETE", "OPTIONS"],
    allow_headers=["Content-Type", "Authorization"],
)
# NOTE(review): added after CORS, so it sits outermost in the middleware
# stack — rate limiting runs before CORS handling, meaning preflight OPTIONS
# requests also count against the limit. Confirm that is intentional.
app.add_middleware(RateLimiterMiddleware, requests_per_window=100, window_seconds=60)
app.include_router(documents_router)
app.include_router(analyses_router)
@app.get("/api/health")
async def health() -> dict[str, str | int]:
    """Health check endpoint — verifies database connectivity.

    Reports ``status: ok`` when a trivial query succeeds, ``degraded``
    otherwise, alongside version/engine/deployment metadata.
    """
    try:
        async with get_connection() as db:
            await db.execute("SELECT 1")
        db_status = "ok"
    except Exception:
        # Boundary handler: the probe must answer even when the DB is down.
        db_status = "error"
        logger.warning("Health check: database unreachable", exc_info=True)

    payload: dict[str, str | int] = {
        "status": "ok" if db_status == "ok" else "degraded",
        "version": settings.app_version,
        "engine": settings.conversion_engine,
        "deploymentMode": settings.deployment_mode,
        "database": db_status,
    }
    # A non-positive limit means "unlimited" and is omitted from the response.
    if settings.max_page_count > 0:
        payload["maxPageCount"] = settings.max_page_count
    return payload
|