from fastapi import FastAPI, UploadFile, File, HTTPException from docling.document_converter import DocumentConverter, PdfFormatOption from docling.datamodel.base_models import InputFormat from docling.datamodel.pipeline_options import PdfPipelineOptions, EasyOcrOptions import shutil import os import tempfile from pydantic import BaseModel import logging # Configure logging logging.basicConfig(level=logging.INFO) logger = logging.getLogger(__name__) app = FastAPI() # Global converter instance to reuse models _converter_instance = None def get_converter(): global _converter_instance if _converter_instance is None: logger.info("Initializing DocumentConverter with TableFormer and EasyOCR...") # Configure pipeline options for enhanced table structure and OCR pipeline_options = PdfPipelineOptions() pipeline_options.do_table_structure = True # Enable TableFormer pipeline_options.do_ocr = True pipeline_options.ocr_options = EasyOcrOptions() # Use EasyOCR for robust recognition on images # Initialize converter with explicit formats and options # Note: PdfPipelineOptions is passed via PdfFormatOption for both PDF and Image pipeline config in newer Docling versions _converter_instance = DocumentConverter( allowed_formats=[InputFormat.IMAGE, InputFormat.PDF], format_options={ InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options), InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options) } ) return _converter_instance class RecognitionResult(BaseModel): markdown: str @app.post("/process", response_model=RecognitionResult) async def process_document(file: UploadFile = File(...)): logger.info(f"Processing file: {file.filename}") # Create a temporary file to save the uploaded image suffix = os.path.splitext(file.filename)[1] if not suffix: suffix = ".tmp" with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp: shutil.copyfileobj(file.file, tmp) tmp_path = tmp.name try: converter = get_converter() logger.info(f"Converting file at {tmp_path}...") result = converter.convert(tmp_path) logger.info("Exporting to markdown...") markdown = result.document.export_to_markdown() return RecognitionResult(markdown=markdown) except Exception as e: logger.error(f"Error processing document: {str(e)}") raise HTTPException(status_code=500, detail=str(e)) finally: if os.path.exists(tmp_path): os.remove(tmp_path) @app.get("/") def health_check(): return {"status": "ok", "service": "docling-ocr"}