# ocrser / main.py
# sanali209's picture
# Update main.py
# 7c37660 verified
from fastapi import FastAPI, UploadFile, File, HTTPException
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions, EasyOcrOptions
import shutil
import os
import tempfile
from pydantic import BaseModel
import logging
# Configure root logging at INFO so the service's progress messages
# (initialization, per-request processing steps) are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger used by every function in this file.
logger = logging.getLogger(__name__)
# FastAPI application object; the route decorators below register on it.
app = FastAPI()
# Global converter instance to reuse models: starts as None and is filled in
# by get_converter() the first time it is called.
_converter_instance = None
def get_converter():
    """Return the shared DocumentConverter, creating it on first use.

    The converter is stored in the module-level ``_converter_instance`` so
    that later calls reuse the same object instead of building a new one.
    It accepts image and PDF inputs, and both formats are configured with
    the same pipeline options: table-structure recognition enabled and OCR
    enabled with EasyOCR options.
    """
    global _converter_instance

    # Already built by an earlier call: hand back the cached instance.
    if _converter_instance is not None:
        return _converter_instance

    logger.info("Initializing DocumentConverter with TableFormer and EasyOCR...")

    # One set of pipeline options shared by the PDF and image formats.
    ocr_pipeline = PdfPipelineOptions()
    ocr_pipeline.do_table_structure = True  # Enable TableFormer
    ocr_pipeline.do_ocr = True
    ocr_pipeline.ocr_options = EasyOcrOptions()  # EasyOCR as the OCR engine

    # Each format gets its own PdfFormatOption wrapper around the shared
    # options; images are configured through PdfFormatOption as well.
    per_format_options = {
        input_format: PdfFormatOption(pipeline_options=ocr_pipeline)
        for input_format in (InputFormat.PDF, InputFormat.IMAGE)
    }

    _converter_instance = DocumentConverter(
        allowed_formats=[InputFormat.IMAGE, InputFormat.PDF],
        format_options=per_format_options,
    )
    return _converter_instance
class RecognitionResult(BaseModel):
    # Response body of POST /process.
    # `markdown` carries the markdown text exported from the converted
    # document.
    markdown: str
@app.post("/process", response_model=RecognitionResult)
async def process_document(file: UploadFile = File(...)):
    """Convert an uploaded PDF or image to markdown.

    The upload is written to a temporary file (keeping the original file
    extension when there is one), converted with the shared converter, and
    the resulting document is exported as markdown. The temporary file is
    always removed afterwards.

    Returns:
        RecognitionResult with the exported markdown.

    Raises:
        HTTPException: status 500, with the underlying error message as
            detail, if saving, converting or exporting fails.
    """
    logger.info("Processing file: %s", file.filename)

    # The client-supplied filename may be missing or have no extension;
    # fall back to ".tmp" instead of failing in splitext(None).
    suffix = os.path.splitext(file.filename or "")[1] or ".tmp"

    # Stays None until the temp file actually exists, so the cleanup below
    # knows whether there is anything to delete.
    tmp_path = None
    try:
        # delete=False: the file must outlive this `with` so the converter
        # can open it by path; it is removed in the `finally` block instead.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            tmp_path = tmp.name
            shutil.copyfileobj(file.file, tmp)

        converter = get_converter()
        logger.info("Converting file at %s...", tmp_path)
        # NOTE(review): convert() is called synchronously inside an async
        # handler, so it presumably blocks the event loop for the duration
        # of the conversion. Left as-is because off-loading to a thread
        # would allow concurrent use of the shared converter - confirm that
        # is safe before changing.
        result = converter.convert(tmp_path)
        logger.info("Exporting to markdown...")
        markdown = result.document.export_to_markdown()
        return RecognitionResult(markdown=markdown)
    except Exception as e:
        # logger.exception keeps the traceback alongside the message.
        logger.exception("Error processing document: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except FileNotFoundError:
                # Already gone; nothing left to clean up.
                pass
@app.get("/")
def health_check():
    # Liveness endpoint: always answers with a fixed payload naming the
    # service and reporting an "ok" status.
    payload = {"status": "ok", "service": "docling-ocr"}
    return payload