OCR_PROSPECTUS / core /docling_engine.py
klydekushy's picture
Update core/docling_engine.py
9d8ba5a verified
raw
history blame contribute delete
824 Bytes
import json
import os
from pathlib import Path
from docling.document_converter import DocumentConverter
class IngestionEngine:
    """Convert documents with Docling and persist the result as JSON."""

    def __init__(self):
        # A single converter, built with its default options, is reused for
        # every document. No environment variables are read here.
        self.converter = DocumentConverter()

    def process_document(self, file_path: Path, output_dir: Path) -> dict[str, str]:
        """Convert one document and write its dict export to a JSON file.

        The output file is named after the input file's stem and placed in
        ``output_dir`` (``report.pdf`` -> ``<output_dir>/report.json``).

        Args:
            file_path: Path of the document to convert. Strings are accepted
                and coerced to ``Path``.
            output_dir: Directory receiving the JSON export. It is created
                (including parents) when it does not exist yet.

        Returns:
            ``{"status": "success", "message": ...}`` when the JSON file was
            written, otherwise ``{"status": "error", "message": str(exc)}``.
            Exceptions are never propagated to the caller.
        """
        try:
            # Coerce up front so callers passing plain strings do not fail
            # later on `.stem` or the `/` operator.
            source_path = Path(file_path)
            target_dir = Path(output_dir)

            result = self.converter.convert(str(source_path))
            doc_dict = result.document.export_to_dict()

            # Without this, a fresh output directory makes the write fail
            # with FileNotFoundError after the conversion already ran.
            target_dir.mkdir(parents=True, exist_ok=True)
            output_file = target_dir / f"{source_path.stem}.json"

            # ensure_ascii=False keeps non-ASCII text readable in the file,
            # hence the explicit UTF-8 encoding.
            with output_file.open("w", encoding="utf-8") as f:
                json.dump(doc_dict, f, ensure_ascii=False, indent=2)

            return {"status": "success", "message": "Lecture OCR terminée"}
        except Exception as e:
            # Boundary handler: callers expect a status dict, not an exception.
            return {"status": "error", "message": str(e)}