Spaces:
Sleeping
Sleeping
File size: 822 Bytes
51917ba 1272fdd 51917ba 1272fdd 51917ba |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
import json
import os
from pathlib import Path
from docling.document_converter import DocumentConverter
class IngestionEngine:
    """Convert documents with docling and persist the result as JSON.

    One ``DocumentConverter`` is created at construction time and reused for
    every call to :meth:`process_document`.
    """

    def __init__(self):
        # Uses the directories configured in the Dockerfile.
        self.converter = DocumentConverter()

    def process_document(self, file_path: Path, output_dir: Path) -> dict:
        """Convert one document and write it to ``<output_dir>/<stem>.json``.

        Args:
            file_path: Document to convert. A plain ``str`` path is accepted
                too and coerced to ``Path``.
            output_dir: Directory receiving the JSON export. It is created
                (including parents) when it does not exist yet.

        Returns:
            A dict with the keys ``"status"`` and ``"message"``. ``"status"``
            is ``"success"`` once the JSON file has been written, otherwise
            ``"error"`` with the exception text as ``"message"``. This method
            reports failures through the return value instead of raising.
        """
        try:
            source = Path(file_path)
            target_dir = Path(output_dir)

            result = self.converter.convert(str(source))
            doc_dict = result.document.export_to_dict()

            # Serialize before touching the filesystem so that a
            # non-serializable export cannot leave a truncated file behind.
            payload = json.dumps(doc_dict, ensure_ascii=False, indent=2)

            # Only create the directory once there is something to store.
            target_dir.mkdir(parents=True, exist_ok=True)
            output_file = target_dir / f"{source.stem}.json"
            output_file.write_text(payload, encoding="utf-8")
        except Exception as e:
            # Deliberately broad: callers rely on the status dict rather than
            # on exceptions propagating out of this method.
            return {"status": "error", "message": str(e)}
        return {"status": "success", "message": "OCR terminé avec succès"}