Spaces:
Sleeping
Sleeping
| import json | |
| import os | |
| from pathlib import Path | |
| from docling.document_converter import DocumentConverter | |
class IngestionEngine:
    """Convert documents with docling and save the exported structure as JSON."""

    def __init__(self):
        """Create the docling ``DocumentConverter`` reused for every document."""
        # NOTE(review): the original comment said the folders configured in the
        # Dockerfile (ENV) are used, but no directory or environment variable is
        # read here -- confirm whether that is handled by the caller.
        self.converter = DocumentConverter()

    def process_document(self, file_path: Path, output_dir: Path) -> dict:
        """Convert one document and write ``<stem>.json`` into *output_dir*.

        Args:
            file_path: Document to convert; a ``Path`` or a plain string path.
            output_dir: Directory receiving the JSON export. It is created
                (including missing parents) when it does not exist yet.

        Returns:
            ``{"status": "success", "message": ...}`` when the JSON file was
            written, otherwise ``{"status": "error", "message": str(exc)}``.
            Failures are reported through the returned dict, not raised.
        """
        try:
            # Coerce inside the try block so a bad argument is still reported
            # through the status dict instead of escaping as an exception.
            file_path = Path(file_path)
            output_dir = Path(output_dir)

            result = self.converter.convert(str(file_path))
            doc_dict = result.document.export_to_dict()

            # Without this, a missing output directory made every call fail.
            output_dir.mkdir(parents=True, exist_ok=True)
            output_file = output_dir / f"{file_path.stem}.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps non-ASCII text readable in the file.
                json.dump(doc_dict, f, ensure_ascii=False, indent=2)
            return {"status": "success", "message": "Lecture OCR terminée"}
        except Exception as e:
            # Deliberate boundary: callers get a status dict, never a raise.
            return {"status": "error", "message": str(e)}