File size: 822 Bytes
51917ba
 
 
 
 
 
 
1272fdd
 
51917ba
 
 
 
 
 
 
 
1272fdd
51917ba
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
import json
import os
from pathlib import Path
from docling.document_converter import DocumentConverter

class IngestionEngine:
    """Convert documents with docling and store the result as JSON files."""

    def __init__(self):
        """Create the docling converter that is reused for every document."""
        # Relies on the directories configured in the Dockerfile
        # (they are not referenced explicitly in this class).
        self.converter = DocumentConverter()

    def process_document(self, file_path: Path, output_dir: Path) -> dict:
        """Convert one document and write its dict export as a JSON file.

        The output file is named after the input file's stem
        (``<stem>.json``) and is placed in ``output_dir``, which is created
        (including missing parents) when it does not exist yet.

        Args:
            file_path: Path of the document to convert. A plain string is
                accepted as well.
            output_dir: Directory that receives the JSON file. A plain
                string is accepted as well.

        Returns:
            ``{"status": "success", "message": ...}`` when the file was
            written, otherwise ``{"status": "error", "message": <exception
            text>}``. This method does not raise for conversion or I/O
            failures; they are reported through the returned dict.
        """
        try:
            # Normalise the inputs so callers passing strings do not fail on
            # the ``.stem`` / ``/`` operations below.
            source = Path(file_path)
            target_dir = Path(output_dir)

            result = self.converter.convert(str(source))
            doc_dict = result.document.export_to_dict()

            # A missing output directory would otherwise make ``open`` fail
            # with FileNotFoundError after the conversion already succeeded.
            target_dir.mkdir(parents=True, exist_ok=True)
            output_file = target_dir / f"{source.stem}.json"
            with open(output_file, 'w', encoding='utf-8') as f:
                # ensure_ascii=False keeps non-ASCII text readable in the file.
                json.dump(doc_dict, f, ensure_ascii=False, indent=2)
            return {"status": "success", "message": "OCR terminé avec succès"}
        except Exception as e:
            # Deliberate boundary: callers receive a status dict instead of
            # an exception, so every failure is folded into the message.
            return {"status": "error", "message": str(e)}