klydekushy committed on
Commit
1272fdd
·
verified ·
1 Parent(s): 5de3226

Update core/docling_engine.py

Browse files
Files changed (1) hide show
  1. core/docling_engine.py +3 -11
core/docling_engine.py CHANGED
@@ -1,28 +1,20 @@
1
  import json
2
  import os
3
- import streamlit as st
4
  from pathlib import Path
5
  from docling.document_converter import DocumentConverter
6
 
7
  class IngestionEngine:
8
  def __init__(self):
9
- try:
10
- # Sur HF Docker, le cache est configuré via les ENV du Dockerfile
11
- self.converter = DocumentConverter()
12
- self.is_ready = True
13
- except Exception as e:
14
- st.error(f"Erreur initialisation Docling: {e}")
15
- self.is_ready = False
16
 
17
  def process_document(self, file_path: Path, output_dir: Path):
18
- if not self.is_ready: return {"status": "error", "message": "Moteur non prêt"}
19
  try:
20
  result = self.converter.convert(str(file_path))
21
  doc_dict = result.document.export_to_dict()
22
-
23
  output_file = output_dir / f"{file_path.stem}.json"
24
  with open(output_file, 'w', encoding='utf-8') as f:
25
  json.dump(doc_dict, f, ensure_ascii=False, indent=2)
26
- return {"status": "success", "message": f"Analyse réussie : {output_file.name}"}
27
  except Exception as e:
28
  return {"status": "error", "message": str(e)}
 
1
  import json
2
  import os
 
3
  from pathlib import Path
4
  from docling.document_converter import DocumentConverter
5
 
6
  class IngestionEngine:
7
  def __init__(self):
8
+ # Utilise les dossiers configurés dans le Dockerfile
9
+ self.converter = DocumentConverter()
 
 
 
 
 
10
 
11
  def process_document(self, file_path: Path, output_dir: Path):
 
12
  try:
13
  result = self.converter.convert(str(file_path))
14
  doc_dict = result.document.export_to_dict()
 
15
  output_file = output_dir / f"{file_path.stem}.json"
16
  with open(output_file, 'w', encoding='utf-8') as f:
17
  json.dump(doc_dict, f, ensure_ascii=False, indent=2)
18
+ return {"status": "success", "message": "OCR terminé avec succès"}
19
  except Exception as e:
20
  return {"status": "error", "message": str(e)}