Spaces:

klydekushy
/

OCR_PROSPECTUS

Sleeping

App Files Files Community

OCR_PROSPECTUS / app.py

klydekushy

Create app.py

5de3226 verified about 1 month ago

raw

history blame

2.68 kB

	import json
	import os
	from pathlib import Path

	import streamlit as st

	from core.docling_engine import IngestionEngine
	from core.extractor import ExtractorEngine

	# Page configuration (must run before any other Streamlit call).
	st.set_page_config(page_title="Prõspectus Veritas", layout="wide")

	# Build each engine once per session; later reruns reuse the stored instance.
	for state_key, engine_factory in (
	    ('ingestor', IngestionEngine),
	    ('extractor', ExtractorEngine),
	):
	    if state_key not in st.session_state:
	        st.session_state[state_key] = engine_factory()

	st.title("🕵️ Prõspectus Veritas - Intelligence Documentaire")

	# Working directories: raw uploads and the JSON files read by the analysis panel.
	UPLOAD_DIR = Path("uploads")
	JSON_DIR = Path("json_output")
	for working_dir in (UPLOAD_DIR, JSON_DIR):
	    working_dir.mkdir(exist_ok=True)

	# Two-column layout: ingestion panel (narrow) and analysis panel (wide).
	col1, col2 = st.columns([1, 2])

	with col1:
	    st.header("1. Ingestion")
	    uploaded_files = st.file_uploader("Charger des documents (PDF, Image)", accept_multiple_files=True)

	    if st.button("Lancer l'analyse OCR"):
	        if not uploaded_files:
	            # Without this, clicking the button with nothing uploaded did nothing visible.
	            st.warning("Aucun document chargé : ajoutez au moins un fichier avant de lancer l'analyse.")

	        for uploaded_file in uploaded_files or []:
	            # The file name is supplied by the client. Keep only its final path
	            # component so a crafted name cannot write outside UPLOAD_DIR.
	            safe_name = Path(uploaded_file.name).name
	            if safe_name in ("", ".", ".."):
	                st.error(f"Erreur sur {uploaded_file.name}: nom de fichier invalide")
	                continue

	            # Persist the upload to disk; the ingestion engine is given a path.
	            file_path = UPLOAD_DIR / safe_name
	            with open(file_path, "wb") as f:
	                f.write(uploaded_file.getbuffer())

	            with st.spinner(f"Lecture de {uploaded_file.name}..."):
	                result = st.session_state.ingestor.process_document(file_path, JSON_DIR)

	            # The result is read as a mapping with "status" and "message" keys.
	            if result["status"] == "success":
	                st.success(result["message"])
	            else:
	                st.error(f"Erreur sur {uploaded_file.name}: {result['message']}")

	def _collect_text(document):
	    """Join the ``text`` field of every entry under ``document["texts"]``.

	    Returns an empty string when ``document`` is not a dict or has no
	    ``"texts"`` list. Entries that are not dicts, or whose ``text`` value
	    is not a string, are skipped instead of raising.
	    """
	    if not isinstance(document, dict):
	        return ""
	    entries = document.get("texts")
	    if not isinstance(entries, list):
	        return ""
	    fragments = []
	    for entry in entries:
	        if not isinstance(entry, dict):
	            continue
	        fragment = entry.get("text", "")
	        if isinstance(fragment, str):
	            fragments.append(fragment)
	    return " ".join(fragments)


	with col2:
	    st.header("2. Analyse & Graphe")
	    # Sorted because glob order is arbitrary; this keeps the select box
	    # options in the same order from one rerun to the next.
	    json_files = sorted(JSON_DIR.glob("*.json"))

	    if json_files:
	        selected_json = st.selectbox("Sélectionner un document analysé", json_files, format_func=lambda x: x.name)

	        # Hold the file open only for the load itself. A missing, unreadable
	        # or malformed file is reported instead of crashing the app.
	        try:
	            with open(selected_json, 'r', encoding='utf-8') as f:
	                data = json.load(f)
	        except (OSError, ValueError) as exc:
	            st.error(f"Impossible de lire {selected_json.name}: {exc}")
	            data = None

	        if data is not None:
	            # Gather the OCR text that is passed to the extractor.
	            text_to_analyze = _collect_text(data)

	            # Only the first 2000 characters are previewed; the full text is analysed.
	            st.text_area("Texte extrait par l'OCR", text_to_analyze[:2000], height=200)

	            if st.button("🧠 Extraire le Graphe de Connaissance"):
	                with st.spinner("Appel du cerveau Qwen (Timeout 3min)..."):
	                    graph_raw = st.session_state.extractor.extract_graph(text_to_analyze)
	                if graph_raw:
	                    # Keep the latest graph in the session state.
	                    st.session_state.last_graph = graph_raw
	                    st.json(graph_raw)
	                else:
	                    st.error("Le cerveau n'a pas répondu à temps ou le format est invalide.")