"""Streamlit front end: upload documents, run OCR ingestion, extract a graph.

The left column saves uploaded files to ``uploads/`` and hands each one to the
ingestion engine, which is given ``json_output/`` as its output directory.
The right column lets the user pick one of the JSON files found there, shows
the text it contains, and sends that text to the extractor engine.
"""

import json
import os
from pathlib import Path

import streamlit as st

from core.docling_engine import IngestionEngine
from core.extractor import ExtractorEngine


def _safe_filename(raw_name: str) -> str:
    """Return the basename of a client-supplied file name.

    The name comes from the browser, so any directory component (``../`` or a
    Windows-style ``..\\``) is dropped to keep the write inside the upload
    directory.

    Raises:
        ValueError: if nothing usable is left after stripping directories.
    """
    candidate = Path(raw_name.replace("\\", "/")).name
    if candidate in {"", ".", ".."}:
        raise ValueError(f"unusable file name: {raw_name!r}")
    return candidate


def _extract_text(data: object) -> str:
    """Join the ``text`` field of every entry under the ``texts`` key.

    Returns an empty string when ``data`` is not a dict or has no ``texts``
    list. Entries that are not dicts are skipped; a missing or ``None``
    ``text`` contributes an empty string.
    """
    if not isinstance(data, dict):
        return ""
    entries = data.get("texts")
    if not isinstance(entries, list):
        return ""
    return " ".join(
        str(entry.get("text") or "")
        for entry in entries
        if isinstance(entry, dict)
    )


# Page configuration (must be the first Streamlit command of the script).
st.set_page_config(page_title="Prõspectus Veritas", layout="wide")

# Keep one engine instance per session so Streamlit reruns do not rebuild them.
if 'ingestor' not in st.session_state:
    st.session_state.ingestor = IngestionEngine()
if 'extractor' not in st.session_state:
    st.session_state.extractor = ExtractorEngine()

st.title("🕵️ Prõspectus Veritas - Intelligence Documentaire")

# Working directories, created on first run.
UPLOAD_DIR = Path("uploads")
JSON_DIR = Path("json_output")
UPLOAD_DIR.mkdir(exist_ok=True)
JSON_DIR.mkdir(exist_ok=True)

col1, col2 = st.columns([1, 2])

with col1:
    st.header("1. Ingestion")
    uploaded_files = st.file_uploader(
        "Charger des documents (PDF, Image)",
        accept_multiple_files=True,
    )

    if st.button("Lancer l'analyse OCR"):
        if not uploaded_files:
            # Without this the click would silently do nothing.
            st.warning("Veuillez d'abord charger au moins un document.")

        for uploaded_file in uploaded_files or []:
            try:
                safe_name = _safe_filename(uploaded_file.name)
            except ValueError:
                st.error(f"Nom de fichier invalide : {uploaded_file.name}")
                continue

            file_path = UPLOAD_DIR / safe_name
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())

            with st.spinner(f"Lecture de {uploaded_file.name}..."):
                # NOTE(review): the result is read as a mapping with "status"
                # and "message" keys; confirm against IngestionEngine.
                result = st.session_state.ingestor.process_document(
                    file_path, JSON_DIR
                )

            if result["status"] == "success":
                st.success(result["message"])
            else:
                st.error(
                    f"Erreur sur {uploaded_file.name}: {result['message']}"
                )

with col2:
    st.header("2. Analyse & Graphe")

    # Sorted so the selectbox order stays stable from one rerun to the next.
    json_files = sorted(JSON_DIR.glob("*.json"))
    if json_files:
        selected_json = st.selectbox(
            "Sélectionner un document analysé",
            json_files,
            format_func=lambda x: x.name,
        )

        data = None
        try:
            with open(selected_json, 'r', encoding='utf-8') as f:
                data = json.load(f)
        except (OSError, json.JSONDecodeError) as exc:
            # Report an unreadable or malformed file instead of a traceback.
            st.error(f"Impossible de lire {selected_json.name} : {exc}")

        if data is not None:
            # Text handed to the extractor; only a preview is displayed.
            text_to_analyze = _extract_text(data)
            st.text_area(
                "Texte extrait par l'OCR",
                text_to_analyze[:2000],
                height=200,
            )

            if st.button("🧠 Extraire le Graphe de Connaissance"):
                if not text_to_analyze.strip():
                    # Skip the model call when there is nothing to analyse.
                    st.warning("Aucun texte à analyser dans ce document.")
                else:
                    with st.spinner("Appel du cerveau Qwen (Timeout 3min)..."):
                        graph_raw = st.session_state.extractor.extract_graph(
                            text_to_analyze
                        )

                    if graph_raw:
                        # Stored in the session so it survives the next rerun.
                        st.session_state.last_graph = graph_raw
                        st.json(graph_raw)
                    else:
                        st.error("Le cerveau n'a pas répondu à temps ou le format est invalide.")