# Streamlit UI — Prospectus Veritas (document intelligence front-end).
"""Streamlit front-end for Prospectus Veritas.

Two-column workflow:
  1. Upload PDF/image documents and run OCR ingestion (IngestionEngine),
     which writes one JSON file per document into ``json_output/``.
  2. Pick an ingested JSON file, preview its OCR text, and send it to the
     LLM extractor (ExtractorEngine) to build a knowledge graph.
"""

import json
import os
from pathlib import Path

import streamlit as st

from core.docling_engine import IngestionEngine
from core.extractor import ExtractorEngine

# Page configuration (must run before any other Streamlit call).
st.set_page_config(page_title="Prõspectus Veritas", layout="wide")

# Cache the heavy engines in the session so they survive Streamlit reruns
# instead of being rebuilt on every widget interaction.
if 'ingestor' not in st.session_state:
    st.session_state.ingestor = IngestionEngine()
if 'extractor' not in st.session_state:
    st.session_state.extractor = ExtractorEngine()

st.title("🕵️ Prõspectus Veritas - Intelligence Documentaire")

# Working directories, created on first run and reused afterwards.
UPLOAD_DIR = Path("uploads")
JSON_DIR = Path("json_output")
UPLOAD_DIR.mkdir(exist_ok=True)
JSON_DIR.mkdir(exist_ok=True)

col1, col2 = st.columns([1, 2])

with col1:
    st.header("1. Ingestion")
    uploaded_files = st.file_uploader(
        "Charger des documents (PDF, Image)", accept_multiple_files=True
    )
    if st.button("Lancer l'analyse OCR"):
        # Explicit feedback instead of a silent no-op when nothing was uploaded.
        if not uploaded_files:
            st.warning("Aucun fichier chargé.")
        for uploaded_file in uploaded_files:
            # Persist the upload to disk so the ingestion engine can read it.
            file_path = UPLOAD_DIR / uploaded_file.name
            with open(file_path, "wb") as f:
                f.write(uploaded_file.getbuffer())
            with st.spinner(f"Lecture de {uploaded_file.name}..."):
                result = st.session_state.ingestor.process_document(file_path, JSON_DIR)
            # Engine contract (per usage here): dict with "status" and "message".
            if result["status"] == "success":
                st.success(result["message"])
            else:
                st.error(f"Erreur sur {uploaded_file.name}: {result['message']}")

with col2:
    st.header("2. Analyse & Graphe")
    # sorted() keeps the selectbox order stable across platforms
    # (glob order is filesystem-dependent).
    json_files = sorted(JSON_DIR.glob("*.json"))
    if json_files:
        selected_json = st.selectbox(
            "Sélectionner un document analysé", json_files, format_func=lambda x: x.name
        )
        data = json.loads(selected_json.read_text(encoding='utf-8'))

        # Docling output stores OCR fragments under the "texts" key;
        # concatenate them into one string for the extractor.
        text_to_analyze = ""
        if "texts" in data:
            text_to_analyze = " ".join(t.get("text", "") for t in data["texts"])

        st.text_area("Texte extrait par l'OCR", text_to_analyze[:2000], height=200)

        if st.button("🧠 Extraire le Graphe de Connaissance"):
            with st.spinner("Appel du cerveau Qwen (Timeout 3min)..."):
                graph_raw = st.session_state.extractor.extract_graph(text_to_analyze)
                if graph_raw:
                    st.session_state.last_graph = graph_raw
                    st.json(graph_raw)
                else:
                    st.error("Le cerveau n'a pas répondu à temps ou le format est invalide.")