Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -4,6 +4,8 @@ import os
|
|
| 4 |
import json
|
| 5 |
import time
|
| 6 |
from pathlib import Path
|
|
|
|
|
|
|
| 7 |
from core.docling_engine import IngestionEngine
|
| 8 |
from core.extractor import ExtractorEngine
|
| 9 |
|
|
@@ -69,7 +71,12 @@ if 'extractor' not in st.session_state:
|
|
| 69 |
with st.sidebar:
|
| 70 |
st.title("PrõspectùsV-ritas")
|
| 71 |
st.markdown("---")
|
| 72 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 73 |
for f in list(INPUT_DIR.glob("*")) + list(OUTPUT_DIR.glob("*")):
|
| 74 |
os.remove(f)
|
| 75 |
st.success("Système nettoyé.")
|
|
@@ -89,7 +96,7 @@ col4.metric("Confiance IA", "98.4%")
|
|
| 89 |
st.markdown("---")
|
| 90 |
|
| 91 |
# --- NAVIGATION PAR ONGLETS (STYLE PALANTIR) ---
|
| 92 |
-
tab_ingestion, tab_intelligence = st.tabs(["INGESTION & OCR", "
|
| 93 |
|
| 94 |
# --- TAB 1: INGESTION ---
|
| 95 |
with tab_ingestion:
|
|
@@ -117,110 +124,112 @@ with tab_ingestion:
|
|
| 117 |
st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
|
| 118 |
st.rerun()
|
| 119 |
|
|
|
|
| 120 |
# --- TAB 2: INTELLIGENCE ---
|
| 121 |
with tab_intelligence:
|
|
|
|
| 122 |
json_files = list(OUTPUT_DIR.glob("*.json"))
|
| 123 |
if not json_files:
|
| 124 |
st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
|
| 125 |
else:
|
| 126 |
-
# Barre d'outils
|
| 127 |
col_select, col_delete = st.columns([3, 1])
|
| 128 |
with col_select:
|
| 129 |
selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name)
|
| 130 |
with col_delete:
|
| 131 |
-
st.write("")
|
| 132 |
if st.button("🗑️ SUPPRIMER", use_container_width=True):
|
| 133 |
os.remove(selected_file)
|
| 134 |
st.rerun()
|
| 135 |
|
| 136 |
st.markdown("---")
|
| 137 |
|
| 138 |
-
#
|
| 139 |
with open(selected_file, 'r', encoding='utf-8') as f:
|
| 140 |
data = json.load(f)
|
| 141 |
-
# Récupération du texte structuré (Docling export_to_dict format)
|
| 142 |
text_extracted = " ".join([t.get("text", "") for t in data.get("texts", [])])
|
| 143 |
|
| 144 |
-
col_inf1, col_inf2 = st.columns([1, 2])
|
| 145 |
|
| 146 |
with col_inf1:
|
| 147 |
st.markdown("### 📄 TEXTE SOURCE")
|
| 148 |
st.text_area("Données issues de l'OCR", text_extracted, height=500)
|
| 149 |
|
| 150 |
with col_inf2:
|
| 151 |
-
st.markdown("### 🕸️ GRAPHE DE CONNAISSANCE")
|
| 152 |
|
|
|
|
| 153 |
if st.button("🧬 GÉNÉRER L'INTELLIGENCE VISUELLE", use_container_width=True):
|
| 154 |
-
with st.spinner("
|
| 155 |
-
|
|
|
|
| 156 |
|
| 157 |
if graph_data:
|
| 158 |
st.session_state.last_graph = graph_data
|
| 159 |
-
st.success(f"Analyse terminée : {len(graph_data.get('entities', []))} entités
|
| 160 |
else:
|
| 161 |
-
st.error("L'IA n'a pas pu structurer les données.
|
| 162 |
|
| 163 |
-
#
|
| 164 |
if 'last_graph' in st.session_state and st.session_state.last_graph:
|
| 165 |
-
# Configuration PyVis
|
| 166 |
-
net = Network(
|
| 167 |
-
height="500px",
|
| 168 |
-
width="100%",
|
| 169 |
-
bgcolor="#0b0d11",
|
| 170 |
-
font_color="#e6edf3",
|
| 171 |
-
directed=True
|
| 172 |
-
)
|
| 173 |
-
|
| 174 |
-
# Couleurs par type d'entité (Palette Gotham/Cyber)
|
| 175 |
-
colors = {
|
| 176 |
-
"Person": "#29b5e8", # Bleu brillant
|
| 177 |
-
"Organization": "#ffcc00", # Jaune
|
| 178 |
-
"Location": "#00ffcc", # Turquoise
|
| 179 |
-
"Concept": "#9966ff", # Violet
|
| 180 |
-
"Event": "#ff6666" # Rouge doux
|
| 181 |
-
}
|
| 182 |
-
|
| 183 |
-
# Ajout des Noeuds
|
| 184 |
-
for ent in st.session_state.last_graph.get("entities", []):
|
| 185 |
-
color = colors.get(ent.get("type"), "#808080")
|
| 186 |
-
net.add_node(
|
| 187 |
-
ent["id"],
|
| 188 |
-
label=ent["name"],
|
| 189 |
-
title=f"{ent['type']}: {ent['description']}",
|
| 190 |
-
color=color,
|
| 191 |
-
shape="dot",
|
| 192 |
-
size=20
|
| 193 |
-
)
|
| 194 |
-
|
| 195 |
-
# Ajout des Relations
|
| 196 |
-
for rel in st.session_state.last_graph.get("relationships", []):
|
| 197 |
-
net.add_edge(
|
| 198 |
-
rel["from"],
|
| 199 |
-
rel["to"],
|
| 200 |
-
label=rel["type"],
|
| 201 |
-
color="#30363d",
|
| 202 |
-
arrows="to"
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
# Options physiques pour un mouvement fluide
|
| 206 |
-
net.set_options("""
|
| 207 |
-
var options = {
|
| 208 |
-
"physics": {
|
| 209 |
-
"forceAtlas2Based": { "gravitationalConstant": -50, "centralGravity": 0.01, "springLength": 100 },
|
| 210 |
-
"maxVelocity": 50,
|
| 211 |
-
"solver": "forceAtlas2Based",
|
| 212 |
-
"timestep": 0.35
|
| 213 |
-
}
|
| 214 |
-
}
|
| 215 |
-
""")
|
| 216 |
-
|
| 217 |
-
# Rendu du graphe dans Streamlit
|
| 218 |
try:
|
| 219 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
net.save_graph(path)
|
| 221 |
with open(path, 'r', encoding='utf-8') as f:
|
| 222 |
html_content = f.read()
|
| 223 |
-
|
|
|
|
|
|
|
|
|
|
| 224 |
except Exception as e:
|
| 225 |
st.error(f"Erreur de rendu visuel : {e}")
|
| 226 |
-
|
|
|
|
|
|
| 4 |
import json
|
| 5 |
import time
|
| 6 |
from pathlib import Path
|
| 7 |
+
from pyvis.network import Network
|
| 8 |
+
import streamlit.components.v1 as components
|
| 9 |
from core.docling_engine import IngestionEngine
|
| 10 |
from core.extractor import ExtractorEngine
|
| 11 |
|
|
|
|
| 71 |
with st.sidebar:
|
| 72 |
st.title("PrõspectùsV-ritas")
|
| 73 |
st.markdown("---")
|
| 74 |
+
|
| 75 |
+
st.caption("PARAMÈTRES IA")
|
| 76 |
+
# AJOUT DU CURSEUR DE TEMPÉRATURE
|
| 77 |
+
ia_temp = st.slider("Température Créative", 0.1, 1.0, 0.2, help="0.1 = Précis, 0.8 = Créatif")
|
| 78 |
+
|
| 79 |
+
# Sidebar action: delete every file in the input and output directories.
if st.button("PURGER LE SYSTÈME"):
    for folder in (INPUT_DIR, OUTPUT_DIR):
        # Materialise the listing first so entries are not removed while
        # the directory is still being scanned.
        for entry in list(folder.glob("*")):
            # os.remove() raises on directories; only delete plain files
            # and symlinks so a stray sub-folder cannot crash the app.
            if entry.is_file() or entry.is_symlink():
                os.remove(entry)
    st.success("Système nettoyé.")
|
|
|
|
| 96 |
st.markdown("---")
|
| 97 |
|
| 98 |
# --- NAVIGATION PAR ONGLETS (STYLE PALANTIR) ---
|
| 99 |
+
tab_ingestion, tab_intelligence = st.tabs(["INGESTION & OCR", "ENTITES & RELATIONS"])
|
| 100 |
|
| 101 |
# --- TAB 1: INGESTION ---
|
| 102 |
with tab_ingestion:
|
|
|
|
| 124 |
st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
|
| 125 |
st.rerun()
|
| 126 |
|
| 127 |
+
|
| 128 |
# --- TAB 2: INTELLIGENCE ---
|
| 129 |
with tab_intelligence:
|
| 130 |
+
# 1. Vérification des fichiers disponibles
|
| 131 |
json_files = list(OUTPUT_DIR.glob("*.json"))
|
| 132 |
if not json_files:
|
| 133 |
st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
|
| 134 |
else:
|
| 135 |
+
# Barre d'outils (Sélection et Suppression)
|
| 136 |
col_select, col_delete = st.columns([3, 1])
|
| 137 |
with col_select:
|
| 138 |
selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name)
|
| 139 |
with col_delete:
|
| 140 |
+
st.write("")
|
| 141 |
if st.button("🗑️ SUPPRIMER", use_container_width=True):
|
| 142 |
os.remove(selected_file)
|
| 143 |
st.rerun()
|
| 144 |
|
| 145 |
st.markdown("---")
|
| 146 |
|
| 147 |
+
# 2. Chargement du texte extrait
|
| 148 |
with open(selected_file, 'r', encoding='utf-8') as f:
|
| 149 |
data = json.load(f)
|
|
|
|
| 150 |
text_extracted = " ".join([t.get("text", "") for t in data.get("texts", [])])
|
| 151 |
|
| 152 |
+
col_inf1, col_inf2 = st.columns([1, 2])
|
| 153 |
|
| 154 |
with col_inf1:
|
| 155 |
st.markdown("### 📄 TEXTE SOURCE")
|
| 156 |
st.text_area("Données issues de l'OCR", text_extracted, height=500)
|
| 157 |
|
| 158 |
with col_inf2:
|
| 159 |
+
st.markdown("### 🕸️ GRAPHE DE CONNAISSANCE AUTOMATIQUE")
|
| 160 |
|
| 161 |
+
# Bouton de génération avec découpage (Chunking)
|
| 162 |
if st.button("🧬 GÉNÉRER L'INTELLIGENCE VISUELLE", use_container_width=True):
|
| 163 |
+
with st.spinner("Analyse fragmentée et colorisation dynamique en cours..."):
|
| 164 |
+
# ia_temp est récupéré depuis le slider de votre sidebar
|
| 165 |
+
graph_data = st.session_state.extractor.extract_long_text(text_extracted, temperature=ia_temp)
|
| 166 |
|
| 167 |
if graph_data:
|
| 168 |
st.session_state.last_graph = graph_data
|
| 169 |
+
st.success(f"Analyse terminée : {len(graph_data.get('entities', []))} entités trouvées.")
|
| 170 |
else:
|
| 171 |
+
st.error("L'IA n'a pas pu structurer les données.")
|
| 172 |
|
| 173 |
+
# 3. Visualisation Interactive avec PyVis
|
| 174 |
if 'last_graph' in st.session_state and st.session_state.last_graph:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 175 |
try:
|
| 176 |
+
# Initialisation du graphe (Correction NameError effectuée par l'import en haut d'app.py)
|
| 177 |
+
net = Network(height="600px", width="100%", bgcolor="#0b0d11", font_color="#e6edf3", directed=True)
|
| 178 |
+
|
| 179 |
+
# --- LOGIQUE DE COULEURS ALÉATOIRES ET AUTOMATIQUES ---
|
| 180 |
+
import hashlib
|
| 181 |
+
def auto_color(text):
    """Return a deterministic ``#rrggbb`` colour for an entity type label.

    The colour is the first six hex digits of the MD5 digest of the
    lower-cased label, so the same label (case-insensitively) always maps
    to the same colour. MD5 is used only as a stable hash here, not for
    security.

    Args:
        text: The entity type. ``None`` is treated as ``"Unknown"``; any
            other non-string value is converted with ``str()``.

    Returns:
        A 7-character string of the form ``"#xxxxxx"``.
    """
    # Extracted JSON may carry "type": null or a non-string value; coerce
    # instead of failing on .lower().
    label = "Unknown" if text is None else str(text)
    digest = hashlib.md5(label.lower().encode("utf-8")).hexdigest()
    return f"#{digest[:6]}"
|
| 185 |
+
|
| 186 |
+
# Collecte des types pour la légende
|
| 187 |
+
found_types = {}
|
| 188 |
+
|
| 189 |
+
# Add one node per extracted entity and remember each type's colour so the
# legend can be built afterwards.
for ent in st.session_state.last_graph.get("entities", []):
    node_id = ent.get("id")
    if node_id is None:
        # An entity without an id cannot be referenced by a relationship;
        # skip it instead of letting a KeyError abort the whole render.
        continue

    # A missing or null type falls back to "Unknown" so colouring never fails.
    e_type = ent.get("type") or "Unknown"
    e_color = auto_color(e_type)
    found_types[e_type] = e_color

    # Avoid showing the literal text "None" in the tooltip.
    description = ent.get("description") or ""

    net.add_node(
        node_id,
        # Fall back to the id when the entity has no usable name.
        label=str(ent.get("name") or node_id),
        title=f"TYPE: {e_type}\n{description}",
        color=e_color,
        shape="dot",
        size=25,
    )
|
| 203 |
+
|
| 204 |
+
# Add one directed edge per extracted relationship.
# Network.add_edge() asserts that both endpoints already exist as nodes, so
# a single dangling reference would otherwise abort the whole render.
known_ids = set(net.get_nodes())
for rel in st.session_state.last_graph.get("relationships", []):
    source_id = rel.get("from")
    target_id = rel.get("to")
    if source_id not in known_ids or target_id not in known_ids:
        # Relationship points at an entity that was not added; skip it.
        continue
    net.add_edge(
        source_id,
        target_id,
        label=rel.get("type", "LINK"),
        color="#30363d",
        arrows="to",
    )
|
| 213 |
+
|
| 214 |
+
# Configuration physique pour le mouvement
|
| 215 |
+
net.set_options('{"physics": {"forceAtlas2Based": {"gravitationalConstant": -50, "centralGravity": 0.01}, "solver": "forceAtlas2Based"}}')
|
| 216 |
+
|
| 217 |
+
# 4. Dynamic legend: one coloured bullet per entity type found above.
import html

st.write("**Légende détectée :**")
# st.columns() needs at least one column, even when no type was found.
leg_cols = st.columns(max(len(found_types), 1))
for legend_col, (t_name, t_color) in zip(leg_cols, found_types.items()):
    # Type names come from the extraction output and are rendered with
    # unsafe_allow_html=True, so escape them to prevent markup injection.
    legend_col.markdown(
        f"<span style='color:{t_color}'>●</span> {html.escape(str(t_name))}",
        unsafe_allow_html=True,
    )
|
| 222 |
+
|
| 223 |
+
# Final HTML rendering.
import tempfile

# Write the graph into a per-render temporary directory instead of a fixed
# "temp_graph.html" in the working directory: a shared path can be
# overwritten by another session mid-read, and is never cleaned up.
with tempfile.TemporaryDirectory() as tmp_dir:
    path = os.path.join(tmp_dir, "temp_graph.html")
    net.save_graph(path)
    with open(path, 'r', encoding='utf-8') as f:
        html_content = f.read()

# Embed the generated page through the components module imported at the
# top of the file.
components.html(html_content, height=650)
|
| 231 |
+
|
| 232 |
except Exception as e:
|
| 233 |
st.error(f"Erreur de rendu visuel : {e}")
|
| 234 |
+
|
| 235 |
+
|