Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -96,7 +96,7 @@ col4.metric("Confiance IA", "98.4%")
|
|
| 96 |
st.markdown("---")
|
| 97 |
|
| 98 |
# --- NAVIGATION PAR ONGLETS (STYLE PALANTIR) ---
|
| 99 |
-
tab_ingestion, tab_intelligence = st.tabs(["INGESTION & OCR", "ENTITES & RELATIONS"])
|
| 100 |
|
| 101 |
# --- TAB 1: INGESTION ---
|
| 102 |
with tab_ingestion:
|
|
@@ -105,7 +105,7 @@ with tab_ingestion:
|
|
| 105 |
with col_u1:
|
| 106 |
st.subheader("◯⎯| CHARGEMENT DOCUMENTS")
|
| 107 |
uploaded_files = st.file_uploader("Fichiers PDF/IMG", accept_multiple_files=True)
|
| 108 |
-
if uploaded_files and st.button("INITIER LA SÉQUENCE
|
| 109 |
for uploaded_file in uploaded_files:
|
| 110 |
file_path = INPUT_DIR / uploaded_file.name
|
| 111 |
with open(file_path, "wb") as f:
|
|
@@ -118,16 +118,15 @@ with tab_ingestion:
|
|
| 118 |
with col_u2:
|
| 119 |
st.subheader("◯⎯| TEXTE LIBRE")
|
| 120 |
free_text = st.text_area("Coller du texte ici", height=150)
|
| 121 |
-
if st.button("
|
| 122 |
temp_path = INPUT_DIR / f"text_{int(time.time())}.md"
|
| 123 |
with open(temp_path, "w", encoding="utf-8") as f: f.write(free_text)
|
| 124 |
st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
|
| 125 |
st.rerun()
|
| 126 |
|
| 127 |
|
| 128 |
-
# --- TAB 2:
|
| 129 |
-
with
|
| 130 |
-
# 1. Vérification des fichiers disponibles
|
| 131 |
json_files = list(OUTPUT_DIR.glob("*.json"))
|
| 132 |
if not json_files:
|
| 133 |
st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
|
|
@@ -135,101 +134,103 @@ with tab_intelligence:
|
|
| 135 |
# Barre d'outils (Sélection et Suppression)
|
| 136 |
col_select, col_delete = st.columns([3, 1])
|
| 137 |
with col_select:
|
| 138 |
-
selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name)
|
| 139 |
with col_delete:
|
| 140 |
st.write("")
|
| 141 |
-
if st.button("SUPPRIMER", use_container_width=True):
|
| 142 |
os.remove(selected_file)
|
| 143 |
st.rerun()
|
| 144 |
|
| 145 |
st.markdown("---")
|
| 146 |
|
| 147 |
-
#
|
| 148 |
with open(selected_file, 'r', encoding='utf-8') as f:
|
| 149 |
data = json.load(f)
|
| 150 |
text_extracted = " ".join([t.get("text", "") for t in data.get("texts", [])])
|
| 151 |
|
| 152 |
-
col_inf1, col_inf2 = st.columns([1,
|
| 153 |
|
| 154 |
with col_inf1:
|
| 155 |
st.markdown("### TEXTE SOURCE")
|
| 156 |
st.text_area("Données issues de l'OCR", text_extracted, height=500)
|
| 157 |
|
| 158 |
with col_inf2:
|
| 159 |
-
st.markdown("###
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
with st.spinner("Analyse fragmentée et colorisation dynamique en cours..."):
|
| 164 |
-
# ia_temp est récupéré depuis le slider de votre sidebar
|
| 165 |
graph_data = st.session_state.extractor.extract_long_text(text_extracted, temperature=ia_temp)
|
| 166 |
|
| 167 |
if graph_data:
|
| 168 |
st.session_state.last_graph = graph_data
|
| 169 |
-
st.success(f"
|
| 170 |
else:
|
| 171 |
st.error("L'IA n'a pas pu structurer les données.")
|
| 172 |
|
| 173 |
-
|
| 174 |
-
|
| 175 |
-
|
| 176 |
-
|
| 177 |
-
|
| 178 |
-
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
|
| 182 |
-
|
| 183 |
-
|
| 184 |
-
|
| 185 |
-
|
| 186 |
-
|
| 187 |
-
|
| 188 |
-
|
| 189 |
-
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
| 195 |
-
|
| 196 |
-
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
|
| 200 |
-
|
| 201 |
-
|
| 202 |
-
|
| 203 |
-
|
| 204 |
-
|
| 205 |
-
|
| 206 |
-
|
| 207 |
-
|
| 208 |
-
|
| 209 |
-
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
|
| 230 |
-
|
| 231 |
-
|
| 232 |
-
|
| 233 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 234 |
|
| 235 |
|
|
|
|
| 96 |
st.markdown("---")
|
| 97 |
|
| 98 |
# --- NAVIGATION PAR ONGLETS (STYLE PALANTIR) ---
|
| 99 |
+
# --- TAB NAVIGATION (PALANTIR STYLE) ---
# Three labels → three handles. The later `with tab_entities:` and
# `with tab_visualisation:` blocks require all three names to be bound here;
# unpacking only two from three tabs would raise ValueError.
tab_ingestion, tab_entities, tab_visualisation = st.tabs(
    ["INGESTION & OCR", "ENTITES & RELATIONS", "VISUALISATION GRAPHE"]
)
|
| 100 |
|
| 101 |
# --- TAB 1: INGESTION ---
|
| 102 |
with tab_ingestion:
|
|
|
|
| 105 |
with col_u1:
|
| 106 |
st.subheader("◯⎯| CHARGEMENT DOCUMENTS")
|
| 107 |
uploaded_files = st.file_uploader("Fichiers PDF/IMG", accept_multiple_files=True)
|
| 108 |
+
if uploaded_files and st.button("INITIER LA SÉQUENCE"):
|
| 109 |
for uploaded_file in uploaded_files:
|
| 110 |
file_path = INPUT_DIR / uploaded_file.name
|
| 111 |
with open(file_path, "wb") as f:
|
|
|
|
| 118 |
with col_u2:
|
| 119 |
st.subheader("◯⎯| TEXTE LIBRE")
|
| 120 |
free_text = st.text_area("Coller du texte ici", height=150)
|
| 121 |
+
if st.button("INITIER LA SÉQUENCE"):
|
| 122 |
temp_path = INPUT_DIR / f"text_{int(time.time())}.md"
|
| 123 |
with open(temp_path, "w", encoding="utf-8") as f: f.write(free_text)
|
| 124 |
st.session_state.engine.process_document(temp_path, OUTPUT_DIR)
|
| 125 |
st.rerun()
|
| 126 |
|
| 127 |
|
| 128 |
+
# --- TAB 2: ENTITÉS & RELATIONS ---
|
| 129 |
+
with tab_entities:
|
|
|
|
| 130 |
json_files = list(OUTPUT_DIR.glob("*.json"))
|
| 131 |
if not json_files:
|
| 132 |
st.info("Aucun document analysé disponible. Allez dans l'onglet INGESTION.")
|
|
|
|
| 134 |
# Barre d'outils (Sélection et Suppression)
|
| 135 |
col_select, col_delete = st.columns([3, 1])
|
| 136 |
with col_select:
|
| 137 |
+
selected_file = st.selectbox("Sélectionner un artefact", json_files, format_func=lambda x: x.name, key="select_entity")
|
| 138 |
with col_delete:
|
| 139 |
st.write("")
|
| 140 |
+
if st.button("SUPPRIMER", key="del_entity", use_container_width=True):
|
| 141 |
os.remove(selected_file)
|
| 142 |
st.rerun()
|
| 143 |
|
| 144 |
st.markdown("---")
|
| 145 |
|
| 146 |
+
# Chargement du texte extrait
|
| 147 |
with open(selected_file, 'r', encoding='utf-8') as f:
|
| 148 |
data = json.load(f)
|
| 149 |
text_extracted = " ".join([t.get("text", "") for t in data.get("texts", [])])
|
| 150 |
|
| 151 |
+
col_inf1, col_inf2 = st.columns([1, 1])
|
| 152 |
|
| 153 |
with col_inf1:
|
| 154 |
st.markdown("### TEXTE SOURCE")
|
| 155 |
st.text_area("Données issues de l'OCR", text_extracted, height=500)
|
| 156 |
|
| 157 |
with col_inf2:
|
| 158 |
+
st.markdown("### EXTRACTION DES DONNÉES")
|
| 159 |
+
if st.button("GÉNÉRER LES ENTITÉS & RELATIONS", use_container_width=True):
|
| 160 |
+
with st.spinner("Analyse fragmentée par le cerveau Qwen..."):
|
| 161 |
+
# ia_temp vient du slider de votre sidebar
|
|
|
|
|
|
|
| 162 |
graph_data = st.session_state.extractor.extract_long_text(text_extracted, temperature=ia_temp)
|
| 163 |
|
| 164 |
if graph_data:
|
| 165 |
st.session_state.last_graph = graph_data
|
| 166 |
+
st.success(f"Extraction réussie : {len(graph_data.get('entities', []))} entités trouvées.")
|
| 167 |
else:
|
| 168 |
st.error("L'IA n'a pas pu structurer les données.")
|
| 169 |
|
| 170 |
+
if 'last_graph' in st.session_state:
|
| 171 |
+
st.markdown("#### FORMAT JSON (BRUT)")
|
| 172 |
+
# Affiche uniquement le JSON comme demandé
|
| 173 |
+
st.json(st.session_state.last_graph)
|
| 174 |
+
|
| 175 |
+
|
| 176 |
+
# --- TAB 3: VISUALISATION GRAPHE ---
|
| 177 |
+
with tab_visualisation:
|
| 178 |
+
st.subheader("◯⎯| INTERFACE CINÉTIQUE VISUELLE")
|
| 179 |
+
|
| 180 |
+
if 'last_graph' in st.session_state and st.session_state.last_graph:
|
| 181 |
+
try:
|
| 182 |
+
# Initialisation du graphe PyVis
|
| 183 |
+
net = Network(height="700px", width="100%", bgcolor="#0b0d11", font_color="#e6edf3", directed=True)
|
| 184 |
+
|
| 185 |
+
import hashlib

def auto_color(text):
    """Map a label to a deterministic hex colour (case-insensitive).

    The first six hex digits of the MD5 of the lower-cased label are used,
    so the same label always yields the same colour across reruns.
    """
    digest = hashlib.md5(text.lower().encode()).hexdigest()
    return "#" + digest[:6]
|
| 189 |
+
|
| 190 |
+
found_types = {}
|
| 191 |
+
|
| 192 |
+
# Ajout des Noeuds
|
| 193 |
+
# Add one PyVis node per extracted entity, coloured by entity type.
# NOTE(review): assumes each entity dict carries "id" and "name" — entries
# missing either will raise KeyError into the surrounding try/except.
for ent in st.session_state.last_graph.get("entities", []):
    e_type = ent.get("type", "Unknown")
    e_color = auto_color(e_type)
    found_types[e_type] = e_color  # remember type→colour for the legend

    net.add_node(
        ent["id"],
        label=ent["name"],
        # default "" so a missing description doesn't render as the
        # literal string "None" in the hover tooltip
        title=f"TYPE: {e_type}\n{ent.get('description', '')}",
        color=e_color,
        shape="dot",
        size=25,
    )
|
| 206 |
+
|
| 207 |
+
# Ajout des Relations
|
| 208 |
+
for rel in st.session_state.last_graph.get("relationships", []):
|
| 209 |
+
net.add_edge(
|
| 210 |
+
rel["from"],
|
| 211 |
+
rel["to"],
|
| 212 |
+
label=rel.get("type", "LINK"),
|
| 213 |
+
color="#30363d",
|
| 214 |
+
arrows="to"
|
| 215 |
+
)
|
| 216 |
+
|
| 217 |
+
net.set_options('{"physics": {"forceAtlas2Based": {"gravitationalConstant": -100, "centralGravity": 0.01}, "solver": "forceAtlas2Based"}}')
|
| 218 |
+
|
| 219 |
+
# Affichage de la légende
|
| 220 |
+
st.write("**Légende détectée :**")
|
| 221 |
+
leg_cols = st.columns(len(found_types) if len(found_types) > 0 else 1)
|
| 222 |
+
for idx, (t_name, t_color) in enumerate(found_types.items()):
|
| 223 |
+
leg_cols[idx % len(leg_cols)].markdown(f"<span style='color:{t_color}'>●</span> {t_name}", unsafe_allow_html=True)
|
| 224 |
+
|
| 225 |
+
# Rendu du graphe
|
| 226 |
+
path = "temp_graph_viz.html"
|
| 227 |
+
net.save_graph(path)
|
| 228 |
+
with open(path, 'r', encoding='utf-8') as f:
|
| 229 |
+
st.components.v1.html(f.read(), height=750)
|
| 230 |
+
|
| 231 |
+
except Exception as e:
|
| 232 |
+
st.error(f"Erreur de rendu visuel : {e}")
|
| 233 |
+
else:
|
| 234 |
+
st.warning("⚠️ Aucune donnée disponible. Veuillez d'abord générer l'intelligence dans l'onglet 'ENTITÉS & RELATIONS'.")
|
| 235 |
|
| 236 |
|