Update app.py
Browse files
app.py
CHANGED
|
@@ -2,6 +2,7 @@ import streamlit as st
|
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
import csv
|
|
|
|
| 5 |
import pandas as pd
|
| 6 |
from datetime import datetime
|
| 7 |
from neo4j import GraphDatabase
|
|
@@ -9,10 +10,14 @@ from pyvis.network import Network
|
|
| 9 |
import streamlit.components.v1 as components
|
| 10 |
from dotenv import load_dotenv
|
| 11 |
|
| 12 |
-
#
|
| 13 |
-
|
| 14 |
-
from src.
|
| 15 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# --- CONFIGURAZIONE INIZIALE ---
|
| 18 |
load_dotenv()
|
|
@@ -23,7 +28,7 @@ st.set_page_config(
|
|
| 23 |
page_icon="🧠"
|
| 24 |
)
|
| 25 |
|
| 26 |
-
# --- FUNZIONI DI UTILITÀ NEO4J ---
|
| 27 |
def get_driver(uri, user, password):
|
| 28 |
if not uri or not password:
|
| 29 |
return None
|
|
@@ -45,7 +50,6 @@ def reject_relationship(driver, rel_id, subj, pred, obj, reason="Human Rejection
|
|
| 45 |
1. Cancella dal DB (Azione Reale).
|
| 46 |
2. Salva in CSV per Active Learning (Data Lineage del rifiuto).
|
| 47 |
"""
|
| 48 |
-
# 1. Cancellazione Reale
|
| 49 |
query = "MATCH ()-[r]->() WHERE elementId(r) = $id DELETE r"
|
| 50 |
try:
|
| 51 |
run_query(driver, query, {"id": rel_id})
|
|
@@ -53,7 +57,6 @@ def reject_relationship(driver, rel_id, subj, pred, obj, reason="Human Rejection
|
|
| 53 |
st.error(f"Errore durante la cancellazione: {e}")
|
| 54 |
return False
|
| 55 |
|
| 56 |
-
# 2. Logging per Fine-Tuning
|
| 57 |
log_file = "data/processed/rejected_triples.csv"
|
| 58 |
os.makedirs("data/processed", exist_ok=True)
|
| 59 |
file_exists = os.path.isfile(log_file)
|
|
@@ -72,24 +75,27 @@ def reject_relationship(driver, rel_id, subj, pred, obj, reason="Human Rejection
|
|
| 72 |
# --- UI: SIDEBAR & CONFIGURAZIONE ---
|
| 73 |
st.sidebar.title("⚙️ Configurazione")
|
| 74 |
|
| 75 |
-
# Gestione credenziali: Priorità a .env, fallback su input utente
|
| 76 |
env_uri = os.getenv("NEO4J_URI", "")
|
| 77 |
env_user = os.getenv("NEO4J_USER", "neo4j")
|
| 78 |
-
# Non pre-compiliamo la password se non è nell'env per sicurezza
|
| 79 |
env_password = os.getenv("NEO4J_PASSWORD", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 80 |
|
| 81 |
st.sidebar.subheader("Connessione Neo4j AuraDB")
|
| 82 |
uri = st.sidebar.text_input("URI", value=env_uri, placeholder="neo4j+s://...")
|
| 83 |
user = st.sidebar.text_input("User", value=env_user)
|
| 84 |
password = st.sidebar.text_input("Password", value=env_password, type="password")
|
| 85 |
|
| 86 |
-
# Inizializza Driver
|
| 87 |
driver = None
|
| 88 |
if uri and password:
|
| 89 |
driver = get_driver(uri, user, password)
|
| 90 |
if driver:
|
| 91 |
st.sidebar.success("🟢 Connesso al Knowledge Graph")
|
| 92 |
-
# Setta variabili d'ambiente per i moduli backend che le usano
|
| 93 |
os.environ["NEO4J_URI"] = uri
|
| 94 |
os.environ["NEO4J_USER"] = user
|
| 95 |
os.environ["NEO4J_PASSWORD"] = password
|
|
@@ -113,15 +119,16 @@ tab_gen, tab_val, tab_vis = st.tabs([
|
|
| 113 |
with tab_gen:
|
| 114 |
st.header("Pipeline di Ingestione ed Estrazione")
|
| 115 |
|
| 116 |
-
# Selezione Sorgente Dati (Golden Path)
|
| 117 |
col_src, col_act = st.columns([1, 2])
|
| 118 |
|
|
|
|
| 119 |
with col_src:
|
| 120 |
st.subheader("Sorgente Dati")
|
| 121 |
data_source = st.radio("Modalità:", ("📂 Esempi Demo", "⬆️ Upload (Sperimentale)"))
|
| 122 |
|
| 123 |
selected_file_path = None
|
| 124 |
os.makedirs("data/raw", exist_ok=True)
|
|
|
|
| 125 |
os.makedirs("data/examples", exist_ok=True)
|
| 126 |
|
| 127 |
if data_source == "📂 Esempi Demo":
|
|
@@ -129,7 +136,9 @@ with tab_gen:
|
|
| 129 |
if files:
|
| 130 |
choice = st.selectbox("Seleziona scenario:", files)
|
| 131 |
if choice:
|
| 132 |
-
|
|
|
|
|
|
|
| 133 |
selected_file_path = choice
|
| 134 |
else:
|
| 135 |
st.warning("Nessun file in data/examples")
|
|
@@ -140,35 +149,122 @@ with tab_gen:
|
|
| 140 |
f.write(uploaded.getbuffer())
|
| 141 |
selected_file_path = uploaded.name
|
| 142 |
|
|
|
|
| 143 |
with col_act:
|
| 144 |
if selected_file_path:
|
| 145 |
st.info(f"File attivo: **{selected_file_path}**")
|
| 146 |
c1, c2, c3 = st.columns(3)
|
| 147 |
|
| 148 |
-
#
|
| 149 |
with c1:
|
| 150 |
if st.button("A. Semantic Chunking"):
|
| 151 |
-
with st.
|
| 152 |
-
|
| 153 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 154 |
|
| 155 |
-
#
|
| 156 |
with c2:
|
| 157 |
if st.button("B. Info Extraction"):
|
| 158 |
-
|
| 159 |
-
|
| 160 |
-
st.
|
| 161 |
-
|
| 162 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
with c3:
|
| 164 |
if st.button("C. Popola Neo4j", type="primary"):
|
| 165 |
-
|
|
|
|
|
|
|
|
|
|
| 166 |
st.error("Connettiti al DB prima!")
|
| 167 |
else:
|
| 168 |
-
with st.
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
else:
|
| 173 |
st.write("👈 Seleziona un file per iniziare.")
|
| 174 |
|
|
@@ -177,20 +273,16 @@ with tab_gen:
|
|
| 177 |
# ==============================================================================
|
| 178 |
with tab_val:
|
| 179 |
st.header("Curation & Feedback Loop")
|
| 180 |
-
st.markdown(""
|
| 181 |
-
In questa sezione, l'esperto di dominio valida le triple estratte.
|
| 182 |
-
Le relazioni rifiutate vengono rimosse dal grafo e loggate per il **fine-tuning** futuro dei modelli.
|
| 183 |
-
""")
|
| 184 |
|
| 185 |
if driver:
|
| 186 |
-
#
|
| 187 |
-
# Nota: Assicurati che i tuoi nodi abbiano una proprietà 'name' o 'label'
|
| 188 |
cypher_val = """
|
| 189 |
MATCH (s)-[r]->(o)
|
| 190 |
RETURN elementId(r) as id,
|
| 191 |
-
COALESCE(s.
|
| 192 |
type(r) as Predicato,
|
| 193 |
-
COALESCE(o.
|
| 194 |
COALESCE(r.confidence, 0.85) as Confidenza
|
| 195 |
ORDER BY Confidenza ASC LIMIT 50
|
| 196 |
"""
|
|
@@ -198,9 +290,7 @@ with tab_val:
|
|
| 198 |
|
| 199 |
if triples_data:
|
| 200 |
df = pd.DataFrame(triples_data)
|
| 201 |
-
|
| 202 |
-
# Tabella interattiva
|
| 203 |
-
st.write(f"Mostrando {len(df)} relazioni candidate (ordinate per confidenza):")
|
| 204 |
|
| 205 |
event = st.dataframe(
|
| 206 |
df.drop(columns=["id"]),
|
|
@@ -210,7 +300,6 @@ with tab_val:
|
|
| 210 |
hide_index=True
|
| 211 |
)
|
| 212 |
|
| 213 |
-
# Logica di rifiuto
|
| 214 |
if len(event.selection.rows) > 0:
|
| 215 |
idx = event.selection.rows[0]
|
| 216 |
row = df.iloc[idx]
|
|
@@ -218,17 +307,17 @@ with tab_val:
|
|
| 218 |
st.divider()
|
| 219 |
col_warn, col_btn = st.columns([3, 1])
|
| 220 |
with col_warn:
|
| 221 |
-
st.warning(f"
|
| 222 |
with col_btn:
|
| 223 |
if st.button("🗑️ CONFERMA RIFIUTO", type="primary"):
|
| 224 |
success = reject_relationship(driver, row['id'], row['Soggetto'], row['Predicato'], row['Oggetto'])
|
| 225 |
if success:
|
| 226 |
-
st.success("Relazione eliminata
|
| 227 |
st.rerun()
|
| 228 |
else:
|
| 229 |
-
st.info("Nessuna relazione
|
| 230 |
else:
|
| 231 |
-
st.warning("
|
| 232 |
|
| 233 |
# ==============================================================================
|
| 234 |
# TAB 3: VISUALIZZAZIONE (PYVIS)
|
|
@@ -237,36 +326,29 @@ with tab_vis:
|
|
| 237 |
st.header("Esplorazione Topologica")
|
| 238 |
|
| 239 |
if driver:
|
| 240 |
-
physics = st.checkbox("Abilita Fisica
|
| 241 |
-
|
| 242 |
-
# Limita a 100 nodi per performance rendering
|
| 243 |
cypher_vis = """
|
| 244 |
MATCH (s)-[r]->(o)
|
| 245 |
-
RETURN COALESCE(s.name, head(labels(s))) as src,
|
| 246 |
type(r) as rel,
|
| 247 |
-
COALESCE(o.name, head(labels(o))) as dst
|
| 248 |
LIMIT 100
|
| 249 |
"""
|
| 250 |
graph_data = run_query(driver, cypher_vis)
|
| 251 |
|
| 252 |
if graph_data:
|
| 253 |
-
# Creazione Network PyVis
|
| 254 |
net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white", notebook=False)
|
| 255 |
|
| 256 |
for item in graph_data:
|
| 257 |
-
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
rel_lbl = str(item['rel'])
|
| 261 |
|
| 262 |
-
net.add_node(
|
| 263 |
-
net.add_node(
|
| 264 |
-
net.add_edge(
|
| 265 |
|
| 266 |
-
# Opzioni fisica
|
| 267 |
net.toggle_physics(physics)
|
| 268 |
-
|
| 269 |
-
# Salva e renderizza HTML
|
| 270 |
path = "data/processed/graph_viz.html"
|
| 271 |
os.makedirs("data/processed", exist_ok=True)
|
| 272 |
net.save_graph(path)
|
|
@@ -274,12 +356,10 @@ with tab_vis:
|
|
| 274 |
with open(path, 'r', encoding='utf-8') as f:
|
| 275 |
html_string = f.read()
|
| 276 |
components.html(html_string, height=600, scrolling=True)
|
| 277 |
-
|
| 278 |
-
st.caption(f"Visualizzazione dei primi {len(graph_data)} archi.")
|
| 279 |
else:
|
| 280 |
-
st.info("Grafo vuoto
|
| 281 |
else:
|
| 282 |
-
st.warning("
|
| 283 |
|
| 284 |
# Footer
|
| 285 |
st.markdown("---")
|
|
|
|
| 2 |
import os
|
| 3 |
import shutil
|
| 4 |
import csv
|
| 5 |
+
import json
|
| 6 |
import pandas as pd
|
| 7 |
from datetime import datetime
|
| 8 |
from neo4j import GraphDatabase
|
|
|
|
| 10 |
import streamlit.components.v1 as components
|
| 11 |
from dotenv import load_dotenv
|
| 12 |
|
| 13 |
+
# --- IMPORT MODULI SPECIFICI ---
|
| 14 |
+
# 1. Ingestion
|
| 15 |
+
from src.ingestion.semantic_splitter import ActivaSemanticSplitter
|
| 16 |
+
# 2. Extraction (Importiamo anche la classe GraphTriple per la ricostruzione dei dati)
|
| 17 |
+
from src.extraction.extractor import NeuroSymbolicExtractor, GraphTriple
|
| 18 |
+
# 3. Graph Building (Loader & Resolver)
|
| 19 |
+
from src.graph.graph_loader import KnowledgeGraphPersister
|
| 20 |
+
from src.graph.entity_resolver import EntityResolver
|
| 21 |
|
| 22 |
# --- CONFIGURAZIONE INIZIALE ---
|
| 23 |
load_dotenv()
|
|
|
|
| 28 |
page_icon="🧠"
|
| 29 |
)
|
| 30 |
|
| 31 |
+
# --- FUNZIONI DI UTILITÀ NEO4J (Frontend) ---
|
| 32 |
def get_driver(uri, user, password):
|
| 33 |
if not uri or not password:
|
| 34 |
return None
|
|
|
|
| 50 |
1. Cancella dal DB (Azione Reale).
|
| 51 |
2. Salva in CSV per Active Learning (Data Lineage del rifiuto).
|
| 52 |
"""
|
|
|
|
| 53 |
query = "MATCH ()-[r]->() WHERE elementId(r) = $id DELETE r"
|
| 54 |
try:
|
| 55 |
run_query(driver, query, {"id": rel_id})
|
|
|
|
| 57 |
st.error(f"Errore durante la cancellazione: {e}")
|
| 58 |
return False
|
| 59 |
|
|
|
|
| 60 |
log_file = "data/processed/rejected_triples.csv"
|
| 61 |
os.makedirs("data/processed", exist_ok=True)
|
| 62 |
file_exists = os.path.isfile(log_file)
|
|
|
|
| 75 |
# --- UI: SIDEBAR & CONFIGURAZIONE ---
|
| 76 |
st.sidebar.title("⚙️ Configurazione")
|
| 77 |
|
|
|
|
| 78 |
env_uri = os.getenv("NEO4J_URI", "")
|
| 79 |
env_user = os.getenv("NEO4J_USER", "neo4j")
|
|
|
|
| 80 |
env_password = os.getenv("NEO4J_PASSWORD", "")
|
| 81 |
+
env_hf_token = os.getenv("HF_TOKEN", "")
|
| 82 |
+
|
| 83 |
+
st.sidebar.subheader("Backend LLM")
|
| 84 |
+
# Se il token HF è presente nei secrets/env, lo mostriamo mascherato, altrimenti input
|
| 85 |
+
hf_token_input = st.sidebar.text_input("HuggingFace Token (Opzionale per Cloud)", value=env_hf_token, type="password")
|
| 86 |
+
if hf_token_input:
|
| 87 |
+
os.environ["HF_TOKEN"] = hf_token_input
|
| 88 |
|
| 89 |
st.sidebar.subheader("Connessione Neo4j AuraDB")
|
| 90 |
uri = st.sidebar.text_input("URI", value=env_uri, placeholder="neo4j+s://...")
|
| 91 |
user = st.sidebar.text_input("User", value=env_user)
|
| 92 |
password = st.sidebar.text_input("Password", value=env_password, type="password")
|
| 93 |
|
|
|
|
| 94 |
driver = None
|
| 95 |
if uri and password:
|
| 96 |
driver = get_driver(uri, user, password)
|
| 97 |
if driver:
|
| 98 |
st.sidebar.success("🟢 Connesso al Knowledge Graph")
|
|
|
|
| 99 |
os.environ["NEO4J_URI"] = uri
|
| 100 |
os.environ["NEO4J_USER"] = user
|
| 101 |
os.environ["NEO4J_PASSWORD"] = password
|
|
|
|
| 119 |
with tab_gen:
|
| 120 |
st.header("Pipeline di Ingestione ed Estrazione")
|
| 121 |
|
|
|
|
| 122 |
col_src, col_act = st.columns([1, 2])
|
| 123 |
|
| 124 |
+
# 1. Selezione File
|
| 125 |
with col_src:
|
| 126 |
st.subheader("Sorgente Dati")
|
| 127 |
data_source = st.radio("Modalità:", ("📂 Esempi Demo", "⬆️ Upload (Sperimentale)"))
|
| 128 |
|
| 129 |
selected_file_path = None
|
| 130 |
os.makedirs("data/raw", exist_ok=True)
|
| 131 |
+
os.makedirs("data/processed", exist_ok=True)
|
| 132 |
os.makedirs("data/examples", exist_ok=True)
|
| 133 |
|
| 134 |
if data_source == "📂 Esempi Demo":
|
|
|
|
| 136 |
if files:
|
| 137 |
choice = st.selectbox("Seleziona scenario:", files)
|
| 138 |
if choice:
|
| 139 |
+
src = os.path.join("data/examples", choice)
|
| 140 |
+
dst = os.path.join("data/raw", choice)
|
| 141 |
+
shutil.copy(src, dst)
|
| 142 |
selected_file_path = choice
|
| 143 |
else:
|
| 144 |
st.warning("Nessun file in data/examples")
|
|
|
|
| 149 |
f.write(uploaded.getbuffer())
|
| 150 |
selected_file_path = uploaded.name
|
| 151 |
|
| 152 |
+
# 2. Esecuzione Step
|
| 153 |
with col_act:
|
| 154 |
if selected_file_path:
|
| 155 |
st.info(f"File attivo: **{selected_file_path}**")
|
| 156 |
c1, c2, c3 = st.columns(3)
|
| 157 |
|
| 158 |
+
# --- STEP A: CHUNKING ---
|
| 159 |
with c1:
|
| 160 |
if st.button("A. Semantic Chunking"):
|
| 161 |
+
with st.status("Analisi vettoriale in corso...", expanded=True) as status:
|
| 162 |
+
try:
|
| 163 |
+
# 1. Leggi il testo raw
|
| 164 |
+
with open(os.path.join("data/raw", selected_file_path), "r", encoding="utf-8") as f:
|
| 165 |
+
text_content = f.read()
|
| 166 |
+
|
| 167 |
+
# 2. Inizializza Splitter e processa
|
| 168 |
+
status.write("Caricamento modelli di embedding...")
|
| 169 |
+
splitter = ActivaSemanticSplitter() # Usa default huggingface
|
| 170 |
+
|
| 171 |
+
status.write("Calcolo distanze coseno...")
|
| 172 |
+
chunks, dists, threshold = splitter.create_chunks(text_content)
|
| 173 |
+
|
| 174 |
+
# 3. Salvataggio intermedio
|
| 175 |
+
chunk_file = "data/processed/chunks.json"
|
| 176 |
+
with open(chunk_file, "w", encoding="utf-8") as f:
|
| 177 |
+
json.dump(chunks, f, ensure_ascii=False, indent=2)
|
| 178 |
+
|
| 179 |
+
status.update(label="Chunking Completato!", state="complete", expanded=False)
|
| 180 |
+
st.success(f"Generati {len(chunks)} frammenti semantici.")
|
| 181 |
+
|
| 182 |
+
# Optional: Mostra grafico se generato
|
| 183 |
+
if os.path.exists("chunking_analysis.png"):
|
| 184 |
+
st.image("chunking_analysis.png", caption="Analisi Coerenza")
|
| 185 |
+
|
| 186 |
+
except Exception as e:
|
| 187 |
+
status.update(label="Errore Chunking", state="error")
|
| 188 |
+
st.error(f"Errore: {e}")
|
| 189 |
|
| 190 |
+
# --- STEP B: EXTRACTION ---
|
| 191 |
with c2:
|
| 192 |
if st.button("B. Info Extraction"):
|
| 193 |
+
chunk_file = "data/processed/chunks.json"
|
| 194 |
+
if not os.path.exists(chunk_file):
|
| 195 |
+
st.error("Esegui prima il Chunking!")
|
| 196 |
+
else:
|
| 197 |
+
with st.status("Estrazione Neuro-Simbolica...", expanded=True) as status:
|
| 198 |
+
try:
|
| 199 |
+
# 1. Carica Chunks
|
| 200 |
+
with open(chunk_file, "r", encoding="utf-8") as f:
|
| 201 |
+
chunks = json.load(f)
|
| 202 |
+
|
| 203 |
+
# 2. Init Extractor (Rileva HF_TOKEN da env)
|
| 204 |
+
status.write("Inizializzazione LLM (Locale/Cloud)...")
|
| 205 |
+
extractor = NeuroSymbolicExtractor() # Usa default params
|
| 206 |
+
|
| 207 |
+
all_triples = []
|
| 208 |
+
progress_bar = st.progress(0)
|
| 209 |
+
|
| 210 |
+
# 3. Loop su chunk
|
| 211 |
+
for i, chunk in enumerate(chunks):
|
| 212 |
+
status.write(f"Processando chunk {i+1}/{len(chunks)}...")
|
| 213 |
+
result = extractor.extract(chunk, source_id=selected_file_path)
|
| 214 |
+
# Converti oggetti Pydantic in dict per serializzazione JSON
|
| 215 |
+
triples_dicts = [t.model_dump() for t in result.triples]
|
| 216 |
+
all_triples.extend(triples_dicts)
|
| 217 |
+
progress_bar.progress((i + 1) / len(chunks))
|
| 218 |
+
|
| 219 |
+
# 4. Salvataggio Raw Triples
|
| 220 |
+
triples_file = "data/processed/triples_raw.json"
|
| 221 |
+
with open(triples_file, "w", encoding="utf-8") as f:
|
| 222 |
+
json.dump(all_triples, f, ensure_ascii=False, indent=2)
|
| 223 |
+
|
| 224 |
+
status.update(label="Estrazione Completata!", state="complete", expanded=False)
|
| 225 |
+
st.success(f"Estratte {len(all_triples)} triple candidate.")
|
| 226 |
+
|
| 227 |
+
except Exception as e:
|
| 228 |
+
status.update(label="Errore Estrazione", state="error")
|
| 229 |
+
st.error(f"Errore: {e}")
|
| 230 |
+
|
| 231 |
+
# --- STEP C: GRAPH BUILDING ---
|
| 232 |
with c3:
|
| 233 |
if st.button("C. Popola Neo4j", type="primary"):
|
| 234 |
+
triples_file = "data/processed/triples_raw.json"
|
| 235 |
+
if not os.path.exists(triples_file):
|
| 236 |
+
st.error("Esegui prima l'estrazione!")
|
| 237 |
+
elif not driver:
|
| 238 |
st.error("Connettiti al DB prima!")
|
| 239 |
else:
|
| 240 |
+
with st.status("Costruzione Grafo...", expanded=True) as status:
|
| 241 |
+
try:
|
| 242 |
+
# 1. Carica Raw Triples
|
| 243 |
+
with open(triples_file, "r", encoding="utf-8") as f:
|
| 244 |
+
raw_data = json.load(f)
|
| 245 |
+
|
| 246 |
+
# Ricostruisci oggetti GraphTriple (necessari per il Resolver)
|
| 247 |
+
triples_objs = [GraphTriple(**t) for t in raw_data]
|
| 248 |
+
|
| 249 |
+
# 2. Entity Resolution
|
| 250 |
+
status.write("Entity Resolution (DBSCAN Clustering)...")
|
| 251 |
+
resolver = EntityResolver(similarity_threshold=0.85)
|
| 252 |
+
resolved_triples = resolver.resolve_entities(triples_objs)
|
| 253 |
+
status.write(f"Entità normalizzate. Triple da inserire: {len(resolved_triples)}")
|
| 254 |
+
|
| 255 |
+
# 3. Persistenza Neo4j
|
| 256 |
+
status.write("Scrittura Batch su Neo4j...")
|
| 257 |
+
persister = KnowledgeGraphPersister() # Prende credenziali da env
|
| 258 |
+
persister.save_triples(resolved_triples)
|
| 259 |
+
persister.close()
|
| 260 |
+
|
| 261 |
+
status.update(label="Grafo Aggiornato!", state="complete", expanded=False)
|
| 262 |
+
st.success("🚀 Grafo costruito con successo su AuraDB!")
|
| 263 |
+
st.balloons()
|
| 264 |
+
|
| 265 |
+
except Exception as e:
|
| 266 |
+
status.update(label="Errore Costruzione", state="error")
|
| 267 |
+
st.error(f"Errore: {e}")
|
| 268 |
else:
|
| 269 |
st.write("👈 Seleziona un file per iniziare.")
|
| 270 |
|
|
|
|
| 273 |
# ==============================================================================
|
| 274 |
with tab_val:
|
| 275 |
st.header("Curation & Feedback Loop")
|
| 276 |
+
st.markdown("Validazione delle triple estratte. Le relazioni rifiutate vengono usate per il fine-tuning.")
|
|
|
|
|
|
|
|
|
|
| 277 |
|
| 278 |
if driver:
|
| 279 |
+
# Query aggiornata per mostrare nodi reali
|
|
|
|
| 280 |
cypher_val = """
|
| 281 |
MATCH (s)-[r]->(o)
|
| 282 |
RETURN elementId(r) as id,
|
| 283 |
+
COALESCE(s.label, s.name, head(labels(s))) as Soggetto,
|
| 284 |
type(r) as Predicato,
|
| 285 |
+
COALESCE(o.label, o.name, head(labels(o))) as Oggetto,
|
| 286 |
COALESCE(r.confidence, 0.85) as Confidenza
|
| 287 |
ORDER BY Confidenza ASC LIMIT 50
|
| 288 |
"""
|
|
|
|
| 290 |
|
| 291 |
if triples_data:
|
| 292 |
df = pd.DataFrame(triples_data)
|
| 293 |
+
st.write(f"Relazioni candidate ({len(df)}):")
|
|
|
|
|
|
|
| 294 |
|
| 295 |
event = st.dataframe(
|
| 296 |
df.drop(columns=["id"]),
|
|
|
|
| 300 |
hide_index=True
|
| 301 |
)
|
| 302 |
|
|
|
|
| 303 |
if len(event.selection.rows) > 0:
|
| 304 |
idx = event.selection.rows[0]
|
| 305 |
row = df.iloc[idx]
|
|
|
|
| 307 |
st.divider()
|
| 308 |
col_warn, col_btn = st.columns([3, 1])
|
| 309 |
with col_warn:
|
| 310 |
+
st.warning(f"Rifiutare: **{row['Soggetto']}** --[{row['Predicato']}]--> **{row['Oggetto']}**?")
|
| 311 |
with col_btn:
|
| 312 |
if st.button("🗑️ CONFERMA RIFIUTO", type="primary"):
|
| 313 |
success = reject_relationship(driver, row['id'], row['Soggetto'], row['Predicato'], row['Oggetto'])
|
| 314 |
if success:
|
| 315 |
+
st.success("Relazione eliminata!")
|
| 316 |
st.rerun()
|
| 317 |
else:
|
| 318 |
+
st.info("Nessuna relazione nel grafo.")
|
| 319 |
else:
|
| 320 |
+
st.warning("Database non connesso.")
|
| 321 |
|
| 322 |
# ==============================================================================
|
| 323 |
# TAB 3: VISUALIZZAZIONE (PYVIS)
|
|
|
|
| 326 |
st.header("Esplorazione Topologica")
|
| 327 |
|
| 328 |
if driver:
|
| 329 |
+
physics = st.checkbox("Abilita Fisica", value=True)
|
|
|
|
|
|
|
| 330 |
cypher_vis = """
|
| 331 |
MATCH (s)-[r]->(o)
|
| 332 |
+
RETURN COALESCE(s.label, s.name, head(labels(s))) as src,
|
| 333 |
type(r) as rel,
|
| 334 |
+
COALESCE(o.label, o.name, head(labels(o))) as dst
|
| 335 |
LIMIT 100
|
| 336 |
"""
|
| 337 |
graph_data = run_query(driver, cypher_vis)
|
| 338 |
|
| 339 |
if graph_data:
|
|
|
|
| 340 |
net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white", notebook=False)
|
| 341 |
|
| 342 |
for item in graph_data:
|
| 343 |
+
src = str(item['src'])
|
| 344 |
+
dst = str(item['dst'])
|
| 345 |
+
rel = str(item['rel'])
|
|
|
|
| 346 |
|
| 347 |
+
net.add_node(src, label=src, color="#4facfe", title=src)
|
| 348 |
+
net.add_node(dst, label=dst, color="#00f2fe", title=dst)
|
| 349 |
+
net.add_edge(src, dst, title=rel, label=rel)
|
| 350 |
|
|
|
|
| 351 |
net.toggle_physics(physics)
|
|
|
|
|
|
|
| 352 |
path = "data/processed/graph_viz.html"
|
| 353 |
os.makedirs("data/processed", exist_ok=True)
|
| 354 |
net.save_graph(path)
|
|
|
|
| 356 |
with open(path, 'r', encoding='utf-8') as f:
|
| 357 |
html_string = f.read()
|
| 358 |
components.html(html_string, height=600, scrolling=True)
|
|
|
|
|
|
|
| 359 |
else:
|
| 360 |
+
st.info("Grafo vuoto.")
|
| 361 |
else:
|
| 362 |
+
st.warning("Database non connesso.")
|
| 363 |
|
| 364 |
# Footer
|
| 365 |
st.markdown("---")
|