Spaces:

NextGenTech
/

AutomatedSemanticDiscovery

Sleeping

App Files Community

GaetanoParente committed on Dec 31, 2025

Commit

24213b8

verified ·

1 Parent(s): c4f394e

Update app.py

Browse files

Files changed (1) hide show

app.py +291 -260

app.py CHANGED Viewed

@@ -11,15 +11,12 @@ import streamlit.components.v1 as components
 from dotenv import load_dotenv
 # --- IMPORT MODULI SPECIFICI ---
-# 1. Ingestion
 from src.ingestion.semantic_splitter import ActivaSemanticSplitter
-# 2. Extraction (Importiamo anche la classe GraphTriple per la ricostruzione dei dati)
 from src.extraction.extractor import NeuroSymbolicExtractor, GraphTriple
-# 3. Graph Building (Loader & Resolver)
 from src.graph.graph_loader import KnowledgeGraphPersister
 from src.graph.entity_resolver import EntityResolver
-# --- CONFIGURAZIONE INIZIALE ---
 load_dotenv()
 st.set_page_config(
     page_title="Activa Semantic Discovery",
@@ -28,51 +25,73 @@ st.set_page_config(
     page_icon="🧠"
 )
-# --- FUNZIONI DI UTILITÀ NEO4J (Frontend) ---
 def get_driver(uri, user, password):
-    if not uri or not password:
-        return None
     try:
         return GraphDatabase.driver(uri, auth=(user, password))
-    except Exception as e:
-        return None
 def run_query(driver, query, params=None):
-    if driver is None:
-        return []
     with driver.session() as session:
         result = session.run(query, params)
         return [r.data() for r in result]
-# --- LOGICA FEEDBACK LOOP (HUMAN-IN-THE-LOOP) ---
-def reject_relationship(driver, rel_id, subj, pred, obj, reason="Human Rejection"):
-    """
-    1. Cancella dal DB (Azione Reale).
-    2. Salva in CSV per Active Learning (Data Lineage del rifiuto).
-    """
-    query = "MATCH ()-[r]->() WHERE elementId(r) = $id DELETE r"
-    try:
-        run_query(driver, query, {"id": rel_id})
-    except Exception as e:
-        st.error(f"Errore durante la cancellazione: {e}")
-        return False
-    log_file = "data/processed/rejected_triples.csv"
-    os.makedirs("data/processed", exist_ok=True)
-    file_exists = os.path.isfile(log_file)
-    try:
-        with open(log_file, mode='a', newline='', encoding='utf-8') as f:
-            writer = csv.writer(f)
-            if not file_exists:
-                writer.writerow(["timestamp", "subject", "predicate", "object", "reason"])
-            writer.writerow([datetime.now(), subj, pred, obj, reason])
-        return True
-    except Exception as e:
-        st.warning(f"Relazione cancellata dal DB, ma errore nel log CSV: {e}")
-        return True
-# --- UI: SIDEBAR & CONFIGURAZIONE ---
 st.sidebar.title("⚙️ Configurazione")
 env_uri = os.getenv("NEO4J_URI", "")
@@ -80,14 +99,12 @@ env_user = os.getenv("NEO4J_USER", "neo4j")
 env_password = os.getenv("NEO4J_PASSWORD", "")
 env_hf_token = os.getenv("HF_TOKEN", "")
-st.sidebar.subheader("Backend LLM")
-# Se il token HF è presente nei secrets/env, lo mostriamo mascherato, altrimenti input
-hf_token_input = st.sidebar.text_input("HuggingFace Token (Opzionale per Cloud)", value=env_hf_token, type="password")
-if hf_token_input:
-    os.environ["HF_TOKEN"] = hf_token_input
-st.sidebar.subheader("Connessione Neo4j AuraDB")
-uri = st.sidebar.text_input("URI", value=env_uri, placeholder="neo4j+s://...")
 user = st.sidebar.text_input("User", value=env_user)
 password = st.sidebar.text_input("Password", value=env_password, type="password")
@@ -95,188 +112,235 @@ driver = None
 if uri and password:
     driver = get_driver(uri, user, password)
     if driver:
-        st.sidebar.success("🟢 Connesso al Knowledge Graph")
         os.environ["NEO4J_URI"] = uri
         os.environ["NEO4J_USER"] = user
         os.environ["NEO4J_PASSWORD"] = password
     else:
         st.sidebar.error("🔴 Errore connessione")
-# --- UI: HEADER PRINCIPALE ---
 st.title("🧠 Automated Semantic Discovery Prototype")
-st.markdown("**Generazione Neuro-Simbolica & Validazione Human-in-the-Loop**")
-# --- TAB NAVIGAZIONE ---
 tab_gen, tab_val, tab_vis = st.tabs([
-    "⚙️ 1. Generazione (Pipeline)",
-    "🔍 2. Validazione (Active Learning)",
-    "🕸️ 3. Visualizzazione (Graph)"
 ])
 # ==============================================================================
-# TAB 1: GENERAZIONE (PIPELINE DI ESECUZIONE)
 # ==============================================================================
 with tab_gen:
-    st.header("Pipeline di Ingestione ed Estrazione")
-    col_src, col_act = st.columns([1, 2])
-    # 1. Selezione File
-    with col_src:
-        st.subheader("Sorgente Dati")
-        data_source = st.radio("Modalità:", ("📂 Esempi Demo", "⬆️ Upload (Sperimentale)"))
-        selected_file_path = None
-        os.makedirs("data/raw", exist_ok=True)
-        os.makedirs("data/processed", exist_ok=True)
-        os.makedirs("data/examples", exist_ok=True)
-        if data_source == "📂 Esempi Demo":
-            files = [f for f in os.listdir("data/examples") if f.endswith(".txt")]
-            if files:
-                choice = st.selectbox("Seleziona scenario:", files)
-                if choice:
-                    src = os.path.join("data/examples", choice)
-                    dst = os.path.join("data/raw", choice)
-                    shutil.copy(src, dst)
-                    selected_file_path = choice
-            else:
-                st.warning("Nessun file in data/examples")
         else:
-            uploaded = st.file_uploader("Carica .txt", type="txt")
-            if uploaded:
-                with open(os.path.join("data/raw", uploaded.name), "wb") as f:
-                    f.write(uploaded.getbuffer())
-                selected_file_path = uploaded.name
-    # 2. Esecuzione Step
-    with col_act:
-        if selected_file_path:
-            st.info(f"File attivo: **{selected_file_path}**")
-            c1, c2, c3 = st.columns(3)
-            # --- STEP A: CHUNKING ---
-            with c1:
-                if st.button("A. Semantic Chunking"):
-                    with st.status("Analisi vettoriale in corso...", expanded=True) as status:
                         try:
-                            # 1. Leggi il testo raw
-                            with open(os.path.join("data/raw", selected_file_path), "r", encoding="utf-8") as f:
-                                text_content = f.read()
-                            # 2. Inizializza Splitter e processa
-                            status.write("Caricamento modelli di embedding...")
-                            splitter = ActivaSemanticSplitter() # Usa default huggingface
-                            status.write("Calcolo distanze coseno...")
-                            chunks, dists, threshold = splitter.create_chunks(text_content)
-                            # 3. Salvataggio intermedio
-                            chunk_file = "data/processed/chunks.json"
-                            with open(chunk_file, "w", encoding="utf-8") as f:
-                                json.dump(chunks, f, ensure_ascii=False, indent=2)
-                            status.update(label="Chunking Completato!", state="complete", expanded=False)
-                            st.success(f"Generati {len(chunks)} frammenti semantici.")
-                            # Optional: Mostra grafico se generato
-                            if os.path.exists("chunking_analysis.png"):
-                                st.image("chunking_analysis.png", caption="Analisi Coerenza")
                         except Exception as e:
-                            status.update(label="Errore Chunking", state="error")
                             st.error(f"Errore: {e}")
-            # --- STEP B: EXTRACTION ---
-            with c2:
-                if st.button("B. Info Extraction"):
-                    chunk_file = "data/processed/chunks.json"
-                    if not os.path.exists(chunk_file):
-                        st.error("Esegui prima il Chunking!")
-                    else:
-                        with st.status("Estrazione Neuro-Simbolica...", expanded=True) as status:
-                            try:
-                                # 1. Carica Chunks
-                                with open(chunk_file, "r", encoding="utf-8") as f:
-                                    chunks = json.load(f)
-                                # 2. Init Extractor (Rileva HF_TOKEN da env)
-                                status.write("Inizializzazione LLM (Locale/Cloud)...")
-                                extractor = NeuroSymbolicExtractor() # Usa default params
-                                all_triples = []
-                                progress_bar = st.progress(0)
-                                # 3. Loop su chunk
-                                for i, chunk in enumerate(chunks):
-                                    status.write(f"Processando chunk {i+1}/{len(chunks)}...")
-                                    result = extractor.extract(chunk, source_id=selected_file_path)
-                                    # Converti oggetti Pydantic in dict per serializzazione JSON
-                                    triples_dicts = [t.model_dump() for t in result.triples]
-                                    all_triples.extend(triples_dicts)
-                                    progress_bar.progress((i + 1) / len(chunks))
-                                # 4. Salvataggio Raw Triples
-                                triples_file = "data/processed/triples_raw.json"
-                                with open(triples_file, "w", encoding="utf-8") as f:
-                                    json.dump(all_triples, f, ensure_ascii=False, indent=2)
-                                status.update(label="Estrazione Completata!", state="complete", expanded=False)
-                                st.success(f"Estratte {len(all_triples)} triple candidate.")
-                            except Exception as e:
-                                status.update(label="Errore Estrazione", state="error")
-                                st.error(f"Errore: {e}")
-            # --- STEP C: GRAPH BUILDING ---
-            with c3:
-                if st.button("C. Popola Neo4j", type="primary"):
-                    triples_file = "data/processed/triples_raw.json"
-                    if not os.path.exists(triples_file):
-                        st.error("Esegui prima l'estrazione!")
-                    elif not driver:
-                        st.error("Connettiti al DB prima!")
-                    else:
-                        with st.status("Costruzione Grafo...", expanded=True) as status:
-                            try:
-                                # 1. Carica Raw Triples
-                                with open(triples_file, "r", encoding="utf-8") as f:
-                                    raw_data = json.load(f)
-                                # Ricostruisci oggetti GraphTriple (necessari per il Resolver)
-                                triples_objs = [GraphTriple(**t) for t in raw_data]
-                                # 2. Entity Resolution
-                                status.write("Entity Resolution (DBSCAN Clustering)...")
-                                resolver = EntityResolver(similarity_threshold=0.85)
-                                resolved_triples = resolver.resolve_entities(triples_objs)
-                                status.write(f"Entità normalizzate. Triple da inserire: {len(resolved_triples)}")
-                                # 3. Persistenza Neo4j
-                                status.write("Scrittura Batch su Neo4j...")
-                                persister = KnowledgeGraphPersister() # Prende credenziali da env
-                                persister.save_triples(resolved_triples)
-                                persister.close()
-                                status.update(label="Grafo Aggiornato!", state="complete", expanded=False)
-                                st.success("🚀 Grafo costruito con successo su AuraDB!")
-                                st.balloons()
-                            except Exception as e:
-                                status.update(label="Errore Costruzione", state="error")
-                                st.error(f"Errore: {e}")
-        else:
-            st.write("👈 Seleziona un file per iniziare.")
 # ==============================================================================
-# TAB 2: VALIDAZIONE (HUMAN IN THE LOOP)
 # ==============================================================================
 with tab_val:
     st.header("Curation & Feedback Loop")
-    st.markdown("Validazione delle triple estratte. Le relazioni rifiutate vengono usate per il fine-tuning.")
     if driver:
-        # Query aggiornata per mostrare nodi reali
         cypher_val = """
         MATCH (s)-[r]->(o)
         RETURN elementId(r) as id,
@@ -290,77 +354,44 @@ with tab_val:
         if triples_data:
             df = pd.DataFrame(triples_data)
-            st.write(f"Relazioni candidate ({len(df)}):")
-            event = st.dataframe(
-                df.drop(columns=["id"]),
-                selection_mode="single-row",
-                on_select="rerun",
-                use_container_width=True,
-                hide_index=True
-            )
-            if len(event.selection.rows) > 0:
-                idx = event.selection.rows[0]
-                row = df.iloc[idx]
-                st.divider()
-                col_warn, col_btn = st.columns([3, 1])
-                with col_warn:
-                    st.warning(f"Rifiutare: **{row['Soggetto']}** --[{row['Predicato']}]--> **{row['Oggetto']}**?")
-                with col_btn:
-                    if st.button("🗑️ CONFERMA RIFIUTO", type="primary"):
-                        success = reject_relationship(driver, row['id'], row['Soggetto'], row['Predicato'], row['Oggetto'])
-                        if success:
-                            st.success("Relazione eliminata!")
-                            st.rerun()
         else:
-            st.info("Nessuna relazione nel grafo.")
     else:
         st.warning("Database non connesso.")
 # ==============================================================================
-# TAB 3: VISUALIZZAZIONE (PYVIS)
 # ==============================================================================
 with tab_vis:
     st.header("Esplorazione Topologica")
     if driver:
-        physics = st.checkbox("Abilita Fisica", value=True)
-        cypher_vis = """
-        MATCH (s)-[r]->(o)
-        RETURN COALESCE(s.label, s.name, head(labels(s))) as src,
-               type(r) as rel,
-               COALESCE(o.label, o.name, head(labels(o))) as dst
-        LIMIT 100
-        """
-        graph_data = run_query(driver, cypher_vis)
-        if graph_data:
-            net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white", notebook=False)
-            for item in graph_data:
-                src = str(item['src'])
-                dst = str(item['dst'])
-                rel = str(item['rel'])
-                net.add_node(src, label=src, color="#4facfe", title=src)
-                net.add_node(dst, label=dst, color="#00f2fe", title=dst)
-                net.add_edge(src, dst, title=rel, label=rel)
-            net.toggle_physics(physics)
-            path = "data/processed/graph_viz.html"
-            os.makedirs("data/processed", exist_ok=True)
-            net.save_graph(path)
-            with open(path, 'r', encoding='utf-8') as f:
-                html_string = f.read()
-            components.html(html_string, height=600, scrolling=True)
-        else:
-            st.info("Grafo vuoto.")
     else:
-        st.warning("Database non connesso.")
-# Footer
-st.markdown("---")
-st.caption("Activa Digital | NextGenTech | Prototipo v1.0")

 from dotenv import load_dotenv
 # --- IMPORT MODULI SPECIFICI ---
 from src.ingestion.semantic_splitter import ActivaSemanticSplitter
 from src.extraction.extractor import NeuroSymbolicExtractor, GraphTriple
 from src.graph.graph_loader import KnowledgeGraphPersister
 from src.graph.entity_resolver import EntityResolver
+# --- CONFIGURAZIONE PAGINA ---
 load_dotenv()
 st.set_page_config(
     page_title="Activa Semantic Discovery",
     page_icon="🧠"
 )
+# --- CSS CUSTOM PER UX MIGLIORATA ---
+# Rende le card più leggibili e stilizza i messaggi di stato
+st.markdown("""
+<style>
+    .step-card {
+        padding: 20px;
+        border-radius: 10px;
+        border: 1px solid #e0e0e0;
+        margin-bottom: 20px;
+        background-color: #262730;
+    }
+    .step-header {
+        font-size: 1.2rem;
+        font-weight: bold;
+        margin-bottom: 10px;
+        color: #4facfe;
+    }
+    .success-box {
+        padding: 10px;
+        background-color: rgba(76, 175, 80, 0.1);
+        border-left: 5px solid #4CAF50;
+        border-radius: 5px;
+    }
+</style>
+""", unsafe_allow_html=True)
+# --- SESSION STATE MANAGEMENT ---
+if 'pipeline_stage' not in st.session_state:
+    st.session_state.pipeline_stage = 0  # 0: Init, 1: Chunked, 2: Extracted, 3: Loaded
+if 'current_file' not in st.session_state:
+    st.session_state.current_file = None
+def reset_pipeline():
+    st.session_state.pipeline_stage = 0
+    st.session_state.current_file = None
+    # Pulisce i file processati per evitare incongruenze
+    if os.path.exists("data/processed"):
+        shutil.rmtree("data/processed")
+    os.makedirs("data/processed", exist_ok=True)
+# --- CACHING RISORSE ---
+@st.cache_resource
+def get_splitter():
+    return ActivaSemanticSplitter()
+@st.cache_resource
+def get_extractor():
+    return NeuroSymbolicExtractor()
+@st.cache_resource
+def get_resolver():
+    return EntityResolver(similarity_threshold=0.85)
+# --- FUNZIONI NEO4J ---
 def get_driver(uri, user, password):
+    if not uri or not password: return None
     try:
         return GraphDatabase.driver(uri, auth=(user, password))
+    except: return None
 def run_query(driver, query, params=None):
+    if driver is None: return []
     with driver.session() as session:
         result = session.run(query, params)
         return [r.data() for r in result]
+# --- UI: SIDEBAR ---
 st.sidebar.title("⚙️ Configurazione")
 env_uri = os.getenv("NEO4J_URI", "")
 env_password = os.getenv("NEO4J_PASSWORD", "")
 env_hf_token = os.getenv("HF_TOKEN", "")
+st.sidebar.subheader("Backend AI")
+hf_token_input = st.sidebar.text_input("HF Token (Opzionale)", value=env_hf_token, type="password")
+if hf_token_input: os.environ["HF_TOKEN"] = hf_token_input
+st.sidebar.subheader("Knowledge Graph")
+uri = st.sidebar.text_input("URI", value=env_uri)
 user = st.sidebar.text_input("User", value=env_user)
 password = st.sidebar.text_input("Password", value=env_password, type="password")
 if uri and password:
     driver = get_driver(uri, user, password)
     if driver:
+        st.sidebar.success("🟢 Connesso a Neo4j")
         os.environ["NEO4J_URI"] = uri
         os.environ["NEO4J_USER"] = user
         os.environ["NEO4J_PASSWORD"] = password
     else:
         st.sidebar.error("🔴 Errore connessione")
+st.sidebar.divider()
+if st.sidebar.button("🔄 Reset Pipeline", on_click=reset_pipeline):
+    st.sidebar.info("Stato resettato.")
+# --- MAIN HEADER ---
 st.title("🧠 Automated Semantic Discovery Prototype")
+st.markdown("**Pipeline Sequenziale Neuro-Simbolica**")
+# --- TAB LOGIC ---
 tab_gen, tab_val, tab_vis = st.tabs([
+    "⚙️ 1. Pipeline Generativa",
+    "🔍 2. Validazione (HITL)",
+    "🕸️ 3. Esplorazione Grafo"
 ])
 # ==============================================================================
+# TAB 1: PIPELINE GENERATIVA (STEPPER UI)
 # ==============================================================================
 with tab_gen:
+    # --- SELEZIONE FILE ---
+    st.subheader("1. Sorgente Documentale")
+    col_sel, col_info = st.columns([1, 2])
+    with col_sel:
+        data_source = st.radio("Modalità:", ("📂 Esempi Demo", "⬆️ Upload"), horizontal=True)
+    selected_file = None
+    os.makedirs("data/raw", exist_ok=True)
+    os.makedirs("data/processed", exist_ok=True)
+    os.makedirs("data/examples", exist_ok=True)
+    if data_source == "📂 Esempi Demo":
+        files = [f for f in os.listdir("data/examples") if f.endswith(".txt")]
+        if files:
+            choice = st.selectbox("Seleziona scenario:", files, index=0)
+            if choice:
+                src = os.path.join("data/examples", choice)
+                dst = os.path.join("data/raw", choice)
+                shutil.copy(src, dst)
+                selected_file = choice
+        else:
+            st.warning("Nessun file in data/examples")
+    else:
+        uploaded = st.file_uploader("Carica .txt", type="txt")
+        if uploaded:
+            with open(os.path.join("data/raw", uploaded.name), "wb") as f:
+                f.write(uploaded.getbuffer())
+            selected_file = uploaded.name
+    # Logica di cambio file: se cambia il file, resetta la pipeline
+    if selected_file and selected_file != st.session_state.current_file:
+        st.session_state.current_file = selected_file
+        st.session_state.pipeline_stage = 0
+        st.rerun()
+    if not selected_file:
+        st.info("👈 Seleziona o carica un file per iniziare.")
+        st.stop()
+    st.markdown("---")
+    # --- PROGRESS BAR ---
+    # stage 0 -> 0%, stage 1 -> 33%, stage 2 -> 66%, stage 3 -> 100%
+    progress_val = int((st.session_state.pipeline_stage / 3) * 100)
+    st.progress(progress_val, text=f"Progresso Pipeline: {progress_val}%")
+    # ==========================
+    # FASE A: CHUNKING
+    # ==========================
+    with st.container():
+        st.markdown(f"### {'✅' if st.session_state.pipeline_stage >= 1 else '1️⃣'} Fase A: Semantic Chunking")
+        if st.session_state.pipeline_stage >= 1:
+            # Stato Completato: Mostra riassunto
+            with open("data/processed/chunks.json", "r") as f:
+                chunks = json.load(f)
+            st.markdown(f"""
+            <div class="success-box">
+                <b>Chunking completato!</b> Generati {len(chunks)} frammenti semantici.<br>
+                Modello vettoriale utilizzato: <i>MiniLM-L12-v2</i>
+            </div>
+            """, unsafe_allow_html=True)
+            with st.expander("Vedi dettagli frammenti"):
+                st.json(chunks[:3]) # Mostra solo i primi 3 per pulizia
+        else:
+            # Stato Attivo: Bottone azione
+            st.markdown("Segmentazione del testo basata sulla coerenza semantica vettoriale.")
+            if st.button("Avvia Analisi Semantica", type="primary"):
+                with st.spinner("Calcolo vettori e segmentazione..."):
+                    try:
+                        with open(os.path.join("data/raw", selected_file), "r", encoding="utf-8") as f:
+                            text_content = f.read()
+                        splitter = get_splitter()
+                        chunks, dists, threshold = splitter.create_chunks(text_content)
+                        with open("data/processed/chunks.json", "w", encoding="utf-8") as f:
+                            json.dump(chunks, f, ensure_ascii=False, indent=2)
+                        st.session_state.pipeline_stage = 1
+                        st.rerun()
+                    except Exception as e:
+                        st.error(f"Errore: {e}")
+    st.markdown("⬇️")
+    # ==========================
+    # FASE B: EXTRACTION
+    # ==========================
+    is_step_b_unlocked = st.session_state.pipeline_stage >= 1
+    with st.container():
+        # Header grigio se bloccato, colorato se attivo
+        color = "black" if is_step_b_unlocked else "gray"
+        icon = "✅" if st.session_state.pipeline_stage >= 2 else ("2️⃣" if is_step_b_unlocked else "🔒")
+        st.markdown(f"<h3 style='color:{color}'>{icon} Fase B: Information Extraction</h3>", unsafe_allow_html=True)
+        if not is_step_b_unlocked:
+            st.caption("Completa la Fase A per sbloccare l'estrazione.")
+        elif st.session_state.pipeline_stage >= 2:
+            # Stato Completato
+            with open("data/processed/triples_raw.json", "r") as f:
+                triples = json.load(f)
+            st.markdown(f"""
+            <div class="success-box">
+                <b>Estrazione completata!</b> Identificate {len(triples)} triple candidate.<br>
+                Motore Neuro-Simbolico: <i>Llama3/Mistral + Dependecy Parsing</i>
+            </div>
+            """, unsafe_allow_html=True)
+            with st.expander("Vedi esempio triple"):
+                st.dataframe(pd.DataFrame(triples).head(5), hide_index=True)
         else:
+            # Stato Attivo
+            st.markdown("Estrazione di Entità e Relazioni tramite approccio Neuro-Simbolico.")
+            if st.button("Avvia Estrazione Ontologica", type="primary"):
+                with st.spinner("Processando frammenti con LLM..."):
+                    try:
+                        with open("data/processed/chunks.json", "r", encoding="utf-8") as f:
+                            chunks = json.load(f)
+                        extractor = get_extractor()
+                        all_triples = []
+                        prog_bar = st.progress(0)
+                        for i, chunk in enumerate(chunks):
+                            res = extractor.extract(chunk, source_id=selected_file)
+                            all_triples.extend([t.model_dump() for t in res.triples])
+                            prog_bar.progress((i+1)/len(chunks))
+                        with open("data/processed/triples_raw.json", "w", encoding="utf-8") as f:
+                            json.dump(all_triples, f, ensure_ascii=False, indent=2)
+                        st.session_state.pipeline_stage = 2
+                        st.rerun()
+                    except Exception as e:
+                        st.error(f"Errore: {e}")
+    st.markdown("⬇️")
+    # ==========================
+    # FASE C: GRAPH POPULATION
+    # ==========================
+    is_step_c_unlocked = st.session_state.pipeline_stage >= 2
+    with st.container():
+        color = "black" if is_step_c_unlocked else "gray"
+        icon = "✅" if st.session_state.pipeline_stage >= 3 else ("3️⃣" if is_step_c_unlocked else "🔒")
+        st.markdown(f"<h3 style='color:{color}'>{icon} Fase C: Graph Construction</h3>", unsafe_allow_html=True)
+        if not is_step_c_unlocked:
+            st.caption("Completa la Fase B per popolare il grafo.")
+        elif st.session_state.pipeline_stage >= 3:
+            st.markdown("""
+            <div class="success-box">
+                <b>Grafo Aggiornato!</b> I dati sono stati caricati su Neo4j.<br>
+                Puoi esplorarli nei tab "Validazione" e "Visualizzazione".
+            </div>
+            """, unsafe_allow_html=True)
+            st.balloons()
+            if st.button("Riavvia con nuovo file"):
+                reset_pipeline()
+                st.rerun()
+        else:
+            st.markdown("Entity Resolution (Deduplica) e Caricamento su Neo4j.")
+            if not driver:
+                st.error("⚠️ Connettiti a Neo4j (nella sidebar) per procedere.")
+            else:
+                if st.button("Genera Knowledge Graph", type="primary"):
+                    with st.spinner("Risoluzione entità e scrittura DB..."):
                         try:
+                            with open("data/processed/triples_raw.json", "r", encoding="utf-8") as f:
+                                raw_data = json.load(f)
+                            triples_objs = [GraphTriple(**t) for t in raw_data]
+                            resolver = get_resolver()
+                            resolved = resolver.resolve_entities(triples_objs)
+                            persister = KnowledgeGraphPersister()
+                            persister.save_triples(resolved)
+                            persister.close()
+                            st.session_state.pipeline_stage = 3
+                            st.rerun()
                         except Exception as e:
                             st.error(f"Errore: {e}")
 # ==============================================================================
+# TAB 2: VALIDATION (read-only view: graph metrics plus relationship table; row selection and the reject action were removed)
 # ==============================================================================
 with tab_val:
     st.header("Curation & Feedback Loop")
     if driver:
+        # Recupera statistiche rapide
+        stats = run_query(driver, "MATCH (n) RETURN count(n) as nodes, count{()-->()} as rels")
+        if stats:
+            c1, c2 = st.columns(2)
+            c1.metric("Nodi Totali", stats[0]['nodes'])
+            c2.metric("Relazioni", stats[0]['rels'])
         cypher_val = """
         MATCH (s)-[r]->(o)
         RETURN elementId(r) as id,
         if triples_data:
             df = pd.DataFrame(triples_data)
+            st.dataframe(df.drop(columns=["id"]), use_container_width=True, hide_index=True)
         else:
+            st.info("Grafo vuoto.")
     else:
         st.warning("Database non connesso.")
 # ==============================================================================
+# TAB 3: VISUALIZZAZIONE
 # ==============================================================================
 with tab_vis:
     st.header("Esplorazione Topologica")
     if driver:
+        physics = st.checkbox("Abilita Fisica (Gravità)", value=True)
+        if st.button("Aggiorna Visualizzazione"):
+            cypher_vis = """
+            MATCH (s)-[r]->(o)
+            RETURN COALESCE(s.label, s.name, head(labels(s))) as src,
+                   type(r) as rel,
+                   COALESCE(o.label, o.name, head(labels(o))) as dst
+            LIMIT 100
+            """
+            graph_data = run_query(driver, cypher_vis)
+            if graph_data:
+                net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white", notebook=False)
+                for item in graph_data:
+                    src, dst, rel = str(item['src']), str(item['dst']), str(item['rel'])
+                    net.add_node(src, label=src, color="#4facfe", title=src)
+                    net.add_node(dst, label=dst, color="#00f2fe", title=dst)
+                    net.add_edge(src, dst, title=rel, label=rel)
+                net.toggle_physics(physics)
+                path = "data/processed/graph_viz.html"
+                os.makedirs("data/processed", exist_ok=True)
+                net.save_graph(path)
+                with open(path, 'r', encoding='utf-8') as f:
+                    html_string = f.read()
+                components.html(html_string, height=600, scrolling=True)
     else:
+        st.warning("Database non connesso.")