import streamlit as st
import os
import shutil
import csv
import json
import pandas as pd
from datetime import datetime
from neo4j import GraphDatabase
from pyvis.network import Network
import streamlit.components.v1 as components
from dotenv import load_dotenv

# --- IMPORT MODULI SPECIFICI ---
# 1. Ingestion
from src.ingestion.semantic_splitter import ActivaSemanticSplitter
# 2. Extraction (Importiamo anche la classe GraphTriple per la ricostruzione dei dati)
from src.extraction.extractor import NeuroSymbolicExtractor, GraphTriple
# 3. Graph Building (Loader & Resolver)
from src.graph.graph_loader import KnowledgeGraphPersister
from src.graph.entity_resolver import EntityResolver

# --- INITIAL CONFIGURATION ---
# Load variables from a local .env file before anything reads os.environ.
load_dotenv()

# Page-level Streamlit settings, collected in one mapping and applied once.
page_settings = {
    "page_title": "Activa Semantic Discovery",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
    "page_icon": "🧠",
}
st.set_page_config(**page_settings)

# --- FUNZIONI DI UTILITÀ NEO4J (Frontend) ---
def get_driver(uri, user, password):
    """Create a Neo4j driver and check that the server is actually reachable.

    Building the driver object alone does not prove that the URI is reachable
    or that the credentials are valid, so the connection is verified before
    the driver is handed back. This keeps the caller's "connected" indicator
    honest.

    Args:
        uri: Neo4j connection URI (for example ``neo4j+s://...``).
        user: Database user name.
        password: Database password.

    Returns:
        A verified driver, or ``None`` when ``uri``/``password`` is missing or
        the connection attempt fails for any reason.
    """
    if not uri or not password:
        return None

    driver = None
    try:
        driver = GraphDatabase.driver(uri, auth=(user, password))
        # Forces a round trip to the server; raises on bad URI/credentials.
        driver.verify_connectivity()
        return driver
    except Exception:
        # Best effort: release the half-initialised driver so a failed attempt
        # does not leave resources behind.
        if driver is not None:
            try:
                driver.close()
            except Exception:
                pass
        return None

def run_query(driver, query, params=None):
    """Run a Cypher query and return every record as a plain dict.

    Args:
        driver: Neo4j driver, or ``None`` when no database is connected.
        query: Cypher statement to execute.
        params: Optional mapping of query parameters.

    Returns:
        A list with one ``record.data()`` dict per result row; an empty list
        when ``driver`` is ``None``.
    """
    rows = []
    if driver is not None:
        with driver.session() as session:
            # Records are consumed while the session is still open.
            for record in session.run(query, params):
                rows.append(record.data())
    return rows

# --- LOGICA FEEDBACK LOOP (HUMAN-IN-THE-LOOP) ---
def reject_relationship(driver, rel_id, subj, pred, obj, reason="Human Rejection"):
    """Delete a relationship from the graph and log the rejection to CSV.

    The deletion is the authoritative action. The CSV entry is a best-effort
    audit trail of rejected triples, so a logging failure is reported as a
    warning but does not turn a successful deletion into a failure.

    Args:
        driver: Neo4j driver used to run the delete.
        rel_id: Element id of the relationship to delete.
        subj: Subject label, written to the log.
        pred: Predicate (relationship type), written to the log.
        obj: Object label, written to the log.
        reason: Free-text reason stored with the log entry.

    Returns:
        ``True`` when the delete query ran without error (even if the CSV log
        could not be written); ``False`` when there is no driver or the delete
        raised.
    """
    # Without a driver run_query() would silently do nothing; do not report
    # (or log) a rejection that never happened.
    if driver is None:
        st.error("Database non connesso.")
        return False

    query = "MATCH ()-[r]->() WHERE elementId(r) = $id DELETE r"
    try:
        run_query(driver, query, {"id": rel_id})
    except Exception as e:
        st.error(f"Errore durante la cancellazione: {e}")
        return False

    log_file = "data/processed/rejected_triples.csv"
    try:
        # Directory creation is part of the best-effort logging step: a
        # filesystem error here must not crash after the DB delete succeeded.
        os.makedirs(os.path.dirname(log_file), exist_ok=True)
        # Also write the header when the file exists but is empty.
        needs_header = (
            not os.path.isfile(log_file) or os.path.getsize(log_file) == 0
        )
        with open(log_file, mode='a', newline='', encoding='utf-8') as f:
            writer = csv.writer(f)
            if needs_header:
                writer.writerow(["timestamp", "subject", "predicate", "object", "reason"])
            writer.writerow([datetime.now(), subj, pred, obj, reason])
    except Exception as e:
        st.warning(f"Relazione cancellata dal DB, ma errore nel log CSV: {e}")
    return True

# --- UI: SIDEBAR & CONFIGURATION ---
st.sidebar.title("⚙️ Configurazione")

# Defaults come from the environment (populated by load_dotenv at startup).
env_uri = os.getenv("NEO4J_URI", "")
env_user = os.getenv("NEO4J_USER", "neo4j")
env_password = os.getenv("NEO4J_PASSWORD", "")
env_hf_token = os.getenv("HF_TOKEN", "")

st.sidebar.subheader("Backend LLM")
# The HF token found in the environment is pre-filled (masked); the user may
# override it. The value is exported back to the environment for later steps.
hf_token_input = st.sidebar.text_input("HuggingFace Token (Opzionale per Cloud)", value=env_hf_token, type="password")
if hf_token_input:
    os.environ["HF_TOKEN"] = hf_token_input

st.sidebar.subheader("Connessione Neo4j AuraDB")
uri = st.sidebar.text_input("URI", value=env_uri, placeholder="neo4j+s://...")
user = st.sidebar.text_input("User", value=env_user)
password = st.sidebar.text_input("Password", value=env_password, type="password")


@st.cache_resource(show_spinner=False)
def _get_cached_driver(uri, user, password):
    """Return one shared driver per (uri, user, password) combination.

    Streamlit re-executes this script on every widget interaction; without
    caching, each rerun would create a new driver that is never closed.
    A failed attempt raises instead of returning ``None`` so that the failure
    is not stored in the cache and the next rerun retries the connection.
    """
    new_driver = get_driver(uri, user, password)
    if new_driver is None:
        raise ConnectionError("Neo4j connection failed")
    return new_driver


driver = None
if uri and password:
    try:
        driver = _get_cached_driver(uri, user, password)
    except ConnectionError:
        driver = None
    if driver:
        st.sidebar.success("🟢 Connesso al Knowledge Graph")
        # Export the working credentials so components that read their
        # configuration from the environment use the same connection data.
        os.environ["NEO4J_URI"] = uri
        os.environ["NEO4J_USER"] = user
        os.environ["NEO4J_PASSWORD"] = password
    else:
        st.sidebar.error("🔴 Errore connessione")

# --- UI: MAIN HEADER ---
st.title("🧠 Automated Semantic Discovery Prototype")
st.markdown("**Generazione Neuro-Simbolica & Validazione Human-in-the-Loop**")

# --- NAVIGATION TABS ---
# One tab per stage of the workflow, in the order the user goes through them.
tab_labels = [
    "⚙️ 1. Generazione (Pipeline)",
    "🔍 2. Validazione (Active Learning)",
    "🕸️ 3. Visualizzazione (Graph)",
]
tab_gen, tab_val, tab_vis = st.tabs(tab_labels)

# ==============================================================================
# TAB 1: GENERATION (EXECUTION PIPELINE)
# ==============================================================================
# Three manually triggered steps hand data to each other through files under
# data/processed: A) chunking -> chunks.json, B) extraction -> triples_raw.json,
# C) entity resolution + load into Neo4j.
with tab_gen:
    st.header("Pipeline di Ingestione ed Estrazione")

    col_src, col_act = st.columns([1, 2])

    # 1. File selection
    with col_src:
        st.subheader("Sorgente Dati")
        data_source = st.radio("Modalità:", ("📂 Esempi Demo", "⬆️ Upload (Sperimentale)"))

        # Name of the active file inside data/raw (None until one is chosen).
        selected_file_path = None
        os.makedirs("data/raw", exist_ok=True)
        os.makedirs("data/processed", exist_ok=True)
        os.makedirs("data/examples", exist_ok=True)

        if data_source == "📂 Esempi Demo":
            files = [f for f in os.listdir("data/examples") if f.endswith(".txt")]
            if files:
                choice = st.selectbox("Seleziona scenario:", files)
                if choice:
                    # Stage the chosen example in data/raw, where the
                    # pipeline steps read their input from.
                    src = os.path.join("data/examples", choice)
                    dst = os.path.join("data/raw", choice)
                    shutil.copy(src, dst)
                    selected_file_path = choice
            else:
                st.warning("Nessun file in data/examples")
        else:
            uploaded = st.file_uploader("Carica .txt", type="txt")
            if uploaded:
                # The file name is supplied by the client: keep only its last
                # path component so the write cannot escape data/raw.
                safe_name = os.path.basename(uploaded.name.replace("\\", "/"))
                if safe_name and safe_name not in (".", ".."):
                    with open(os.path.join("data/raw", safe_name), "wb") as f:
                        f.write(uploaded.getbuffer())
                    selected_file_path = safe_name

    # 2. Step execution
    with col_act:
        if selected_file_path:
            st.info(f"File attivo: **{selected_file_path}**")
            c1, c2, c3 = st.columns(3)

            # --- STEP A: CHUNKING ---
            with c1:
                if st.button("A. Semantic Chunking"):
                    with st.status("Analisi vettoriale in corso...", expanded=True) as status:
                        try:
                            # 1. Read the raw text
                            with open(os.path.join("data/raw", selected_file_path), "r", encoding="utf-8") as f:
                                text_content = f.read()

                            # 2. Initialise the splitter and process the text
                            status.write("Caricamento modelli di embedding...")
                            splitter = ActivaSemanticSplitter()  # default settings

                            status.write("Calcolo distanze coseno...")
                            # Only the chunks are used here; the other two
                            # returned values are ignored.
                            chunks, _dists, _threshold = splitter.create_chunks(text_content)

                            # 3. Intermediate save, consumed by step B
                            chunk_file = "data/processed/chunks.json"
                            with open(chunk_file, "w", encoding="utf-8") as f:
                                json.dump(chunks, f, ensure_ascii=False, indent=2)

                            status.update(label="Chunking Completato!", state="complete", expanded=False)
                            st.success(f"Generati {len(chunks)} frammenti semantici.")

                            # Optional: show the analysis chart if one exists
                            if os.path.exists("chunking_analysis.png"):
                                st.image("chunking_analysis.png", caption="Analisi Coerenza")

                        except Exception as e:
                            # UI boundary: report any failure instead of
                            # crashing the app.
                            status.update(label="Errore Chunking", state="error")
                            st.error(f"Errore: {e}")

            # --- STEP B: EXTRACTION ---
            with c2:
                if st.button("B. Info Extraction"):
                    chunk_file = "data/processed/chunks.json"
                    if not os.path.exists(chunk_file):
                        st.error("Esegui prima il Chunking!")
                    else:
                        with st.status("Estrazione Neuro-Simbolica...", expanded=True) as status:
                            try:
                                # 1. Load the chunks written by step A
                                with open(chunk_file, "r", encoding="utf-8") as f:
                                    chunks = json.load(f)

                                # 2. Initialise the extractor
                                status.write("Inizializzazione LLM (Locale/Cloud)...")
                                extractor = NeuroSymbolicExtractor()  # default params

                                all_triples = []
                                progress_bar = st.progress(0)

                                # 3. Loop over the chunks
                                for i, chunk in enumerate(chunks):
                                    status.write(f"Processando chunk {i+1}/{len(chunks)}...")
                                    result = extractor.extract(chunk, source_id=selected_file_path)
                                    # Convert the triple objects to dicts so
                                    # they can be serialised as JSON.
                                    triples_dicts = [t.model_dump() for t in result.triples]
                                    all_triples.extend(triples_dicts)
                                    progress_bar.progress((i + 1) / len(chunks))

                                # 4. Save the raw triples, consumed by step C
                                triples_file = "data/processed/triples_raw.json"
                                with open(triples_file, "w", encoding="utf-8") as f:
                                    json.dump(all_triples, f, ensure_ascii=False, indent=2)

                                status.update(label="Estrazione Completata!", state="complete", expanded=False)
                                st.success(f"Estratte {len(all_triples)} triple candidate.")

                            except Exception as e:
                                status.update(label="Errore Estrazione", state="error")
                                st.error(f"Errore: {e}")

            # --- STEP C: GRAPH BUILDING ---
            with c3:
                if st.button("C. Popola Neo4j", type="primary"):
                    triples_file = "data/processed/triples_raw.json"
                    if not os.path.exists(triples_file):
                        st.error("Esegui prima l'estrazione!")
                    elif not driver:
                        st.error("Connettiti al DB prima!")
                    else:
                        with st.status("Costruzione Grafo...", expanded=True) as status:
                            try:
                                # 1. Load the raw triples written by step B
                                with open(triples_file, "r", encoding="utf-8") as f:
                                    raw_data = json.load(f)

                                # Rebuild GraphTriple objects for the resolver
                                triples_objs = [GraphTriple(**t) for t in raw_data]

                                # 2. Entity resolution
                                status.write("Entity Resolution (DBSCAN Clustering)...")
                                resolver = EntityResolver(similarity_threshold=0.85)
                                resolved_triples = resolver.resolve_entities(triples_objs)
                                status.write(f"Entità normalizzate. Triple da inserire: {len(resolved_triples)}")

                                # 3. Neo4j persistence
                                status.write("Scrittura Batch su Neo4j...")
                                # NOTE(review): the persister is constructed
                                # without arguments; it presumably reads the
                                # NEO4J_* environment variables - confirm.
                                persister = KnowledgeGraphPersister()
                                try:
                                    persister.save_triples(resolved_triples)
                                finally:
                                    # Always release the persister, even when
                                    # the write fails.
                                    persister.close()

                                status.update(label="Grafo Aggiornato!", state="complete", expanded=False)
                                st.success("🚀 Grafo costruito con successo su AuraDB!")
                                st.balloons()

                            except Exception as e:
                                status.update(label="Errore Costruzione", state="error")
                                st.error(f"Errore: {e}")
        else:
            st.write("👈 Seleziona un file per iniziare.")

# ==============================================================================
# TAB 2: VALIDATION (HUMAN IN THE LOOP)
# ==============================================================================
# Lists up to 50 relationships, lowest confidence first, and lets the user
# select one row and reject it.
with tab_val:
    st.header("Curation & Feedback Loop")
    st.markdown("Validazione delle triple estratte. Le relazioni rifiutate vengono usate per il fine-tuning.")

    if not driver:
        st.warning("Database non connesso.")
    else:
        # Display names fall back from the `label` property to `name`, then to
        # the node's first label; a missing confidence is shown as 0.85.
        cypher_val = """
        MATCH (s)-[r]->(o) 
        RETURN elementId(r) as id, 
               COALESCE(s.label, s.name, head(labels(s))) as Soggetto, 
               type(r) as Predicato, 
               COALESCE(o.label, o.name, head(labels(o))) as Oggetto, 
               COALESCE(r.confidence, 0.85) as Confidenza
        ORDER BY Confidenza ASC LIMIT 50
        """
        candidate_rows = run_query(driver, cypher_val)

        if not candidate_rows:
            st.info("Nessuna relazione nel grafo.")
        else:
            relations_df = pd.DataFrame(candidate_rows)
            st.write(f"Relazioni candidate ({len(relations_df)}):")

            # The element id is kept in the frame for the delete call but is
            # hidden from the displayed table.
            visible_table = relations_df.drop(columns=["id"])
            selection_event = st.dataframe(
                visible_table,
                selection_mode="single-row",
                on_select="rerun",
                use_container_width=True,
                hide_index=True
            )

            picked_rows = selection_event.selection.rows
            if picked_rows:
                # Positional index into the displayed table maps back to the
                # full frame, which has the same row order.
                chosen = relations_df.iloc[picked_rows[0]]

                st.divider()
                col_warn, col_btn = st.columns([3, 1])
                with col_warn:
                    st.warning(f"Rifiutare: **{chosen['Soggetto']}** --[{chosen['Predicato']}]--> **{chosen['Oggetto']}**?")
                with col_btn:
                    if st.button("🗑️ CONFERMA RIFIUTO", type="primary"):
                        deleted = reject_relationship(
                            driver,
                            chosen['id'],
                            chosen['Soggetto'],
                            chosen['Predicato'],
                            chosen['Oggetto'],
                        )
                        if deleted:
                            st.success("Relazione eliminata!")
                            # Re-run the script so the table is reloaded
                            # without the deleted relationship.
                            st.rerun()

# ==============================================================================
# TAB 3: VISUALISATION (PYVIS)
# ==============================================================================
# Renders up to 100 relationships as an interactive network embedded as HTML.
with tab_vis:
    st.header("Esplorazione Topologica")

    if not driver:
        st.warning("Database non connesso.")
    else:
        physics = st.checkbox("Abilita Fisica", value=True)
        cypher_vis = """
        MATCH (s)-[r]->(o) 
        RETURN COALESCE(s.label, s.name, head(labels(s))) as src, 
               type(r) as rel, 
               COALESCE(o.label, o.name, head(labels(o))) as dst 
        LIMIT 100
        """
        graph_data = run_query(driver, cypher_vis)

        if not graph_data:
            st.info("Grafo vuoto.")
        else:
            net = Network(height="600px", width="100%", bgcolor="#222222", font_color="white", notebook=False)

            for edge_row in graph_data:
                # The display string doubles as the node id in the network.
                source_name = str(edge_row['src'])
                target_name = str(edge_row['dst'])
                relation_name = str(edge_row['rel'])

                net.add_node(source_name, label=source_name, color="#4facfe", title=source_name)
                net.add_node(target_name, label=target_name, color="#00f2fe", title=target_name)
                net.add_edge(source_name, target_name, title=relation_name, label=relation_name)

            net.toggle_physics(physics)

            # The graph is written to disk and then read back as a string to
            # embed it in the page.
            os.makedirs("data/processed", exist_ok=True)
            path = "data/processed/graph_viz.html"
            net.save_graph(path)

            with open(path, 'r', encoding='utf-8') as html_file:
                html_string = html_file.read()
            components.html(html_string, height=600, scrolling=True)

# --- FOOTER ---
# A horizontal rule followed by the product caption closes every page.
footer_caption = "Activa Digital | NextGenTech | Prototipo v1.0"
st.markdown("---")
st.caption(footer_caption)