Spaces:

Danielfonseca1212
/

DOMINANT

Sleeping

File size: 29,750 Bytes

58e740c

# app.py — DOMINANT Graph Anomaly Detection | Sem Labels
import streamlit as st
import numpy as np
import torch
import os
from datetime import datetime

# Page-level options; applied before any other Streamlit call in this file.
_PAGE_OPTIONS = {
    'page_title': "DOMINANT — Anomaly Detection",
    'page_icon': "🔬",
    'layout': "wide",
    'initial_sidebar_state': "expanded",
}
st.set_page_config(**_PAGE_OPTIONS)

# Global stylesheet injected once per run (dark theme, cards, anomaly rows).
_PAGE_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=IBM+Plex+Mono:wght@400;600&family=IBM+Plex+Sans:wght@300;400;600;700&display=swap');
html, body, [class*="css"] {
    font-family: 'IBM Plex Sans', sans-serif;
    background: #020408; color: #cdd9e5;
}
h1,h2,h3 { font-weight: 700; letter-spacing: -0.5px; }
code, pre { font-family: 'IBM Plex Mono', monospace !important; }

.card {
    background: #0d1117; border: 1px solid #30363d;
    border-radius: 10px; padding: 18px;
}
.metric-val { font-size: 2rem; font-weight: 700; font-family: 'IBM Plex Mono'; }
.metric-lbl { font-size: .68rem; color: #8b949e; text-transform: uppercase; letter-spacing: 2px; margin-top: 2px; }

.anomaly-row {
    display: flex; align-items: center; gap: 10px;
    padding: 8px 12px; border-radius: 8px; margin: 3px 0;
    font-family: 'IBM Plex Mono', monospace; font-size: .82rem;
}
.anomaly-high { background: #1a0505; border-left: 3px solid #f85149; }
.anomaly-med  { background: #1a0e00; border-left: 3px solid #d29922; }
.anomaly-low  { background: #051a0a; border-left: 3px solid #3fb950; }

.score-bar-bg { background: #21262d; border-radius: 3px; height: 5px; }
.stProgress > div > div { background: linear-gradient(90deg,#f85149,#ff7b72) !important; }
</style>
"""
st.markdown(_PAGE_CSS, unsafe_allow_html=True)

# ── SESSION STATE ─────────────────────────────────────────────
# Keys the rest of the script reads; each one is seeded only when it is
# not already present in the session.
_SESSION_DEFAULTS = {
    'trainer': None,
    'treinado': False,
    'data': None,
    'edge_weight': None,
    'metricas': None,
    'neo4j': None,
    'neo4j_ok': False,
}
for _state_key in _SESSION_DEFAULTS:
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _SESSION_DEFAULTS[_state_key]

# ── NEO4J ─────────────────────────────────────────────────────
def get_neo4j_config():
    """Resolve the Neo4j connection settings.

    Streamlit secrets are consulted first: either flat ``NEO4J_*`` keys or a
    nested ``neo4j`` section. If that lookup fails for any reason, or yields
    no URI, the ``NEO4J_*`` environment variables are used instead.

    Returns:
        dict: keys ``uri``, ``username``, ``password`` and ``database``
        (``database`` defaults to ``'neo4j'``).
    """
    settings = {}
    try:
        secrets = st.secrets
        if 'NEO4J_URI' in secrets:
            settings = {
                'uri': secrets['NEO4J_URI'],
                'username': secrets['NEO4J_USERNAME'],
                'password': secrets['NEO4J_PASSWORD'],
                'database': secrets.get('NEO4J_DATABASE', 'neo4j'),
            }
        elif 'neo4j' in secrets:
            section = secrets['neo4j']
            settings = {
                'uri': section.get('uri', ''),
                'username': section.get('username', ''),
                'password': section.get('password', ''),
                'database': section.get('database', 'neo4j'),
            }
    except Exception:
        # Best effort: any problem reading secrets falls through to env vars.
        pass

    if settings.get('uri'):
        return settings

    return {
        'uri': os.getenv('NEO4J_URI', ''),
        'username': os.getenv('NEO4J_USERNAME', ''),
        'password': os.getenv('NEO4J_PASSWORD', ''),
        'database': os.getenv('NEO4J_DATABASE', 'neo4j'),
    }

@st.cache_resource
def conectar_neo4j():
    """Open a Neo4j driver and verify it with a trivial query.

    Returns:
        tuple | None: ``(driver, database_name)`` when the ``RETURN 1`` probe
        succeeds; ``None`` when the ``neo4j`` package is missing, the
        configuration is incomplete, or connecting/probing fails.

    NOTE(review): the function is wrapped in ``st.cache_resource``, so a
    ``None`` result is presumably reused on later calls as well — confirm
    whether a retry path is wanted.
    """
    driver = None
    try:
        from neo4j import GraphDatabase
        cfg = get_neo4j_config()
        if not all([cfg['uri'], cfg['username'], cfg['password']]):
            return None
        driver = GraphDatabase.driver(cfg['uri'], auth=(cfg['username'], cfg['password']))
        with driver.session(database=cfg['database']) as s:
            s.run('RETURN 1')
        return driver, cfg['database']
    except Exception:
        # The driver was created but could not be verified: release it so a
        # failed probe does not leave it open.
        if driver is not None:
            try:
                driver.close()
            except Exception:
                pass
        return None

@st.cache_resource
def carregar_libs():
    """Import the project modules that build the graph and train the model.

    Returns:
        tuple: ``(gerar_grafo_anomaly, get_adj_normalizada, TrainerDOMINANT)``
        on success, or ``(error_message, None, None)`` when the import raises;
        callers detect failure by checking whether the first item is a string.
    """
    try:
        from dominant_data  import gerar_grafo_anomaly, get_adj_normalizada
        from dominant_model import TrainerDOMINANT
    except Exception as import_error:
        return str(import_error), None, None
    return gerar_grafo_anomaly, get_adj_normalizada, TrainerDOMINANT

# ── CHARTS ────────────────────────────────────────────────────
def loss_auc_svg(historico):
    """Render the training history as an HTML card with an inline SVG chart.

    Args:
        historico: mapping with the sequences ``'loss'`` and ``'auc'``.

    Returns:
        str: HTML markup with one polyline per series, or ``''`` when the
        loss series is empty.

    Each series is min-max scaled on its own to the chart height, and the x
    positions are spread over 460 units according to the number of loss
    entries. A series with no values yields an empty polyline instead of
    raising.
    """
    loss = historico['loss']
    auc  = historico['auc']
    ep   = len(loss)
    if ep == 0:
        return ''

    def pts(vals, H=110):
        # min()/max() raise on an empty sequence, so guard it here.
        if len(vals) == 0:
            return ''
        mn, mx = min(vals), max(vals)
        r = mx - mn or 1  # a flat series would otherwise divide by zero
        return ' '.join(f'{i*460/max(ep-1,1):.1f},{H-(v-mn)/r*H:.1f}'
                        for i, v in enumerate(vals))

    return f"""<div class="card" style="margin-top:10px">
    <div style="font-size:11px;color:#8b949e;margin-bottom:6px">
      <span style="color:#f85149">— Loss</span>
      <span style="color:#3fb950;margin-left:12px">— AUC (sem labels)</span>
    </div>
    <svg viewBox="0 0 470 120" style="width:100%">
      <polyline points="{pts(loss)}" fill="none" stroke="#f85149" stroke-width="2"/>
      <polyline points="{pts(auc)}"  fill="none" stroke="#3fb950" stroke-width="2"/>
      <line x1="0" y1="110" x2="460" y2="110" stroke="#21262d"/>
    </svg></div>"""


def roc_svg(y_true, scores):
    """Render the ROC curve as an HTML card with an inline SVG chart.

    Args:
        y_true: ground-truth labels passed to ``sklearn.metrics.roc_curve``.
        scores: anomaly scores passed to ``sklearn.metrics.roc_curve``.

    Returns:
        str: HTML markup showing the ROC-AUC value and the curve.

    The curve maps FPR to x (0..440) and TPR to ``170 - tpr * 170``, so
    TPR = 1 sits at the top of the plot (SVG y grows downwards).
    """
    from sklearn.metrics import roc_curve, auc as sk_auc
    fpr, tpr, _ = roc_curve(y_true, scores)
    ra = sk_auc(fpr, tpr)
    pts = ' '.join(f'{f*440:.1f},{170-t*170:.1f}' for f, t in zip(fpr, tpr))
    # The chance diagonal must join (FPR=0, TPR=0) to (FPR=1, TPR=1); in the
    # flipped y axis used above that is bottom-left (0,170) to top-right
    # (440,0).
    return f"""<div class="card">
    <div style="font-size:11px;color:#8b949e;margin-bottom:4px">
      ROC-AUC <b style="color:#f85149;font-family:'IBM Plex Mono'">{ra:.4f}</b>
      <span style="color:#8b949e;font-size:.75rem;margin-left:8px">(sem nenhum label no treino)</span>
    </div>
    <svg viewBox="0 0 460 185" style="width:100%">
      <line x1="0" y1="170" x2="440" y2="0" stroke="#21262d" stroke-dasharray="4"/>
      <polyline points="{pts}" fill="none" stroke="#f85149" stroke-width="2.5"/>
      <line x1="0" y1="170" x2="440" y2="170" stroke="#30363d"/>
      <line x1="0" y1="0"   x2="0"   y2="170" stroke="#30363d"/>
      <text x="220" y="183" text-anchor="middle" fill="#4b5563" font-size="10">FPR</text>
      <text x="-85" y="85" transform="rotate(-90)" fill="#4b5563" font-size="10">TPR</text>
    </svg></div>"""


def score_dist_svg(scores, y_true, thresh):
    """Score distribution: normal vs anomalous nodes, as an SVG histogram card.

    Args:
        scores: array of anomaly scores; only values inside ``[0, 1]`` are
            binned, since the histogram range is fixed to ``(0, 1)``.
        y_true: array of labels used to split the scores (``0`` normal,
            ``1`` anomaly).
        thresh: decision threshold, drawn as a dashed vertical line at
            ``thresh * 440``.

    Returns:
        str: HTML markup with two overlaid histograms and the threshold line.
    """

    def hist_pts(vals, bins=30, H=120, W=440, color='#3fb950'):
        # One <rect> per bin, scaled so the fullest bin reaches height H.
        if len(vals) == 0:
            return ''
        counts, _ = np.histogram(vals, bins=bins, range=(0,1))
        peak = max(counts.max(), 1)
        bw = W / bins
        rects = []
        for slot, count in enumerate(counts):
            left = slot * bw
            tall = count / peak * H
            rects.append(
                f'<rect x="{left:.1f}" y="{H-tall:.1f}" width="{bw*.9:.1f}" height="{tall:.1f}" fill="{color}" opacity=".75" rx="1"/>')
        return ''.join(rects)

    normal_bars = hist_pts(scores[y_true == 0], color='#3fb950')
    anomaly_bars = hist_pts(scores[y_true == 1], color='#f85149')
    t_x = thresh * 440

    return f"""<div class="card">
    <div style="font-size:11px;color:#8b949e;margin-bottom:4px">
      DISTRIBUIÇÃO DO ANOMALY SCORE
      <span style="color:#3fb950;margin-left:8px">█ Normal</span>
      <span style="color:#f85149;margin-left:8px">█ Anomalia (real)</span>
    </div>
    <svg viewBox="0 0 460 140" style="width:100%">
      {normal_bars}
      {anomaly_bars}
      <line x1="{t_x:.1f}" y1="0" x2="{t_x:.1f}" y2="120"
            stroke="#d29922" stroke-width="1.5" stroke-dasharray="4"/>
      <text x="{t_x+3:.1f}" y="15" fill="#d29922" font-size="9">threshold</text>
      <line x1="0" y1="120" x2="440" y2="120" stroke="#21262d"/>
    </svg></div>"""


def scatter_erros_svg(err_attr, err_struct, y_true, n_show=300):
    """Scatter plot of attribute error vs structure error, as an SVG card.

    Args:
        err_attr: array of per-node attribute reconstruction errors.
        err_struct: array of per-node structure reconstruction errors.
        y_true: array of labels; entries equal to ``1`` are drawn as larger
            red dots.
        n_show: maximum number of nodes to draw; a random subset is taken
            without replacement via ``np.random.choice``.

    Returns:
        str: HTML markup with one ``<circle>`` per sampled node, each axis
        min-max scaled over the sampled values.
    """
    total = len(err_attr)
    picked = np.random.choice(total, min(n_show, total), replace=False)
    attr_sel = err_attr[picked]
    struct_sel = err_struct[picked]
    label_sel = y_true[picked]
    a_lo, a_hi = attr_sel.min(), attr_sel.max()
    s_lo, s_hi = struct_sel.min(), struct_sel.max()

    def scale(value, lo, hi, span):
        # The small epsilon keeps a constant axis from dividing by zero.
        return (value-lo)/(hi-lo+1e-8)*span

    dots = []
    for a_val, s_val, label in zip(attr_sel, struct_sel, label_sel):
        cx = scale(a_val, a_lo, a_hi, 400)
        cy = 200-scale(s_val, s_lo, s_hi, 200)  # flip: larger error is higher
        if label == 1:
            fill, radius = '#f85149', 5
        else:
            fill, radius = '#3fb95066', 3
        dots.append(f'<circle cx="{cx:.1f}" cy="{cy:.1f}" r="{radius}" fill="{fill}"/>')
    circles = ''.join(dots)

    return f"""<div class="card">
    <div style="font-size:11px;color:#8b949e;margin-bottom:4px">
      ERRO ATRIBUTO vs ERRO ESTRUTURA
      <span style="color:#3fb950;margin-left:8px">● Normal</span>
      <span style="color:#f85149;margin-left:8px">● Anomalia real</span>
    </div>
    <svg viewBox="0 0 430 220" style="width:100%;background:#070d14;border-radius:6px">
      {circles}
      <text x="200" y="215" text-anchor="middle" fill="#4b5563" font-size="10">Erro Atributo →</text>
      <text x="-110" y="10" transform="rotate(-90)" fill="#4b5563" font-size="10">Erro Estrutura →</text>
    </svg></div>"""


def tsne_svg(embeddings, y_true):
    try:
        from sklearn.manifold import TSNE
        tsne   = TSNE(n_components=2, random_state=42,
                      perplexity=min(30, len(embeddings)//3))
        coords = tsne.fit_transform(embeddings)
        cx = coords[:,0]; cy = coords[:,1]
        mn_x,mx_x = cx.min(),cx.max(); mn_y,mx_y = cy.min(),cy.max()
        def sc(v,mn,mx,W): return (v-mn)/(mx-mn+1e-8)*W

        circles = ''
        for i,(x,y) in enumerate(zip(cx,cy)):
            px = sc(x,mn_x,mx_x,440); py = sc(y,mn_y,mx_y,260)
            col = '#f85149' if y_true[i]==1 else '#3fb95055'
            r   = 6 if y_true[i]==1 else 3
            circles += f'<circle cx="{px:.1f}" cy="{py:.1f}" r="{r}" fill="{col}" opacity=".85"/>'

        return f"""<div class="card">
        <div style="font-size:11px;color:#8b949e;margin-bottom:4px">
          EMBEDDINGS t-SNE — separação aprendida SEM LABELS
          <span style="color:#3fb950;margin-left:8px">● Normal</span>
          <span style="color:#f85149;margin-left:8px">● Anomalia</span>
        </div>
        <svg viewBox="0 0 460 270" style="width:100%;background:#070d14;border-radius:6px">
          {circles}
        </svg></div>"""
    except Exception as e:
        return f'<p style="color:#4b5563">t-SNE indisponível: {e}</p>'


def top_anomalias_html(top_list, feat_names, data_x):
    """Build the HTML rows for the ranked anomaly list.

    Args:
        top_list: iterable of dicts with the keys ``'idx'``, ``'score'`` and
            ``'label_real'``.
        feat_names: feature names, indexed by feature position.
        data_x: node feature matrix; ``data_x[idx].numpy()`` must yield the
            feature vector of node ``idx``.

    Returns:
        str: one ``<div class="anomaly-row ...">`` per item, concatenated
        (empty string for an empty list).

    Each row shows the node id, a score bar coloured by tier (above 0.7 high,
    above 0.4 medium, otherwise low), the three features that deviate most
    from the node's own feature mean, and a badge when ``label_real`` is
    truthy.
    """
    rows = []
    for entry in top_list:
        idx   = entry['idx']
        score = entry['score']

        # Severity tier drives both the row class and the bar colour.
        if score > 0.7:
            cls, bar_color = 'anomaly-high', '#f85149'
        elif score > 0.4:
            cls, bar_color = 'anomaly-med', '#d29922'
        else:
            cls, bar_color = 'anomaly-low', '#3fb950'

        badge = ''
        if entry['label_real']:
            badge = '<span style="color:#f85149;font-size:.75rem">⚠ REAL</span>'

        # Three features farthest from this node's own mean, largest first.
        feats_no = data_x[idx].numpy()
        deviation = np.abs(feats_no - feats_no.mean())
        ranked = np.argsort(deviation)[::-1]
        feat_str = ' · '.join(feat_names[fi] for fi in ranked[:3])

        bar_w = int(score * 100)
        rows.append(f"""<div class="anomaly-row {cls}">
          <span style="min-width:50px;color:#8b949e">N{idx:04d}</span>
          <span style="min-width:130px">
            <div class="score-bar-bg" style="width:130px">
              <div style="width:{bar_w}%;height:5px;border-radius:3px;
                          background:{bar_color}"></div>
            </div>
            <span style="font-size:.75rem;color:#8b949e">{score:.4f}</span>
          </span>
          <span style="color:#8b949e;font-size:.75rem;flex:1">{feat_str}</span>
          {badge}
        </div>""")
    return ''.join(rows)


# ── SIDEBAR ───────────────────────────────────────────────────
def sidebar():
    """Draw the sidebar controls and collect their current values.

    Returns:
        dict: graph settings (``n_nos``, ``n_arestas``, ``n_feats``,
        ``taxa_an`` as a fraction) and model settings (``hidden``, ``embed``,
        ``alpha``, ``lr``, ``epocas``, ``dropout``).

    Also shows the Neo4j connection status read from
    ``st.session_state.neo4j_ok``.
    """
    sb = st.sidebar
    cfg = {}

    sb.markdown('## 🔬 DOMINANT Config')
    cfg['n_nos'] = sb.slider('Nós no grafo', 100, 1000, 500, 50)
    cfg['n_arestas'] = sb.slider('Arestas', 500, 8000, 2500, 500)
    cfg['n_feats'] = sb.select_slider('Features por nó', [8,16,32], 16)
    # The slider works in percent; the returned value is a fraction.
    cfg['taxa_an'] = sb.slider('Taxa anomalia %', 2, 15, 5) / 100

    sb.markdown('---')
    sb.markdown('### Modelo')
    cfg['hidden'] = sb.select_slider('Hidden dim', [32,64,128], 64)
    cfg['embed'] = sb.select_slider('Embed dim', [16,32,64], 32)
    cfg['alpha'] = sb.slider('α (struct vs attr)', 0.0, 1.0, 0.5, 0.05)
    cfg['lr'] = sb.select_slider('LR', [0.001,0.003,0.005,0.01], 0.005)
    cfg['epocas'] = sb.slider('Épocas', 20, 200, 100, 10)
    cfg['dropout'] = sb.slider('Dropout', 0.1, 0.5, 0.3, 0.05)

    sb.markdown('---')
    sb.markdown(f'**α = {cfg["alpha"]:.2f}**')
    sb.caption('α→1: mais peso na estrutura\nα→0: mais peso nos atributos')

    if not st.session_state.neo4j_ok:
        sb.warning('⚠️ Neo4j Offline')
    else:
        sb.success('🗄️ Neo4j Conectado')
    return cfg


# ── MAIN ──────────────────────────────────────────────────────
def _to_native(value):
    """Return ``value`` as a plain Python scalar when it exposes ``.item()``.

    Objects without an ``item`` method (plain ``int``/``float``/``bool``)
    are returned unchanged. Used before handing values to the Neo4j driver.
    """
    return value.item() if hasattr(value, 'item') else value


def main():
    """Build the whole page: header plus the six tabs.

    Tabs, in order: theory, graph generation/preview, training, performance
    charts, ranked anomalies with a per-node inspector, and Neo4j export.
    State shared between reruns (graph, trainer, metrics, Neo4j handle) is
    kept in ``st.session_state``.
    """
    # Connect once; the handle (or None) is remembered in the session.
    if st.session_state.neo4j is None:
        conn = conectar_neo4j()
        st.session_state.neo4j    = conn
        st.session_state.neo4j_ok = conn is not None

    cfg = sidebar()

    st.markdown("""
    <div style="margin-bottom:28px">
      <h1 style="font-size:2.2rem;margin:0;
                 background:linear-gradient(90deg,#f85149,#d29922,#3fb950);
                 -webkit-background-clip:text;-webkit-text-fill-color:transparent">
        DOMINANT
      </h1>
      <p style="color:#8b949e;margin:2px 0 0 2px;font-size:.9rem">
        Deep Anomaly Detection on Attributed Networks · Ding et al., IJCAI 2019 · 
        <b style="color:#f85149">Zero labels no treino</b>
      </p>
    </div>
    """, unsafe_allow_html=True)

    tabs = st.tabs(['📐 Teoria', '🏗️ Grafo', '🧠 Treinar',
                    '📊 Performance', '🔍 Top Anomalias', '🗄️ Neo4j'])

    # ── TAB 0: THEORY ─────────────────────────────────────────
    with tabs[0]:
        c1, c2 = st.columns(2)
        with c1:
            st.markdown('### Por que sem labels?')
            st.markdown("""
Em produção, fraude real **não tem label imediato**.
O banco só descobre que foi fraude dias ou semanas depois,
quando o cliente contesta. O DOMINANT resolve isso:

> *"Aprendo o que é normal. Quem desvia é anômalo."*

**Intuição:**
- Treina um autoencoder no grafo inteiro
- Nós normais são **reconstruídos bem**
- Anomalias têm **alto erro de reconstrução**
- Anomaly score = erro de reconstrução
            """)

            st.markdown('### Dois tipos de anomalia detectados')
            for tipo, desc, cor in [
                ('Atributo', 'Features fora da distribuição normal (saldo extremo, chargeback alto)', '#f85149'),
                ('Estrutural', 'Padrão de conexões anômalo (hub suspeito, isolado)', '#d29922'),
                ('Combinada', 'Ambos — o caso mais típico de fraude real', '#a78bfa'),
            ]:
                st.markdown(
                    f'<div class="card" style="margin:6px 0;border-left:3px solid {cor}">'
                    f'<b style="color:{cor}">{tipo}</b><br>'
                    f'<span style="font-size:.85rem;color:#8b949e">{desc}</span></div>',
                    unsafe_allow_html=True)

        with c2:
            st.markdown('### Arquitetura DOMINANT')
            st.markdown("""
```
Grafo G = (A, X)
    A: matriz de adjacência
    X: features dos nós
         ↓
   GCN Encoder
     gc1: X → H (hidden)  
     gc2: H → Z (embedding)
         ↓
    ┌────┴────┐
    ▼         ▼
Attr Dec   Struct Dec
GCN → X̂   GCN → Z'
           Z' @ Z'ᵀ → Â
    ↓         ↓
 L_attr    L_struct
‖X - X̂‖²  BCE(A, Â)
    └────┬────┘
         ↓
  Loss = α·L_s + (1-α)·L_a

  Anomaly Score(v) =
    α·err_struct(v) + (1-α)·err_attr(v)
```
            """)
            st.markdown(f'**α = {cfg["alpha"]:.2f}** controla o balanço estrutura/atributo')

    # ── TAB 1: GRAPH ──────────────────────────────────────────
    with tabs[1]:
        res = carregar_libs()
        # On import failure the first item is the error message string.
        if isinstance(res[0], str):
            st.error(f'Erro de importação: {res[0]}')
            st.stop()
        gerar_grafo, get_adj_norm, TrainerDOMINANT = res

        c1, c2 = st.columns([1, 2])
        with c1:
            st.markdown(f"""
**Configuração atual:**
- `{cfg["n_nos"]}` nós (contas bancárias)
- `{cfg["n_arestas"]}` arestas (transações)
- `{cfg["n_feats"]}` features por nó
- `{cfg["taxa_an"]:.0%}` anomalias embutidas
- Labels **ocultos** do modelo
            """)
            if st.button('🔄 Gerar Grafo', type='primary', use_container_width=True):
                with st.spinner('Gerando...'):
                    data, feat_names = gerar_grafo(
                        n_nos=cfg['n_nos'],
                        n_arestas=cfg['n_arestas'],
                        n_features=cfg['n_feats'],
                        taxa_anomalia=cfg['taxa_an'],
                    )
                    ei, ew, _ = get_adj_norm(data.edge_index, data.x.shape[0])
                    data.edge_index = ei
                    st.session_state.data        = data
                    st.session_state.edge_weight = ew
                    st.session_state.feat_names  = feat_names
                    # A new graph invalidates any previous training results.
                    st.session_state.treinado    = False
                    st.session_state.trainer     = None
                    st.session_state.metricas    = None
                st.success('✅ Grafo gerado!')

        with c2:
            if st.session_state.data is not None:
                data = st.session_state.data
                n_anom = int(data.y.sum())
                m1,m2,m3,m4 = st.columns(4)
                for col, v, l in [
                    (m1, data.x.shape[0], 'Nós'),
                    (m2, data.edge_index.shape[1], 'Arestas'),
                    (m3, n_anom, '🚨 Anomalias'),
                    (m4, f'{n_anom/data.x.shape[0]:.1%}', 'Taxa'),
                ]:
                    col.markdown(
                        f'<div class="card" style="text-align:center">'
                        f'<div class="metric-val" style="color:#f85149">{v}</div>'
                        f'<div class="metric-lbl">{l}</div></div>',
                        unsafe_allow_html=True)

                st.markdown('<br>', unsafe_allow_html=True)

                # Graph visualization (random sample of nodes)
                try:
                    from pyvis.network import Network
                    n_show = min(80, data.x.shape[0])
                    idx_show = np.random.choice(data.x.shape[0], n_show, replace=False)
                    idx_map  = {int(v): i for i, v in enumerate(idx_show)}
                    net = Network(height='400px', width='100%',
                                  bgcolor='#0d1117', font_color='#cdd9e5', directed=False)
                    net.set_options('{"physics":{"stabilization":{"iterations":80}},'
                                    '"edges":{"color":{"inherit":false},"width":0.8}}')
                    for i in idx_show:
                        is_anom = bool(data.y[i])
                        net.add_node(int(i),
                                     color='#f85149' if is_anom else '#3fb950',
                                     size=12 if is_anom else 7,
                                     title=f'Nó {i} | {"⚠ Anomalia" if is_anom else "Normal"}')
                    ei = data.edge_index
                    shown = 0
                    for j in range(ei.shape[1]):
                        if shown >= 200:
                            break  # preview edge budget reached; stop scanning
                        s,d = int(ei[0,j]), int(ei[1,j])
                        # Only edges whose two endpoints were both sampled.
                        if s in idx_map and d in idx_map:
                            net.add_edge(s, d, color='#30363d')
                            shown += 1
                    st.components.v1.html(net.generate_html(), height=420)
                    st.caption('🔴 Anomalia (label real) · 🟢 Normal · apenas amostra')
                except ImportError:
                    st.info('pyvis não instalado — adicione ao requirements.txt para ver o grafo interativo.')
            else:
                st.info('Clique em **Gerar Grafo**.')

    # ── TAB 2: TRAIN ──────────────────────────────────────────
    with tabs[2]:
        _, _, TrainerDOMINANT = carregar_libs()
        if st.session_state.data is None:
            st.warning('⬅️ Gere o grafo primeiro.')
        else:
            c1, c2 = st.columns([1, 2])
            with c1:
                st.markdown(f"""
**Parâmetros do modelo:**
- Hidden: `{cfg["hidden"]}` · Embed: `{cfg["embed"]}`
- α: `{cfg["alpha"]}` · LR: `{cfg["lr"]}`
- Dropout: `{cfg["dropout"]}` · Épocas: `{cfg["epocas"]}`

**⚠️ Importante:** O modelo não vê nenhum label durante o treino.
A AUC é calculada apenas para avaliação do paper.
                """)
                if st.button('🚀 Treinar DOMINANT', type='primary', use_container_width=True):
                    st.session_state.trainer = TrainerDOMINANT(
                        st.session_state.data,
                        st.session_state.edge_weight,
                        hidden_dim=cfg['hidden'],
                        embed_dim=cfg['embed'],
                        alpha=cfg['alpha'],
                        lr=cfg['lr'],
                        dropout=cfg['dropout'],
                    )
                    prog   = st.progress(0)
                    status = st.empty()
                    logs   = []
                    log_box = c2.empty()

                    def cb(ep, total, loss, auc):
                        # Progress callback handed to the trainer: updates the
                        # bar and status line, and logs every 10th epoch.
                        prog.progress(ep/total)
                        status.markdown(
                            f'**Época {ep}/{total}** · Loss `{loss:.4f}` · AUC `{auc:.3f}`')
                        if ep % 10 == 0 or ep == total:
                            logs.append(f'[{ep:>3}] loss={loss:.4f}  auc={auc:.3f}')
                            log_box.code('\n'.join(logs[-15:]))

                    with st.spinner('Treinando sem labels...'):
                        st.session_state.trainer.treinar(cfg['epocas'], cb)
                    st.session_state.treinado = True
                    st.session_state.metricas = \
                        st.session_state.trainer.metricas_completas()
                    st.success(
                        f'✅ Melhor AUC: {st.session_state.trainer.melhor_auc:.4f}')

            with c2:
                if st.session_state.treinado:
                    st.components.v1.html(
                        loss_auc_svg(st.session_state.trainer.historico), height=200)

    # ── TAB 3: PERFORMANCE ────────────────────────────────────
    with tabs[3]:
        if not st.session_state.treinado or st.session_state.metricas is None:
            st.warning('⬅️ Treine o modelo primeiro.')
        else:
            m = st.session_state.metricas
            cols = st.columns(5)
            for col, (nome, val, cor) in zip(cols, [
                ('ROC-AUC',   f"{m['auc']:.4f}",   '#f85149'),
                ('Avg Prec',  f"{m['ap']:.4f}",    '#d29922'),
                ('F1',        f"{m['f1']:.4f}",    '#3fb950'),
                ('Precision', f"{m['precision']:.4f}", '#58a6ff'),
                ('Recall',    f"{m['recall']:.4f}",  '#a78bfa'),
            ]):
                col.markdown(
                    f'<div class="card" style="text-align:center">'
                    f'<div class="metric-val" style="color:{cor}">{val}</div>'
                    f'<div class="metric-lbl">{nome}</div></div>',
                    unsafe_allow_html=True)

            st.markdown('<br>', unsafe_allow_html=True)
            c1, c2 = st.columns(2)
            with c1:
                st.components.v1.html(
                    roc_svg(m['y_true'], m['scores']), height=230)
                st.components.v1.html(
                    score_dist_svg(m['scores'], m['y_true'], m['thresh']), height=200)
            with c2:
                st.components.v1.html(
                    scatter_erros_svg(m['err_attr'], m['err_struct'], m['y_true']),
                    height=260)
                st.components.v1.html(
                    tsne_svg(m['embeddings'], m['y_true']), height=310)

    # ── TAB 4: TOP ANOMALIES ──────────────────────────────────
    with tabs[4]:
        # This tab reads the metrics too, so require them like tab 3 does.
        if not st.session_state.treinado or st.session_state.metricas is None:
            st.warning('⬅️ Treine o modelo primeiro.')
        else:
            st.markdown('### Nós mais anômalos detectados')
            st.markdown('Rankeados por anomaly score — sem nenhum label no treino.')

            n_top = st.slider('Top N', 10, 50, 20)
            top   = st.session_state.trainer.get_top_anomalias(n_top)
            data_x     = st.session_state.data.x
            feat_names = st.session_state.get('feat_names')
            if feat_names is None:
                # Fallback names sized to the actual feature count.
                feat_names = [f'f{i}' for i in range(data_x.shape[1])]

            n_detectados = sum(1 for t in top if t['label_real'] == 1)
            c1,c2,c3 = st.columns(3)
            c1.metric('Top anomalias analisadas', n_top)
            c2.metric('Anomalias reais detectadas', n_detectados)
            c3.metric(f'Precision@{n_top}', f'{n_detectados/n_top:.1%}')

            st.markdown('<br>', unsafe_allow_html=True)
            st.markdown(
                f'<div style="font-family:\'IBM Plex Mono\',monospace">'
                f'{top_anomalias_html(top, feat_names, data_x)}</div>',
                unsafe_allow_html=True)

            # Details for one specific node
            with st.expander('🔍 Inspecionar nó específico'):
                # Start on the top-ranked node; fall back to node 0 when the
                # ranking came back empty.
                default_idx = int(top[0]['idx']) if len(top) > 0 else 0
                idx_insp = st.number_input('Índice do nó', 0,
                                           int(data_x.shape[0])-1, default_idx)
                m = st.session_state.metricas
                score_no = float(m['scores'][idx_insp])
                label_no = int(st.session_state.data.y[idx_insp])
                feats_no = data_x[idx_insp].numpy()
                st.markdown(f"""
**Nó {idx_insp}** · Score: `{score_no:.4f}` · 
Err Atributo: `{m['err_attr'][idx_insp]:.4f}` · 
Err Estrutura: `{m['err_struct'][idx_insp]:.4f}` · 
Label real: `{'⚠ Anomalia' if label_no else '✅ Normal'}`
                """)
                # Node features rendered as bars; zip stops at the shorter of
                # names/values so a length mismatch cannot raise IndexError.
                bars_html = '<div style="font-family:\'IBM Plex Mono\',monospace;font-size:.78rem">'
                for fn, raw in zip(feat_names, feats_no):
                    v   = float(raw)
                    pct = min(abs(v)*100, 100)
                    cor = '#f85149' if v > 0.7 else ('#d29922' if v > 0.4 else '#3fb950')
                    bars_html += (
                        f'<div style="display:flex;align-items:center;gap:8px;margin:2px 0">'
                        f'<span style="color:#8b949e;width:200px;font-size:.75rem">{fn}</span>'
                        f'<div style="flex:1;background:#21262d;border-radius:3px;height:8px">'
                        f'<div style="width:{pct:.0f}%;height:8px;background:{cor};border-radius:3px"></div></div>'
                        f'<span style="color:{cor};width:45px;text-align:right">{v:.3f}</span>'
                        f'</div>')
                bars_html += '</div>'
                st.markdown(bars_html, unsafe_allow_html=True)

    # ── TAB 5: NEO4J ─────────────────────────────────────────
    with tabs[5]:
        st.header('🗄️ Neo4j')
        if not st.session_state.neo4j_ok:
            st.warning('Neo4j offline.')
            with st.expander('Como configurar'):
                st.markdown("""
**HF Spaces → Settings → Variables and secrets:**

| Chave | Valor |
|---|---|
| `NEO4J_URI` | `neo4j+s://XXXXXXXX.databases.neo4j.io` |
| `NEO4J_USERNAME` | `neo4j` |
| `NEO4J_PASSWORD` | `sua_senha` |
| `NEO4J_DATABASE` | `neo4j` |
                """)
        else:
            st.success('Conectado!')
            if st.session_state.treinado and st.button('💾 Salvar anomalias no Neo4j'):
                driver, db = st.session_state.neo4j
                top = st.session_state.trainer.get_top_anomalias(50)
                m   = st.session_state.metricas
                try:
                    with driver.session(database=db) as s:
                        s.run("""
                            MERGE (r:DOMINANTRun {ts: $ts})
                            SET r.auc=$auc, r.ap=$ap, r.alpha=$alpha,
                                r.n_nos=$n, r.n_anomalias=$na
                        """, ts=datetime.now().isoformat(),
                             auc=float(m['auc']), ap=float(m['ap']),
                             alpha=cfg['alpha'],
                             n=int(st.session_state.data.x.shape[0]),
                             na=int(st.session_state.data.y.sum()))
                        for item in top[:20]:
                            # Convert array/tensor scalars to plain Python
                            # values, matching the casts on the run-level
                            # parameters above.
                            s.run("""
                                MERGE (n:AnomaliaNode {idx: $idx})
                                SET n.score=$score, n.label=$label
                            """, idx=_to_native(item['idx']),
                                 score=_to_native(item['score']),
                                 label=_to_native(item['label_real']))
                    st.success(f'✅ Run + {min(20,len(top))} anomalias salvas!')
                except Exception as e:
                    st.error(str(e))


# Script entry point: build the page only when this file is executed directly.
if __name__ == '__main__':
    main()