Spaces:

Danielfonseca1212
/

RelGNNDeepRelationalLearning

Sleeping

App Files Files Community

Danielfonseca1212 committed on Mar 4

Commit

0b037bd

verified ·

1 Parent(s): 2b0de6d

Create app.py

Browse files

Files changed (1) hide show

app.py +288 -0

app.py ADDED Viewed

	@@ -0,0 +1,288 @@

+"""
+RelGNN — Deep Relational Learning para Detecção de Fraude
+Projeto 8: Do SQL ao Graph AI sem Engenharia Manual
+Hugging Face Spaces — Gradio Interface
+"""
+import gradio as gr
+import pandas as pd
+import numpy as np
+import matplotlib.pyplot as plt
+import matplotlib.patches as mpatches
+from matplotlib.gridspec import GridSpec
+import warnings
+warnings.filterwarnings("ignore")
+from data.tpch_generator import generate_tpch_data
+from data.routes import discover_atomic_routes, RouteConfig
+from relgnn.model import RelGNN, RelGNNConfig
+from relgnn.trainer import Trainer
+from baseline.graphsage_baseline import GraphSAGEBaseline
+from baseline.xgboost_baseline import XGBoostBaseline
# ─── GLOBALS ──────────────────────────────────────────────────────────────────
# Metrics of the most recent pipeline run, stored under the key "last".
RESULTS_CACHE = {}


# ─── CORE PIPELINE ────────────────────────────────────────────────────────────
def _format_summary(relgnn_metrics, gs_metrics, n_routes):
    """Build the Markdown summary that compares RelGNN against GraphSAGE.

    Args:
        relgnn_metrics: Metric dict for RelGNN; "auc", "f1" and "train_time"
            are read.
        gs_metrics: Metric dict for GraphSAGE; same keys are read.
        n_routes: Number of atomic routes to report.

    Returns:
        A Markdown string (user-facing text is in Portuguese).
    """
    # AUC/F1 live in [0, 1]; the difference times 100 is in percentage points.
    delta_auc = (relgnn_metrics["auc"] - gs_metrics["auc"]) * 100
    delta_f1 = (relgnn_metrics["f1"] - gs_metrics["f1"]) * 100
    relgnn_time = relgnn_metrics["train_time"]
    gs_time = gs_metrics["train_time"]

    # The wording follows the sign of the delta so the summary never claims a
    # win that the numbers do not support.
    accuracy_word = "mais preciso" if delta_auc >= 0 else "menos preciso"
    claims = [f"**{abs(delta_auc):.1f} p.p. {accuracy_word}**"]

    if gs_time > 0:
        # Relative change in training time; negative means RelGNN is faster.
        time_change = (relgnn_time / gs_time - 1) * 100
        time_cell = f"{time_change:+.0f}%"
        speed_word = "mais rápido" if time_change <= 0 else "mais lento"
        claims.append(f"**{abs(time_change):.0f}% {speed_word}**")
    else:
        # A zero baseline time makes the ratio undefined; skip the speed claim
        # instead of raising ZeroDivisionError.
        time_cell = "n/d"

    verdict = " e ".join(claims)
    verdict_icon = "✅" if delta_auc >= 0 else "⚠️"

    return (
        "## 🎯 Resultado Final\n\n"
        "| Métrica | RelGNN | GraphSAGE | Δ |\n"
        "|---------|--------|-----------|---|\n"
        f"| AUC-ROC | **{relgnn_metrics['auc']:.4f}** | {gs_metrics['auc']:.4f} | **{delta_auc:+.1f} p.p.** |\n"
        f"| F1-Score | **{relgnn_metrics['f1']:.4f}** | {gs_metrics['f1']:.4f} | **{delta_f1:+.1f} p.p.** |\n"
        f"| Tempo Treino | **{relgnn_time:.1f}s** | {gs_time:.1f}s | **{time_cell}** |\n\n"
        f"{verdict_icon} RelGNN é {verdict} que GraphSAGE.\n"
        f"🔑 **{n_routes} rotas atômicas** aprendidas automaticamente das FKs do schema SQL.\n"
        f"🚀 **Zero engenharia manual** — sem conversão explícita para grafo."
    )


def run_full_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops, progress=gr.Progress()):
    """Run the whole demo: generate data, train RelGNN and both baselines, report.

    Args:
        n_customers: Number of customers to generate (cast to int).
        n_orders: Number of orders to generate (cast to int).
        fraud_rate: Fraud rate in percent; divided by 100 before being passed
            to the data generator.
        hidden_dim: Hidden size used for RelGNN and GraphSAGE (cast to int).
        num_epochs: Training epochs for RelGNN and GraphSAGE (cast to int).
        max_hops: Maximum hop count for atomic-route discovery (cast to int).
        progress: Gradio progress tracker, called with a fraction and a
            description at each stage.

    Returns:
        A 5-tuple ``(figure, metrics_df, routes_df, summary_markdown, log_text)``
        matching the output components wired to the button.

    Side effects:
        Stores the three metric dicts in ``RESULTS_CACHE["last"]``.
    """
    logs = []

    def log(msg):
        # Returns the accumulated log so a caller of ``log_fn`` may use it.
        logs.append(msg)
        return "\n".join(logs)

    # Slider values may arrive as floats; normalise them once up front.
    n_customers = int(n_customers)
    n_orders = int(n_orders)
    fraud_rate = float(fraud_rate)
    hidden_dim = int(hidden_dim)
    num_epochs = int(num_epochs)
    max_hops = int(max_hops)

    progress(0.05, desc="Gerando dataset TPC-H sintético...")
    tables = generate_tpch_data(
        n_customers=n_customers,
        n_orders=n_orders,
        fraud_rate=fraud_rate / 100.0,
        seed=42,
    )
    fraud_count = tables["orders"]["is_fraud"].sum()
    log(f"✅ Dataset gerado: {n_customers} clientes, {n_orders} pedidos, {fraud_count} fraudes ({fraud_rate:.1f}%)")

    progress(0.15, desc="Descobrindo rotas atômicas...")
    route_config = RouteConfig(max_hops=max_hops)
    routes = discover_atomic_routes(tables, route_config)
    log(f"✅ {len(routes)} rotas atômicas descobertas (max {max_hops} hops)")
    for r in routes:
        log(f"   → {' → '.join(r.path)}  (hop={r.n_hops})")

    progress(0.30, desc="Treinando RelGNN...")
    config = RelGNNConfig(
        hidden_dim=hidden_dim,
        num_epochs=num_epochs,
        learning_rate=1e-3,
        dropout=0.2,
    )
    relgnn = RelGNN(config)
    relgnn_metrics, relgnn_history = relgnn.fit(tables, routes, log_fn=log, progress_fn=progress)
    log(f"✅ RelGNN — AUC: {relgnn_metrics['auc']:.4f}  F1: {relgnn_metrics['f1']:.4f}  Tempo: {relgnn_metrics['train_time']:.1f}s")

    progress(0.70, desc="Treinando GraphSAGE baseline...")
    graphsage = GraphSAGEBaseline(hidden_dim=hidden_dim, num_epochs=num_epochs)
    gs_metrics, gs_history = graphsage.fit(tables, log_fn=log)
    log(f"✅ GraphSAGE — AUC: {gs_metrics['auc']:.4f}  F1: {gs_metrics['f1']:.4f}  Tempo: {gs_metrics['train_time']:.1f}s")

    progress(0.85, desc="Treinando XGBoost baseline...")
    xgb = XGBoostBaseline()
    xgb_metrics = xgb.fit(tables, log_fn=log)
    log(f"✅ XGBoost  — AUC: {xgb_metrics['auc']:.4f}  F1: {xgb_metrics['f1']:.4f}  Tempo: {xgb_metrics['train_time']:.1f}s")

    progress(0.93, desc="Gerando visualizações...")
    fig = plot_results(relgnn_metrics, gs_metrics, xgb_metrics, relgnn_history, gs_history, routes)

    # One row per model; metric keys are renamed to the display headers.
    metrics_df = pd.DataFrame([
        {"Modelo": "🔷 RelGNN (Rotas Atômicas)", **relgnn_metrics},
        {"Modelo": "🟣 GraphSAGE (Grafo Estático)", **gs_metrics},
        {"Modelo": "🟡 XGBoost (Flat Features)", **xgb_metrics},
    ]).rename(columns={
        "auc": "AUC-ROC",
        "f1": "F1-Score",
        "precision": "Precisão",
        "recall": "Recall",
        "train_time": "Tempo (s)",
    })
    metrics_df = metrics_df.round(4)

    # Route attributes are read after training.
    # NOTE(review): presumably RelGNN.fit updates attention_weight/active — confirm.
    routes_df = pd.DataFrame([{
        "Rota": " → ".join(r.path),
        "Hops": r.n_hops,
        "Peso α": f"{r.attention_weight:.3f}",
        "Ativa": "✅" if r.active else "—",
    } for r in routes])

    summary = _format_summary(relgnn_metrics, gs_metrics, len(routes))

    RESULTS_CACHE["last"] = {
        "relgnn": relgnn_metrics,
        "graphsage": gs_metrics,
        "xgboost": xgb_metrics,
    }

    progress(1.0, desc="Concluído!")
    log("─" * 60)
    log("🏁 Pipeline completo!")
    return fig, metrics_df, routes_df, summary, "\n".join(logs)
def plot_results(rm, gm, xm, rh, gh, routes):
    """Render the five-panel comparison dashboard as a Matplotlib figure.

    Panels: AUC convergence curves, grouped metric bars, per-route attention
    weights, training time per model, and RelGNN-minus-GraphSAGE deltas.

    Args:
        rm: RelGNN metric dict; "auc", "f1", "precision", "recall" and
            "train_time" are read.
        gm: GraphSAGE metric dict, same keys.
        xm: XGBoost metric dict, same keys.
        rh: RelGNN training history, an iterable of dicts with "epoch" and "auc".
        gh: GraphSAGE training history, same shape as ``rh``.
        routes: Route objects exposing ``path``, ``attention_weight`` and
            ``active``.

    Returns:
        The Matplotlib figure. It is detached from pyplot's figure registry
        before being returned, so repeated calls do not accumulate figures.
    """
    plt.style.use("dark_background")
    fig = plt.figure(figsize=(14, 9), facecolor="#0a0e1a")
    gs = GridSpec(2, 3, figure=fig, hspace=0.45, wspace=0.35)

    CYAN   = "#00d4ff"
    PURPLE = "#7c3aed"
    AMBER  = "#f59e0b"
    GREEN  = "#10b981"
    RED    = "#ef4444"
    PANEL  = "#0f1629"

    ax_curve = fig.add_subplot(gs[0, :2])
    ax_bar   = fig.add_subplot(gs[0, 2])
    ax_route = fig.add_subplot(gs[1, 0])
    ax_time  = fig.add_subplot(gs[1, 1])
    ax_delta = fig.add_subplot(gs[1, 2])
    for ax in [ax_curve, ax_bar, ax_route, ax_time, ax_delta]:
        ax.set_facecolor(PANEL)
        for spine in ax.spines.values():
            spine.set_color("#1e2d4a")

    # 1. Training curves
    epochs_r = [h["epoch"] for h in rh]
    auc_r    = [h["auc"]   for h in rh]
    epochs_g = [h["epoch"] for h in gh]
    auc_g    = [h["auc"]   for h in gh]
    ax_curve.plot(epochs_r, auc_r, color=CYAN,   lw=2.5, label="RelGNN", zorder=3)
    ax_curve.plot(epochs_g, auc_g, color=PURPLE, lw=2,   label="GraphSAGE", linestyle="--", zorder=2)
    ax_curve.fill_between(epochs_r, auc_r, alpha=0.12, color=CYAN)
    ax_curve.set_title("Curva de Convergência (AUC-ROC)", color="white", fontsize=11, pad=8)
    ax_curve.set_xlabel("Época", color="#64748b", fontsize=9)
    ax_curve.set_ylabel("AUC-ROC", color="#64748b", fontsize=9)
    ax_curve.tick_params(colors="#64748b", labelsize=8)
    ax_curve.legend(facecolor="#141c33", edgecolor="#1e2d4a", labelcolor="white", fontsize=9)
    ax_curve.grid(color="#1e2d4a", alpha=0.5, linewidth=0.5)
    # Keep the 0.5 floor when every point is above it, but widen the window
    # when a model dips below chance so the curve is not cut off.
    lowest_auc = min(auc_r + auc_g, default=0.5)
    curve_floor = 0.5 if lowest_auc >= 0.5 else max(0.0, lowest_auc - 0.02)
    ax_curve.set_ylim(curve_floor, 1.0)

    # 2. Bar comparison
    metrics   = ["AUC", "F1", "Prec", "Rec"]
    relgnn_v  = [rm["auc"], rm["f1"], rm["precision"], rm["recall"]]
    graph_v   = [gm["auc"], gm["f1"], gm["precision"], gm["recall"]]
    xgb_v     = [xm["auc"], xm["f1"], xm["precision"], xm["recall"]]
    x = np.arange(len(metrics))
    w = 0.25
    ax_bar.bar(x - w, relgnn_v, w, color=CYAN,   alpha=0.85, label="RelGNN")
    ax_bar.bar(x,     graph_v,  w, color=PURPLE, alpha=0.85, label="GraphSAGE")
    ax_bar.bar(x + w, xgb_v,    w, color=AMBER,  alpha=0.85, label="XGBoost")
    ax_bar.set_title("Métricas Comparativas", color="white", fontsize=11, pad=8)
    ax_bar.set_xticks(x)
    ax_bar.set_xticklabels(metrics, color="#64748b", fontsize=8)
    # Start at 0: F1/precision/recall can be well below 0.5, and a 0.5 floor
    # would make those bars invisible.
    ax_bar.set_ylim(0.0, 1.05)
    ax_bar.tick_params(colors="#64748b", labelsize=8)
    ax_bar.legend(facecolor="#141c33", edgecolor="#1e2d4a", labelcolor="white", fontsize=7)
    ax_bar.grid(axis="y", color="#1e2d4a", alpha=0.5, linewidth=0.5)

    # 3. Atomic routes weights
    # Long paths are shortened to their last two nodes for the tick label.
    route_labels = [" → ".join(r.path[-2:]) if len(r.path) > 2 else " → ".join(r.path)
                    for r in routes]
    route_weights = [r.attention_weight for r in routes]
    colors_r = [GREEN if r.active else "#334155" for r in routes]
    # Position bars numerically: shortened labels may repeat, and categorical
    # placement would stack routes that share a label on the same row.
    route_pos = np.arange(len(routes))
    bars = ax_route.barh(route_pos, route_weights, color=colors_r, alpha=0.85)
    ax_route.set_yticks(route_pos)
    ax_route.set_yticklabels(route_labels)
    ax_route.set_title("Pesos de Atenção (α)\nRotas Atômicas", color="white", fontsize=10, pad=8)
    ax_route.set_xlim(0, 1)
    ax_route.tick_params(colors="#64748b", labelsize=7)
    ax_route.grid(axis="x", color="#1e2d4a", alpha=0.5, linewidth=0.5)
    for bar, w_ in zip(bars, route_weights):
        ax_route.text(w_ + 0.02, bar.get_y() + bar.get_height() / 2,
                      f"{w_:.2f}", va="center", color="white", fontsize=8)

    # 4. Training time
    models_t = ["RelGNN", "GraphSAGE", "XGBoost"]
    times    = [rm["train_time"], gm["train_time"], xm["train_time"]]
    cols_t   = [CYAN, PURPLE, AMBER]
    ax_time.bar(models_t, times, color=cols_t, alpha=0.85, width=0.5)
    ax_time.set_title("Tempo de Treino (s)", color="white", fontsize=11, pad=8)
    ax_time.tick_params(colors="#64748b", labelsize=8)
    ax_time.grid(axis="y", color="#1e2d4a", alpha=0.5, linewidth=0.5)
    # Offset labels relative to the tallest bar so they sit just above the
    # bars whether training took milliseconds or minutes.
    time_pad = 0.02 * max(times)
    for i, (t, c) in enumerate(zip(times, cols_t)):
        ax_time.text(i, t + time_pad, f"{t:.1f}s", ha="center", va="bottom",
                     color=c, fontsize=9, fontweight="bold")
    ax_time.margins(y=0.15)  # headroom for the value labels

    # 5. Delta vs GraphSAGE
    delta_metrics = ["AUC", "F1", "Precisão", "Recall"]
    deltas = [
        (rm["auc"]       - gm["auc"])       * 100,
        (rm["f1"]        - gm["f1"])        * 100,
        (rm["precision"] - gm["precision"]) * 100,
        (rm["recall"]    - gm["recall"])    * 100,
    ]
    # Same threshold for colour and label placement, so a zero delta is not
    # drawn red while being labelled as a gain.
    colors_d = [GREEN if d >= 0 else RED for d in deltas]
    ax_delta.bar(delta_metrics, deltas, color=colors_d, alpha=0.85, width=0.5)
    ax_delta.axhline(0, color="#64748b", linewidth=0.8)
    ax_delta.set_title("RelGNN vs GraphSAGE\n(Δ pontos percentuais)", color="white", fontsize=10, pad=8)
    ax_delta.tick_params(colors="#64748b", labelsize=8)
    ax_delta.grid(axis="y", color="#1e2d4a", alpha=0.5, linewidth=0.5)
    delta_pad = 0.03 * (max(abs(d) for d in deltas) or 1.0)
    for i, (d, c) in enumerate(zip(deltas, colors_d)):
        gain = d >= 0
        ax_delta.text(i, d + delta_pad if gain else d - delta_pad,
                      f"{d:+.1f}%",
                      ha="center", va="bottom" if gain else "top",
                      color=c, fontsize=9, fontweight="bold")
    ax_delta.margins(y=0.2)  # headroom for labels above and below the bars

    # y stays inside the canvas so the title is not clipped when the figure
    # is saved without a tight bounding box.
    fig.suptitle("RelGNN — Deep Relational Learning · TPC-H Fraud Detection",
                 color="white", fontsize=13, fontweight="bold", y=0.98)

    # Remove the figure from pyplot's global registry; otherwise every run
    # of the long-lived app keeps another figure alive. The Figure object
    # itself stays usable by the caller.
    plt.close(fig)
    return fig
# ─── GRADIO UI ────────────────────────────────────────────────────────────────
# Custom stylesheet handed to gr.Blocks: dark page background, gradient
# primary button, hidden footer.
CSS = """
.gradio-container { background: #0a0e1a !important; }
.gr-button-primary { background: linear-gradient(135deg, #00d4ff, #7c3aed) !important; border: none !important; }
footer { display: none !important; }
"""

with gr.Blocks(css=CSS, title="RelGNN — Deep Relational Learning") as demo:
    # Page header.
    gr.Markdown("""
    # ⬡ RelGNN — Deep Relational Learning
    ### Do SQL ao Graph AI sem Engenharia Manual · TPC-H Fraud Detection
    **Projeto 8** | Compare RelGNN (Rotas Atômicas) vs GraphSAGE vs XGBoost
    """)

    with gr.Row():
        # Left column: dataset and model controls plus the run button.
        with gr.Column(scale=1):
            gr.Markdown("### ⚙️ Configuração")
            n_customers = gr.Slider(
                minimum=100, maximum=2000, value=500, step=100,
                label="Nº de Clientes",
            )
            n_orders = gr.Slider(
                minimum=500, maximum=10000, value=2000, step=500,
                label="Nº de Pedidos",
            )
            fraud_rate = gr.Slider(
                minimum=1, maximum=20, value=5, step=1,
                label="Taxa de Fraude (%)",
            )

            gr.Markdown("### 🧠 Hiperparâmetros")
            hidden_dim = gr.Slider(
                minimum=16, maximum=128, value=64, step=16,
                label="Hidden Dim",
            )
            num_epochs = gr.Slider(
                minimum=10, maximum=100, value=50, step=10,
                label="Épocas",
            )
            max_hops = gr.Slider(
                minimum=1, maximum=4, value=3, step=1,
                label="Max Hops (Rotas Atômicas)",
            )

            btn = gr.Button(value="🚀 Rodar Pipeline Completo", variant="primary")

        # Right column: one tab per kind of result.
        with gr.Column(scale=3):
            with gr.Tabs():
                with gr.Tab(label="📊 Visualizações"):
                    plot_out = gr.Plot(label="Resultados")
                with gr.Tab(label="📋 Métricas"):
                    metrics_out = gr.Dataframe(label="Comparação de Modelos")
                    routes_out = gr.Dataframe(label="Rotas Atômicas Descobertas")
                with gr.Tab(label="📝 Resumo"):
                    summary_out = gr.Markdown()
                with gr.Tab(label="🔧 Log"):
                    log_out = gr.Textbox(label="Log de Execução", lines=20, max_lines=30)

    # The input order matches run_full_pipeline's positional parameters and the
    # output order matches the tuple it returns.
    pipeline_inputs = [n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops]
    pipeline_outputs = [plot_out, metrics_out, routes_out, summary_out, log_out]
    btn.click(fn=run_full_pipeline, inputs=pipeline_inputs, outputs=pipeline_outputs)

    # Footer with reference links.
    gr.Markdown("""
    ---
    **Referências:** [RelBench](https://relbench.stanford.edu/) · [TPC-H Benchmark](https://www.tpc.org/tpch/) · [GraphSAGE](https://arxiv.org/abs/1706.02216)
    """)

if __name__ == "__main__":
    demo.launch()