Danielfonseca1212 committed on
Commit
073d02a
·
verified ·
1 Parent(s): 47a3ee9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +476 -162
app.py CHANGED
@@ -1,183 +1,503 @@
1
  """
2
- RelGNN β€” Deep Relational Learning
3
- Projeto 8: Do SQL ao Graph AI sem Engenharia Manual
 
4
  """
5
 
6
- # ── Auto-install (garante dependΓͺncias no HF Spaces) ──────────────────────────
7
  import subprocess, sys, os
8
- sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
9
 
10
- def install(pkg):
11
- subprocess.check_call([sys.executable, "-m", "pip", "install", pkg, "-q"])
 
12
 
13
- try:
14
- import plotly
15
- except ImportError:
16
- install("plotly")
 
17
 
18
- try:
19
- import sklearn
20
- except ImportError:
21
- install("scikit-learn")
22
-
23
- # ── Imports principais ─────────────────────────────────────────────────────────
24
  import gradio as gr
25
  import pandas as pd
26
  import numpy as np
27
  import plotly.graph_objects as go
28
  from plotly.subplots import make_subplots
29
- import warnings
 
 
 
 
 
 
 
 
 
 
30
  warnings.filterwarnings("ignore")
31
 
32
- from data.tpch_generator import generate_tpch_data
33
- from data.routes import discover_atomic_routes, RouteConfig
34
- from relgnn.model import RelGNN, RelGNNConfig
35
- from baseline.graphsage_baseline import GraphSAGEBaseline
36
- from baseline.xgboost_baseline import XGBoostBaseline
37
-
38
-
39
- def run_full_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops, progress=gr.Progress()):
40
- logs = []
41
- def log(msg): logs.append(str(msg))
42
-
43
- progress(0.05, desc="Gerando dataset TPC-H...")
44
- tables = generate_tpch_data(int(n_customers), int(n_orders), float(fraud_rate)/100.0, seed=42)
45
- log(f"βœ… {int(n_customers)} clientes, {int(n_orders)} pedidos, {tables['orders']['is_fraud'].sum()} fraudes")
46
-
47
- progress(0.15, desc="Descobrindo rotas atΓ΄micas...")
48
- routes = discover_atomic_routes(tables, RouteConfig(max_hops=int(max_hops)))
49
- log(f"βœ… {len(routes)} rotas atΓ΄micas descobertas")
50
- for r in routes:
51
- log(f" β†’ {' β†’ '.join(r.path)} (hops={r.n_hops}, Ξ±={r.attention_weight:.3f})")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  progress(0.30, desc="Treinando RelGNN...")
54
- relgnn = RelGNN(RelGNNConfig(hidden_dim=int(hidden_dim), num_epochs=int(num_epochs)))
55
- rm, rh = relgnn.fit(tables, routes, log_fn=log, progress_fn=progress)
56
- log(f"βœ… RelGNN AUC={rm['auc']:.4f} F1={rm['f1']:.4f} Tempo={rm['train_time']:.1f}s")
57
 
58
  progress(0.70, desc="Treinando GraphSAGE...")
59
- gs = GraphSAGEBaseline(hidden_dim=int(hidden_dim), num_epochs=int(num_epochs))
60
- gm, gh = gs.fit(tables, log_fn=log)
61
- log(f"βœ… GraphSAGE AUC={gm['auc']:.4f} F1={gm['f1']:.4f} Tempo={gm['train_time']:.1f}s")
62
 
63
- progress(0.85, desc="Treinando XGBoost...")
64
- xm = XGBoostBaseline().fit(tables, log_fn=log)
65
- log(f"βœ… XGBoost AUC={xm['auc']:.4f} F1={xm['f1']:.4f} Tempo={xm['train_time']:.1f}s")
66
 
67
- progress(0.93, desc="Plotando resultados...")
68
- fig = build_figure(rm, gm, xm, rh, gh, routes)
69
 
70
  metrics_df = pd.DataFrame([
71
- {"Modelo": "πŸ”· RelGNN", "AUC": rm["auc"], "F1": rm["f1"], "PrecisΓ£o": rm["precision"], "Recall": rm["recall"], "Tempo(s)": rm["train_time"]},
72
- {"Modelo": "🟣 GraphSAGE", "AUC": gm["auc"], "F1": gm["f1"], "Precisão": gm["precision"], "Recall": gm["recall"], "Tempo(s)": gm["train_time"]},
73
- {"Modelo": "🟑 XGBoost", "AUC": xm["auc"], "F1": xm["f1"], "Precisão": xm["precision"], "Recall": xm["recall"], "Tempo(s)": xm["train_time"]},
74
  ]).round(4)
75
 
76
- routes_df = pd.DataFrame([{
77
- "Rota": " β†’ ".join(r.path), "Hops": r.n_hops,
78
- "Peso Ξ±": round(r.attention_weight, 4), "Ativa": "βœ…" if r.active else "β€”",
79
- } for r in routes])
80
-
81
- da = (rm["auc"] - gm["auc"]) * 100
82
- df1 = (rm["f1"] - gm["f1"]) * 100
83
- dt = (1 - rm["train_time"] / max(gm["train_time"], 0.1)) * 100
84
- summary = (
85
- f"## 🎯 Resultado Final\n\n"
86
- f"| | RelGNN | GraphSAGE | Ξ” |\n|---|---|---|---|\n"
87
- f"| AUC-ROC | **{rm['auc']:.4f}** | {gm['auc']:.4f} | **+{da:.1f}%** |\n"
88
- f"| F1-Score | **{rm['f1']:.4f}** | {gm['f1']:.4f} | **+{df1:.1f}%** |\n"
89
- f"| Tempo | **{rm['train_time']:.1f}s** | {gm['train_time']:.1f}s | **βˆ’{dt:.0f}%** |\n\n"
90
- f"πŸš€ **{len(routes)} rotas atΓ΄micas** descobertas automaticamente via FK paths\n\n"
91
- f"πŸ”‘ Zero conversΓ£o para grafo Β· Zero feature engineering manual"
92
- )
93
-
94
- progress(1.0)
95
- log("🏁 Pipeline completo!")
96
- return fig, metrics_df, routes_df, summary, "\n".join(logs)
97
 
 
 
98
 
99
- def build_figure(rm, gm, xm, rh, gh, routes):
100
- BG = "#0a0e1a"; PANEL = "#0f1629"
101
- CYAN = "#00d4ff"; PURP = "#7c3aed"; AMBER = "#f59e0b"; GREEN = "#10b981"; GRAY = "#64748b"
102
-
103
- specs = [[{"type":"xy"},{"type":"xy"},{"type":"xy"}],
104
- [{"type":"xy"},{"type":"xy"},{"type":"polar"}]]
105
-
106
- fig = make_subplots(rows=2, cols=3,
107
- subplot_titles=["ConvergΓͺncia AUC-ROC", "MΓ©tricas Comparativas", "Tempo de Treino (s)",
108
- "Pesos de AtenΓ§Γ£o (Rotas)", "Ξ” RelGNN vs GraphSAGE (%)", "Radar de Performance"],
109
- specs=specs, vertical_spacing=0.20, horizontal_spacing=0.10)
110
-
111
- # 1. Curvas de convergΓͺncia
112
- fig.add_trace(go.Scatter(x=[h["epoch"] for h in rh], y=[h["auc"] for h in rh],
113
- name="RelGNN", line=dict(color=CYAN, width=3),
114
- fill="tozeroy", fillcolor="rgba(0,212,255,0.07)"), row=1, col=1)
115
- fig.add_trace(go.Scatter(x=[h["epoch"] for h in gh], y=[h["auc"] for h in gh],
116
- name="GraphSAGE", line=dict(color=PURP, width=2, dash="dash")), row=1, col=1)
117
-
118
- # 2. Barras de mΓ©tricas
119
- mnames = ["AUC", "F1", "PrecisΓ£o", "Recall"]
120
- for vals, name, col in [
121
- ([rm["auc"],rm["f1"],rm["precision"],rm["recall"]], "RelGNN", CYAN),
122
- ([gm["auc"],gm["f1"],gm["precision"],gm["recall"]], "GraphSAGE", PURP),
123
- ([xm["auc"],xm["f1"],xm["precision"],xm["recall"]], "XGBoost", AMBER),
124
- ]:
125
- fig.add_trace(go.Bar(x=mnames, y=vals, name=name,
126
- marker_color=col, opacity=0.85, showlegend=False), row=1, col=2)
127
-
128
- # 3. Tempo de treino
129
- fig.add_trace(go.Bar(
130
- x=["RelGNN","GraphSAGE","XGBoost"],
131
- y=[rm["train_time"], gm["train_time"], xm["train_time"]],
132
- marker_color=[CYAN, PURP, AMBER], opacity=0.85, showlegend=False,
133
- text=[f"{rm['train_time']:.1f}s", f"{gm['train_time']:.1f}s", f"{xm['train_time']:.1f}s"],
134
- textposition="outside"), row=1, col=3)
135
-
136
- # 4. Pesos de atenΓ§Γ£o das rotas
137
- rlabels = [" β†’ ".join(r.path[-2:]) if len(r.path) > 2 else " β†’ ".join(r.path) for r in routes]
138
- rweights = [r.attention_weight for r in routes]
139
- rcolors = [GREEN if r.active else GRAY for r in routes]
140
- fig.add_trace(go.Bar(x=rweights, y=rlabels, orientation="h",
141
- marker_color=rcolors, opacity=0.85, showlegend=False,
142
- text=[f"Ξ±={w:.3f}" for w in rweights], textposition="outside"), row=2, col=1)
143
-
144
- # 5. Deltas
145
- deltas = [(rm[k]-gm[k])*100 for k in ["auc","f1","precision","recall"]]
146
- fig.add_trace(go.Bar(x=mnames, y=deltas,
147
- marker_color=[GREEN if d >= 0 else "#ef4444" for d in deltas],
148
- opacity=0.85, showlegend=False,
149
- text=[f"+{d:.1f}%" if d >= 0 else f"{d:.1f}%" for d in deltas],
150
- textposition="outside"), row=2, col=2)
151
- fig.add_hline(y=0, line_color=GRAY, line_width=1, row=2, col=2)
152
-
153
- # 6. Radar
154
- cats = ["AUC","F1","PrecisΓ£o","Recall","Velocidade"]
155
- mx_t = max(rm["train_time"], gm["train_time"], xm["train_time"])
156
- for vals, name, col in [
157
- ([rm["auc"],rm["f1"],rm["precision"],rm["recall"],1-rm["train_time"]/mx_t], "RelGNN", CYAN),
158
- ([gm["auc"],gm["f1"],gm["precision"],gm["recall"],1-gm["train_time"]/mx_t], "GraphSAGE", PURP),
159
- ([xm["auc"],xm["f1"],xm["precision"],xm["recall"],1-xm["train_time"]/mx_t], "XGBoost", AMBER),
160
- ]:
161
- fig.add_trace(go.Scatterpolar(r=vals+[vals[0]], theta=cats+[cats[0]],
162
- name=name, fill="toself", line_color=col, opacity=0.55, showlegend=False), row=2, col=3)
163
-
164
- fig.update_layout(
165
- height=680, paper_bgcolor=BG, plot_bgcolor=PANEL, barmode="group",
166
- font=dict(color="#e2e8f0", family="monospace", size=11),
167
- title=dict(text="RelGNN Β· TPC-H Fraud Detection", font=dict(size=14, color=CYAN), x=0.5),
168
- legend=dict(bgcolor="#141c33", bordercolor="#1e2d4a"))
169
- fig.update_xaxes(gridcolor="#1e2d4a")
170
- fig.update_yaxes(gridcolor="#1e2d4a")
171
- fig.update_yaxes(range=[0.35, 1.05], row=1, col=1)
172
- fig.update_yaxes(range=[0.35, 1.05], row=1, col=2)
173
  return fig
174
 
 
 
 
175
 
176
  CSS = ".gradio-container{max-width:1100px!important} footer{display:none!important}"
177
 
178
  with gr.Blocks(css=CSS, title="RelGNN") as demo:
179
  gr.Markdown("# ⬑ RelGNN β€” Deep Relational Learning\n### Do SQL ao Graph AI sem Engenharia Manual Β· TPC-H Fraud Detection")
180
-
181
  with gr.Row():
182
  with gr.Column(scale=1, min_width=230):
183
  gr.Markdown("### βš™οΈ Dataset")
@@ -185,27 +505,21 @@ with gr.Blocks(css=CSS, title="RelGNN") as demo:
185
  n_orders = gr.Slider(500, 10000, value=2000, step=500, label="NΒΊ Pedidos")
186
  fraud_rate = gr.Slider(1, 20, value=5, step=1, label="Fraude (%)")
187
  gr.Markdown("### 🧠 Modelo")
188
- hidden_dim = gr.Slider(16, 128, value=64, step=16, label="Hidden Dim")
189
- num_epochs = gr.Slider(10, 100, value=50, step=10, label="Γ‰pocas")
190
- max_hops = gr.Slider(1, 4, value=3, step=1, label="Max Hops")
191
  btn = gr.Button("πŸš€ Rodar Pipeline", variant="primary", size="lg")
192
-
193
  with gr.Column(scale=3):
194
  with gr.Tabs():
195
- with gr.Tab("πŸ“Š VisualizaΓ§Γ΅es"):
196
- plot_out = gr.Plot()
197
  with gr.Tab("πŸ“‹ MΓ©tricas"):
198
  metrics_out = gr.Dataframe(label="ComparaΓ§Γ£o de Modelos")
199
  routes_out = gr.Dataframe(label="Rotas AtΓ΄micas")
200
- with gr.Tab("πŸ“ Resumo"):
201
- summary_out = gr.Markdown()
202
- with gr.Tab("πŸ”§ Log"):
203
- log_out = gr.Textbox(lines=22, max_lines=35)
204
-
205
- btn.click(
206
- fn=run_full_pipeline,
207
- inputs=[n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops],
208
- outputs=[plot_out, metrics_out, routes_out, summary_out, log_out])
209
 
210
  if __name__ == "__main__":
211
  demo.launch()
 
1
  """
2
+ RelGNN — Deep Relational Learning · Projeto 8
3
+ Do SQL ao Graph AI sem Engenharia Manual · TPC-H Fraud Detection
4
+ Arquivo único para Hugging Face Spaces (sem imports locais)
5
  """
6
 
 
7
  import subprocess, sys, os
 
8
 
9
def _install(pkg):
    """Install *pkg* with pip using the interpreter that is running this file.

    Raises ``subprocess.CalledProcessError`` if pip exits with a non-zero status.
    """
    subprocess.check_call([sys.executable, "-m", "pip", "install", pkg,
                           "-q", "--root-user-action=ignore"])


# Maps the pip distribution name to the module name that is actually imported.
# The two differ for scikit-learn: deriving the module name from the pip name
# ("scikit_learn") never imports, which forced a pip install on every start.
_REQUIRED_PACKAGES = {
    "plotly": "plotly",
    "scikit-learn": "sklearn",
}

for _pkg, _module in _REQUIRED_PACKAGES.items():
    try:
        __import__(_module)
    except ImportError:
        _install(_pkg)
18
 
 
 
 
 
 
 
19
  import gradio as gr
20
  import pandas as pd
21
  import numpy as np
22
  import plotly.graph_objects as go
23
  from plotly.subplots import make_subplots
24
+ import time, warnings
25
+ import torch
26
+ import torch.nn as nn
27
+ import torch.nn.functional as F
28
+ from torch import optim
29
+ from sklearn.model_selection import train_test_split
30
+ from sklearn.metrics import roc_auc_score, f1_score, precision_score, recall_score
31
+ from sklearn.ensemble import GradientBoostingClassifier
32
+ from collections import deque
33
+ from dataclasses import dataclass
34
+ from typing import List, Dict, Tuple, Optional, Callable
35
  warnings.filterwarnings("ignore")
36
 
37
+ # ══════════════════════════════════════════════════════════════════════════════
38
+ # DATA β€” TPC-H GENERATOR
39
+ # ══════════════════════════════════════════════════════════════════════════════
40
+
41
def generate_tpch_data(n_customers=500, n_orders=2000, fraud_rate=0.05, seed=42):
    """Generate a small synthetic TPC-H-style database with a fraud label.

    Args:
        n_customers: number of rows in the ``customers`` table.
        n_orders: number of rows in the ``orders`` table.
        fraud_rate: fraction of orders flagged as fraud (used as a quantile
            cut-off on the fraud score).
        seed: seed for ``numpy.random.default_rng``.

    Returns:
        dict with the DataFrames ``customers``, ``orders``, ``lineitem``,
        ``supplier``, ``nation`` and ``part``.
    """
    rng = np.random.default_rng(seed)

    nation_count = 25
    supplier_count = max(10, n_customers // 20)
    part_count = max(50, n_orders // 5)

    # NOTE: the random draws below must stay in this exact order, otherwise a
    # given seed produces different tables.
    nation_df = pd.DataFrame({
        "n_nationkey": np.arange(nation_count),
        "n_name": [f"NATION_{k}" for k in range(nation_count)],
        "n_regionkey": rng.integers(0, 5, nation_count),
    })

    supplier_df = pd.DataFrame({
        "s_suppkey": np.arange(supplier_count),
        "s_nationkey": rng.integers(0, nation_count, supplier_count),
        "s_acctbal": rng.uniform(-999, 9999, supplier_count).round(2),
    })
    # Suppliers with a balance below 100 are marked as risky.
    supplier_df["s_risk_flag"] = (supplier_df["s_acctbal"] < 100).astype(int)

    part_df = pd.DataFrame({
        "p_partkey": np.arange(part_count),
        "p_retailprice": rng.uniform(5, 2000, part_count).round(2),
    })

    customer_df = pd.DataFrame({
        "c_custkey": np.arange(n_customers),
        "c_nationkey": rng.integers(0, nation_count, n_customers),
        "c_acctbal": rng.uniform(-999, 9999, n_customers).round(2),
        "c_account_age_days": rng.integers(1, 3650, n_customers),
        "c_num_prev_orders": rng.poisson(5, n_customers),
    })

    order_customer = rng.integers(0, n_customers, n_orders)
    order_total = rng.exponential(scale=5000, size=n_orders).round(2)

    # c_custkey equals the row position, so positional lookup matches the key.
    balance_of_buyer = customer_df["c_acctbal"].to_numpy()[order_customer]
    age_of_buyer = customer_df["c_account_age_days"].to_numpy()[order_customer]

    # Weighted score: negative balance, large order, young account, plus noise.
    score = 0.4 * (balance_of_buyer < 0).astype(float) + 0.3 * (order_total > 15000).astype(float)
    score = score + 0.2 * (age_of_buyer < 30).astype(float)
    score = score + 0.1 * rng.random(n_orders)
    cutoff = np.quantile(score, 1 - fraud_rate)

    order_df = pd.DataFrame({
        "o_orderkey": np.arange(n_orders),
        "o_custkey": order_customer,
        "o_totalprice": order_total,
        "o_shippriority": rng.integers(0, 3, n_orders),
        "is_fraud": (score >= cutoff).astype(int),
    })

    # Each order gets between 1 and 7 line items.
    lines_per_order = rng.integers(1, 8, n_orders)
    line_total = lines_per_order.sum()
    lineitem_df = pd.DataFrame({
        "l_orderkey": np.repeat(np.arange(n_orders), lines_per_order),
        "l_partkey": rng.integers(0, part_count, line_total),
        "l_suppkey": rng.integers(0, supplier_count, line_total),
        "l_quantity": rng.integers(1, 51, line_total).astype(float),
        "l_extendedprice": rng.uniform(10, 5000, line_total).round(2),
        "l_discount": rng.uniform(0, 0.1, line_total).round(2),
        "l_tax": rng.uniform(0, 0.08, line_total).round(2),
    })

    return {
        "customers": customer_df,
        "orders": order_df,
        "lineitem": lineitem_df,
        "supplier": supplier_df,
        "nation": nation_df,
        "part": part_df,
    }
97
+
98
+ # ══════════════════════════════════════════════════════════════════════════════
99
+ # DATA β€” ATOMIC ROUTES
100
+ # ══════════════════════════════════════════════════════════════════════════════
101
+
102
# Foreign keys of the schema as (child_table, child_column, parent_table, parent_column).
TPCH_FK = [
    ("orders", "o_custkey", "customers", "c_custkey"),
    ("lineitem", "l_orderkey", "orders", "o_orderkey"),
    ("lineitem", "l_suppkey", "supplier", "s_suppkey"),
    ("lineitem", "l_partkey", "part", "p_partkey"),
    ("customers", "c_nationkey", "nation", "n_nationkey"),
    ("supplier", "s_nationkey", "nation", "n_nationkey"),
]


@dataclass
class AtomicRoute:
    """A walk over foreign-key links, stored as the ordered list of table names."""

    path: List[str]
    # Always recomputed from ``path`` in __post_init__; a value passed in is ignored.
    n_hops: int = 0
    attention_weight: float = 1.0
    active: bool = True

    def __post_init__(self):
        self.n_hops = len(self.path) - 1


def discover_atomic_routes(tables, max_hops=3):
    """Enumerate foreign-key walks that start at ``customers``.

    A breadth-first search follows every link in ``TPCH_FK`` in both
    directions, never revisiting a table within one walk and only entering
    tables present in ``tables``.

    Args:
        tables: mapping whose keys are the available table names.
        max_hops: maximum number of links in a route.

    Returns:
        List of ``AtomicRoute`` ordered by decreasing weight. Raw weights are
        ``1 / hops**1.5`` and are then softmax-normalised across all routes;
        routes with at most two hops start out ``active``.
    """
    neighbours = {}
    for child, _child_col, parent, _parent_col in TPCH_FK:
        neighbours.setdefault(child, []).append(parent)
        neighbours.setdefault(parent, []).append(child)

    found = []
    frontier = deque([(["customers"], {"customers"})])
    while frontier:
        walk, seen = frontier.popleft()
        hops = len(walk) - 1
        if hops >= 1:
            found.append(AtomicRoute(
                path=list(walk),
                attention_weight=1.0 / (hops ** 1.5),
                active=hops <= 2,
            ))
        if hops >= max_hops:
            continue
        for table in neighbours.get(walk[-1], []):
            if table in seen or table not in tables:
                continue
            frontier.append((walk + [table], seen | {table}))

    # Stable sort: routes with equal weight keep their discovery order.
    found.sort(key=lambda route: route.attention_weight, reverse=True)

    raw = np.array([route.attention_weight for route in found])
    exp_raw = np.exp(raw)
    normalised = exp_raw / exp_raw.sum()
    for route, weight in zip(found, normalised):
        route.attention_weight = float(weight)
    return found
141
+
142
+ # ══════════════════════════════════════════════════════════════════════════════
143
+ # FEATURE EXTRACTION
144
+ # ══════════════════════════════════════════════════════════════════════════════
145
+
146
def extract_features(tables):
    """Build one min-max-normalised feature matrix per table, aligned to customers.

    Every matrix has one row per row of ``tables["customers"]``, in the same
    order. Customers without related rows get zeros before normalisation.

    Args:
        tables: dict with the DataFrames ``customers``, ``orders``,
            ``lineitem``, ``supplier`` and ``nation``.

    Returns:
        ``(features, labels)`` where ``features`` maps the table names
        ``customers``, ``orders``, ``lineitem``, ``supplier`` and ``nation``
        to float arrays, and ``labels`` is a float array holding 1.0 for
        customers with at least one fraudulent order and 0.0 otherwise.
    """
    customers, orders, lineitem = tables["customers"], tables["orders"], tables["lineitem"]
    supplier, nation = tables["supplier"], tables["nation"]

    fraud_by_cust = orders.groupby("o_custkey")["is_fraud"].max()
    labels = customers["c_custkey"].map(fraud_by_cust).fillna(0).values.astype(float)

    def norm(arr):
        # Column-wise min-max scaling; constant columns divide by 1 instead of 0.
        mn, mx = arr.min(0, keepdims=True), arr.max(0, keepdims=True)
        return (arr - mn) / np.where(mx - mn == 0, 1, mx - mn)

    # Empty frame indexed by customer key; joining onto it aligns aggregates to customers.
    by_customer = customers[["c_custkey"]].set_index("c_custkey")

    # customers
    c_feat = norm(
        customers[["c_acctbal", "c_nationkey", "c_account_age_days", "c_num_prev_orders"]]
        .fillna(0).values.astype(np.float32)
    )

    # orders aggregated per customer: means, max total price, order count
    om = orders.groupby("o_custkey")[["o_totalprice", "o_shippriority"]].mean()
    ox = orders.groupby("o_custkey")[["o_totalprice"]].max()
    oc = orders.groupby("o_custkey").size().rename("cnt")
    oa = by_customer.join(om).join(ox, rsuffix="_max").join(oc).fillna(0)
    o_feat = norm(oa.values.astype(np.float32))

    # lineitem aggregated per customer.
    # The key is named l_orderkey in lineitem and o_orderkey in orders, so the
    # join must name both sides; on="o_orderkey" raises KeyError.
    li = lineitem.merge(orders[["o_orderkey", "o_custkey"]],
                        left_on="l_orderkey", right_on="o_orderkey", how="left")
    lm = li.groupby("o_custkey")[["l_quantity", "l_extendedprice", "l_discount", "l_tax"]].mean()
    lc = li.groupby("o_custkey").size().rename("cnt")
    la = by_customer.join(lm).join(lc).fillna(0)
    l_feat = norm(la.values.astype(np.float32))

    # supplier attributes averaged over the line items of each customer
    sw = li.merge(supplier, left_on="l_suppkey", right_on="s_suppkey", how="left")
    sm = sw.groupby("o_custkey")[["s_acctbal", "s_risk_flag"]].mean()
    sa = by_customer.join(sm).fillna(0)
    s_feat = norm(sa.values.astype(np.float32))

    # nation of each customer
    nj = customers[["c_custkey", "c_nationkey"]].merge(
        nation, left_on="c_nationkey", right_on="n_nationkey", how="left"
    )[["n_nationkey", "n_regionkey"]].fillna(0)
    n_feat = norm(nj.values.astype(np.float32))

    return dict(customers=c_feat, orders=o_feat, lineitem=l_feat, supplier=s_feat, nation=n_feat), labels
185
+
186
+ # ══════════════════════════════════════════════════════════════════════════════
187
+ # RELGNN MODEL
188
+ # ══════════════════════════════════════════════════════════════════════════════
189
+
190
class TableEncoder(nn.Module):
    """Two-layer MLP that maps a table's feature rows to ``hidden``-sized embeddings."""

    def __init__(self, in_dim, hidden):
        super().__init__()
        wide = hidden * 2
        # Layer order is kept so that state_dict keys (net.0, net.1, net.4, net.5) are unchanged.
        layers = [
            nn.Linear(in_dim, wide),
            nn.LayerNorm(wide),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(wide, hidden),
            nn.LayerNorm(hidden),
            nn.ReLU(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, x):
        """Return the embedding of ``x``; the last dimension becomes ``hidden``."""
        return self.net(x)
197
+
198
class RouteAttention(nn.Module):
    """Self-attention over the tables of one route, pooled at the first position."""

    def __init__(self, hidden, heads=4):
        super().__init__()
        self.attn = nn.MultiheadAttention(hidden, heads, dropout=0.1, batch_first=True)
        self.norm = nn.LayerNorm(hidden)
        self.mlp = nn.Sequential(
            nn.Linear(hidden, hidden * 2),
            nn.ReLU(),
            nn.Dropout(0.1),
            nn.Linear(hidden * 2, hidden),
        )

    def forward(self, hops):
        """Attend over ``hops`` (stacked along dim 1) and pool the first position.

        Returns:
            ``(pooled, alpha)``: the first position after a residual LayerNorm
            plus an MLP residual, and the attention weights returned by
            ``nn.MultiheadAttention``.
        """
        attended, alpha = self.attn(hops, hops, hops)
        normed = self.norm(attended + hops)
        anchor = normed[:, 0, :]
        return anchor + self.mlp(anchor), alpha
208
+
209
class RelGNNModel(nn.Module):
    """Per-table encoders, one attention block per route, and a learned route mix."""

    def __init__(self, feat_dims, hidden, routes):
        super().__init__()
        table_encoders = {}
        for table, width in feat_dims.items():
            table_encoders[table] = TableEncoder(width, hidden)
        self.encoders = nn.ModuleDict(table_encoders)

        attention_blocks = []
        for _ in routes:
            attention_blocks.append(RouteAttention(hidden))
        self.route_attn = nn.ModuleList(attention_blocks)

        # One learnable logit per route; softmaxed in forward().
        self.route_w = nn.Parameter(torch.ones(len(routes)))
        self.head = nn.Sequential(
            nn.Linear(hidden, hidden // 2),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(hidden // 2, 1),
        )
        self.routes = routes

    def forward(self, feats):
        """Return one logit per row of the feature tensors in ``feats``."""
        encoded = {}
        for table, encoder in self.encoders.items():
            if table in feats:
                encoded[table] = encoder(feats[table])

        per_route = []
        for route, attention in zip(self.routes, self.route_attn):
            present = [table for table in route.path if table in encoded]
            if len(present) >= 2:
                sequence = torch.stack([encoded[table] for table in present], dim=1)
                pooled, _ = attention(sequence)
            else:
                # Fewer than two tables of this route have features:
                # fall back to the first encoded table.
                pooled = list(encoded.values())[0]
            per_route.append(pooled)

        stacked = torch.stack(per_route, dim=1)
        mix = F.softmax(self.route_w, dim=0)
        fused = (stacked * mix[None, :, None]).sum(1)
        return self.head(fused).squeeze(-1)
233
+
234
def train_relgnn(tables, routes, hidden=64, epochs=50, log_fn=print, progress_fn=None):
    """Train ``RelGNNModel`` on customer-level fraud labels and evaluate it.

    Uses a stratified 80/20 split of customers, full-batch AdamW with cosine
    learning-rate annealing, and a class-weighted BCE loss.

    Args:
        tables: dict of DataFrames accepted by ``extract_features``.
        routes: routes passed to the model; their ``attention_weight`` and
            ``active`` attributes are overwritten with the learned weights.
        hidden: embedding width.
        epochs: number of full-batch epochs.
        log_fn: callable receiving log lines.
        progress_fn: optional callable ``(fraction, desc=...)``.

    Returns:
        ``(metrics, history)``: ``metrics`` has ``auc``, ``f1``, ``precision``,
        ``recall`` and ``train_time``; ``history`` is a list of
        ``{"epoch", "auc"}`` dicts recorded about eight times during training.
    """
    t0 = time.time()
    feats_np, labels = extract_features(tables)
    feat_dims = {k: v.shape[1] for k, v in feats_np.items()}
    model = RelGNNModel(feat_dims, hidden, routes)
    opt = optim.AdamW(model.parameters(), lr=1e-3, weight_decay=1e-4)
    sched = optim.lr_scheduler.CosineAnnealingLR(opt, T_max=epochs)

    idx = np.arange(len(labels))
    idx_tr, idx_te = train_test_split(idx, test_size=0.2, random_state=42,
                                      stratify=(labels > 0.5).astype(int))

    def tensors(idx_):
        return {k: torch.tensor(v[idx_], dtype=torch.float32) for k, v in feats_np.items()}

    def safe_auc(y_true, scores):
        # roc_auc_score raises ValueError when y_true contains a single class.
        try:
            return roc_auc_score(y_true, scores)
        except ValueError:
            return 0.5

    # The splits never change, so build the tensors once instead of every epoch.
    x_tr, x_te = tensors(idx_tr), tensors(idx_te)
    y_tr = torch.tensor(labels[idx_tr], dtype=torch.float32)

    # Weight positives by the negative/positive ratio to counter class imbalance.
    n_pos = int((y_tr == 1).sum())
    n_neg = int((y_tr == 0).sum())
    pw = torch.tensor([n_neg / max(n_pos, 1)], dtype=torch.float32)
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pw)

    history, log_every = [], max(1, epochs // 8)
    model.train()
    for ep in range(1, epochs + 1):
        opt.zero_grad()
        logits = model(x_tr)
        loss = loss_fn(logits, y_tr)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        sched.step()
        if ep % log_every == 0 or ep == epochs:
            model.eval()
            with torch.no_grad():
                p = torch.sigmoid(model(x_te)).numpy()
            auc = safe_auc(labels[idx_te], p)
            history.append({"epoch": ep, "auc": auc})
            log_fn(f" RelGNN ep={ep:3d}/{epochs} loss={float(loss):.4f} auc={auc:.4f}")
            model.train()
        # NOTE(review): indentation was lost in the source; reporting progress
        # once per epoch is assumed here.
        if progress_fn:
            progress_fn(0.30 + 0.38 * (ep / epochs), desc=f"RelGNN época {ep}/{epochs}")

    model.eval()
    with torch.no_grad():
        p = torch.sigmoid(model(x_te)).numpy()
    pred = (p > 0.5).astype(int)
    yt = labels[idx_te].astype(int)
    # Only AUC can be undefined; the other metrics are always reported as measured.
    metrics = dict(
        auc=round(safe_auc(yt, p), 4),
        f1=round(f1_score(yt, pred, zero_division=0), 4),
        precision=round(precision_score(yt, pred, zero_division=0), 4),
        recall=round(recall_score(yt, pred, zero_division=0), 4),
        train_time=round(time.time() - t0, 1),
    )

    # Publish the learned route mix back onto the route objects.
    w = F.softmax(model.route_w, dim=0).detach().numpy()
    for i, r in enumerate(routes):
        if i < len(w):
            r.attention_weight = float(w[i])
            r.active = float(w[i]) > 0.15
    return metrics, history
282
+
283
+ # ══════════════════════════════════════════════════════════════════════════════
284
+ # GRAPHSAGE BASELINE
285
+ # ══════════════════════════════════════════════════════════════════════════════
286
+
287
class SAGEConv(nn.Module):
    """GraphSAGE-style layer: separate linear maps for a node and its aggregated neighbours."""

    def __init__(self, in_d, out_d):
        super().__init__()
        self.Ws = nn.Linear(in_d, out_d, bias=False)  # applied to the node's own features
        self.Wn = nn.Linear(in_d, out_d, bias=False)  # applied to adj @ h
        self.b = nn.Parameter(torch.zeros(out_d))

    def forward(self, h, adj):
        """Return ``relu(Ws(h) + Wn(adj @ h) + b)`` for a dense ``adj``."""
        neighbourhood = torch.mm(adj, h)
        combined = self.Ws(h) + self.Wn(neighbourhood) + self.b
        return F.relu(combined)
293
+
294
class GraphSAGENet(nn.Module):
    """Two ``SAGEConv`` layers with dropout in between and a linear scoring head."""

    def __init__(self, in_d, hid):
        super().__init__()
        self.c1 = SAGEConv(in_d, hid)
        self.c2 = SAGEConv(hid, hid)
        self.drop = nn.Dropout(0.2)
        self.head = nn.Linear(hid, 1)

    def forward(self, h, adj):
        """Return one logit per row of ``h``."""
        first = self.c1(h, adj)
        second = self.c2(self.drop(first), adj)
        return self.head(second).squeeze(-1)
302
+
303
def train_graphsage(tables, hidden=64, epochs=50, log_fn=print):
    """Train the GraphSAGE baseline on a customer-order graph.

    Customers and orders become nodes of one graph (at most 2000 of each are
    kept), joined by undirected customer-order edges. The model is trained
    full-batch on customer nodes only, with a stratified 80/20 split.

    Args:
        tables: dict containing the ``customers`` and ``orders`` DataFrames.
        hidden: hidden width of the network.
        epochs: number of full-batch epochs.
        log_fn: callable receiving log lines.

    Returns:
        ``(metrics, history)`` with the same keys as ``train_relgnn``;
        ``history`` is recorded about five times during training.
    """
    t0 = time.time()
    log_fn(" [GraphSAGE] Convertendo SQL → grafo estático...")
    customers, orders = tables["customers"], tables["orders"]

    # Cap node counts: the adjacency matrix below is dense (N x N).
    MAX_N = 2000
    n_c = min(len(customers), MAX_N)
    n_o = min(len(orders), MAX_N)

    cf = customers[["c_acctbal", "c_nationkey", "c_account_age_days", "c_num_prev_orders"]].iloc[:n_c].fillna(0).values.astype(np.float32)
    of = orders[["o_totalprice", "o_shippriority"]].iloc[:n_o].fillna(0).values.astype(np.float32)
    md = max(cf.shape[1], of.shape[1])

    def pad(a, t):
        # Zero-pad columns so both node types share one feature width.
        if a.shape[1] < t:
            a = np.hstack([a, np.zeros((len(a), t - a.shape[1]), dtype=np.float32)])
        return a

    # Node layout: customers occupy rows [0, n_c), orders rows [n_c, n_c + n_o).
    X = np.vstack([pad(cf, md), pad(of, md)])
    std = X.std(0)
    X = (X - X.mean(0)) / np.where(std == 0, 1, std)
    N = len(X)

    ck = orders["o_custkey"].values[:n_o]
    oi = np.arange(n_o) + n_c
    vm = ck < n_c  # drop orders whose customer was cut off by the cap
    src = np.concatenate([ck[vm], oi[vm]])
    dst = np.concatenate([oi[vm], ck[vm]])

    # Fill all edges in one indexed assignment instead of a Python loop per edge.
    # Every index is < N by construction (ck < n_c, oi < n_c + n_o).
    adj = torch.zeros(N, N)
    adj[torch.from_numpy(dst).long(), torch.from_numpy(src).long()] = 1.0
    # Row-normalise so aggregation is a mean; isolated nodes divide by 1.
    deg = adj.sum(1, keepdim=True).clamp(min=1)
    adj = adj / deg

    fraud_c = orders.groupby("o_custkey")["is_fraud"].max()
    labels = customers["c_custkey"].iloc[:n_c].map(fraud_c).fillna(0).values.astype(np.float32)
    Xt = torch.tensor(X, dtype=torch.float32)
    ci = np.arange(n_c)
    i_tr, i_te = train_test_split(ci, test_size=0.2, random_state=42,
                                  stratify=(labels > 0.5).astype(int))
    y_tr = torch.tensor(labels[i_tr], dtype=torch.float32)

    n_pos = int((y_tr == 1).sum())
    n_neg = int((y_tr == 0).sum())
    pw = torch.tensor([n_neg / max(n_pos, 1)], dtype=torch.float32)

    def safe_auc(y_true, scores):
        # roc_auc_score raises ValueError when y_true contains a single class.
        try:
            return roc_auc_score(y_true, scores)
        except ValueError:
            return 0.5

    model = GraphSAGENet(md, hidden)
    opt = optim.AdamW(model.parameters(), lr=1e-3)
    loss_fn = nn.BCEWithLogitsLoss(pos_weight=pw)
    log_every = max(1, epochs // 5)
    history = []
    model.train()
    for ep in range(1, epochs + 1):
        opt.zero_grad()
        loss = loss_fn(model(Xt, adj)[i_tr], y_tr)
        loss.backward()
        nn.utils.clip_grad_norm_(model.parameters(), 1.0)
        opt.step()
        if ep % log_every == 0 or ep == epochs:
            model.eval()
            with torch.no_grad():
                p = torch.sigmoid(model(Xt, adj)[i_te]).numpy()
            history.append({"epoch": ep, "auc": safe_auc(labels[i_te], p)})
            model.train()

    model.eval()
    with torch.no_grad():
        p = torch.sigmoid(model(Xt, adj)[i_te]).numpy()
    pred = (p > 0.5).astype(int)
    yt = labels[i_te].astype(int)
    # Only AUC can be undefined; the other metrics are always reported as measured.
    m = dict(
        auc=round(safe_auc(yt, p), 4),
        f1=round(f1_score(yt, pred, zero_division=0), 4),
        precision=round(precision_score(yt, pred, zero_division=0), 4),
        recall=round(recall_score(yt, pred, zero_division=0), 4),
        train_time=round(time.time() - t0, 1),
    )
    log_fn(f" [GraphSAGE] {N} nós, {len(src)} arestas. Tempo: {m['train_time']}s")
    return m, history
362
+
363
+ # ══════════════════════════════════════════════════════════════════════════════
364
+ # XGBOOST BASELINE
365
+ # ══════════════════════════════════════════════════════════════════════════════
366
+
367
def train_xgboost(tables, log_fn=print):
    """Train the tabular baseline on hand-built per-customer aggregates.

    Despite the name, the model is scikit-learn's
    ``GradientBoostingClassifier``. Features are customer columns plus
    order, line-item and supplier aggregates; evaluation uses a stratified
    80/20 split of customers.

    Args:
        tables: dict containing ``customers``, ``orders``, ``lineitem`` and
            ``supplier`` DataFrames.
        log_fn: callable receiving log lines.

    Returns:
        dict with ``auc``, ``f1``, ``precision``, ``recall`` and ``train_time``.
    """
    t0 = time.time()
    customers, orders, lineitem, supplier = (
        tables["customers"], tables["orders"], tables["lineitem"], tables["supplier"]
    )

    def per_customer(frame):
        # Turn a groupby-on-o_custkey result into a frame keyed by c_custkey.
        return frame.reset_index().rename(columns={"o_custkey": "c_custkey"})

    f = customers[["c_custkey", "c_acctbal", "c_nationkey", "c_account_age_days", "c_num_prev_orders"]].copy()

    oa = per_customer(orders.groupby("o_custkey").agg(
        ord_cnt=("o_orderkey", "count"),
        ord_mean=("o_totalprice", "mean"),
        ord_max=("o_totalprice", "max"),
    ))
    f = f.merge(oa, on="c_custkey", how="left")

    # The key is named l_orderkey in lineitem and o_orderkey in orders, so the
    # join must name both sides; on="o_orderkey" raises KeyError.
    li = lineitem.merge(orders[["o_orderkey", "o_custkey"]],
                        left_on="l_orderkey", right_on="o_orderkey", how="left")
    la = per_customer(li.groupby("o_custkey").agg(
        li_cnt=("l_quantity", "count"),
        li_price=("l_extendedprice", "mean"),
        li_disc=("l_discount", "mean"),
    ))
    f = f.merge(la, on="c_custkey", how="left")

    sw = li.merge(supplier, left_on="l_suppkey", right_on="s_suppkey", how="left")
    sa = per_customer(sw.groupby("o_custkey").agg(
        sup_risk=("s_risk_flag", "sum"),
        sup_bal=("s_acctbal", "mean"),
    ))
    f = f.merge(sa, on="c_custkey", how="left").drop(columns=["c_custkey"]).fillna(0)

    fraud_c = orders.groupby("o_custkey")["is_fraud"].max()
    y = customers["c_custkey"].map(fraud_c).fillna(0).values.astype(int)
    X = f.values.astype(np.float32)
    i_tr, i_te = train_test_split(np.arange(len(y)), test_size=0.2, random_state=42, stratify=y)

    model = GradientBoostingClassifier(n_estimators=80, max_depth=4, learning_rate=0.05,
                                       subsample=0.8, random_state=42)
    model.fit(X[i_tr], y[i_tr])
    p = model.predict_proba(X[i_te])[:, 1]
    pred = (p > 0.5).astype(int)

    # roc_auc_score raises ValueError when the test labels contain a single
    # class; only AUC falls back, the other metrics are reported as measured.
    try:
        auc = roc_auc_score(y[i_te], p)
    except ValueError:
        auc = 0.5
    m = dict(
        auc=round(auc, 4),
        f1=round(f1_score(y[i_te], pred, zero_division=0), 4),
        precision=round(precision_score(y[i_te], pred, zero_division=0), 4),
        recall=round(recall_score(y[i_te], pred, zero_division=0), 4),
        train_time=round(time.time() - t0, 1),
    )
    log_fn(f" [XGBoost] features={X.shape[1]} Tempo={m['train_time']}s")
    return m
396
+
397
+ # ══════════════════════════════════════════════════════════════════════════════
398
+ # PIPELINE
399
+ # ══════════════════════════════════════════════════════════════════════════════
400
+
401
def run_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops, progress=gr.Progress()):
    """Run the whole demo: generate data, discover routes, train three models, plot.

    Args:
        n_customers, n_orders: table sizes (cast to int).
        fraud_rate: fraud percentage, e.g. ``5`` for 5% (divided by 100 here).
        hidden_dim, num_epochs: hyper-parameters shared by RelGNN and GraphSAGE.
        max_hops: maximum route length for ``discover_atomic_routes``.
        progress: Gradio progress tracker, called as ``progress(fraction, desc=...)``.

    Returns:
        ``(figure, metrics_df, routes_df, summary_markdown, log_text)``.
    """
    logs = []

    def log(m):
        logs.append(str(m))

    progress(0.05, desc="Gerando TPC-H...")
    tables = generate_tpch_data(int(n_customers), int(n_orders), float(fraud_rate) / 100, seed=42)
    log(f"✅ {int(n_customers)} clientes · {int(n_orders)} pedidos · {tables['orders']['is_fraud'].sum()} fraudes")

    progress(0.15, desc="Rotas atômicas...")
    routes = discover_atomic_routes(tables, max_hops=int(max_hops))
    log(f"✅ {len(routes)} rotas descobertas")
    for r in routes:
        log(f" → {' → '.join(r.path)} (hops={r.n_hops} α={r.attention_weight:.3f})")

    progress(0.30, desc="Treinando RelGNN...")
    rm, rh = train_relgnn(tables, routes, int(hidden_dim), int(num_epochs), log, progress)
    log(f"✅ RelGNN AUC={rm['auc']} F1={rm['f1']} {rm['train_time']}s")

    progress(0.70, desc="Treinando GraphSAGE...")
    gm, gh = train_graphsage(tables, int(hidden_dim), int(num_epochs), log)
    log(f"✅ GraphSAGE AUC={gm['auc']} F1={gm['f1']} {gm['train_time']}s")

    progress(0.87, desc="Treinando XGBoost...")
    xm = train_xgboost(tables, log)
    log(f"✅ XGBoost AUC={xm['auc']} F1={xm['f1']} {xm['train_time']}s")

    progress(0.95, desc="Plotando...")
    fig = build_figure(rm, gm, xm, rh, gh, routes)

    # Build every row with the same keys: a differently-encoded "Precisão" key
    # in one row would create a second column filled with NaN.
    def metrics_row(label, m):
        return {"Modelo": label, "AUC": m["auc"], "F1": m["f1"], "Precisão": m["precision"],
                "Recall": m["recall"], "Tempo(s)": m["train_time"]}

    metrics_df = pd.DataFrame([
        metrics_row("🔷 RelGNN", rm),
        metrics_row("🟣 GraphSAGE", gm),
        metrics_row("🟡 XGBoost", xm),
    ]).round(4)

    routes_df = pd.DataFrame([
        {"Rota": " → ".join(r.path), "Hops": r.n_hops,
         "Peso α": round(r.attention_weight, 4), "Ativa": "✅" if r.active else "—"}
        for r in routes
    ])

    # Deltas are signed: RelGNN can be worse or slower than GraphSAGE, so the
    # sign is derived from the value instead of being hard-coded in the text.
    da = (rm["auc"] - gm["auc"]) * 100
    df1 = (rm["f1"] - gm["f1"]) * 100
    dt = (1 - rm["train_time"] / max(gm["train_time"], 0.1)) * 100  # % of time saved
    dt_sign = "−" if dt >= 0 else "+"
    summary = (
        f"## 🎯 Resultado Final\n\n| |RelGNN|GraphSAGE|Δ|\n|---|---|---|---|\n"
        f"|AUC|**{rm['auc']}**|{gm['auc']}|**{da:+.1f}%**|\n"
        f"|F1|**{rm['f1']}**|{gm['f1']}|**{df1:+.1f}%**|\n"
        f"|Tempo|**{rm['train_time']}s**|{gm['train_time']}s|**{dt_sign}{abs(dt):.0f}%**|\n\n"
        f"🚀 {len(routes)} rotas atômicas · zero conversão para grafo · zero feature engineering"
    )

    progress(1.0)
    log("🏁 Concluído!")
    return fig, metrics_df, routes_df, summary, "\n".join(logs)
446
 
447
+ # ══════════════════════════════════════════════════════════════════════════════
448
+ # PLOTLY FIGURE
449
+ # ══════════════════════════════════════════════════════════════════════════════
450
+
451
def build_figure(rm, gm, xm, rh, gh, routes):
    """Build the 2x3 Plotly dashboard comparing RelGNN, GraphSAGE and XGBoost.

    Panels, left to right and top to bottom: AUC per epoch, grouped metric
    bars, training time, route attention weights, RelGNN-minus-GraphSAGE
    deltas, and a radar chart.

    Args:
        rm: RelGNN metrics; read keys are ``auc``, ``f1``, ``precision``,
            ``recall`` and ``train_time``.
        gm: GraphSAGE metrics, same keys as ``rm``.
        xm: XGBoost metrics, same keys as ``rm``.
        rh: RelGNN history; each entry is indexed by ``"epoch"`` and ``"auc"``.
        gh: GraphSAGE history, same entry layout as ``rh``.
        routes: Route objects exposing ``path``, ``attention_weight`` and
            ``active``.

    Returns:
        The assembled Plotly figure.
    """
    BG = "#0a0e1a"
    PANEL = "#0f1629"
    CYAN = "#00d4ff"
    PURP = "#7c3aed"
    AMBER = "#f59e0b"
    GREEN = "#10b981"
    GRAY = "#64748b"

    # One table drives every per-model trace so names, data and colours
    # cannot drift apart between panels.
    models = [("RelGNN", rm, CYAN), ("GraphSAGE", gm, PURP), ("XGBoost", xm, AMBER)]
    metric_keys = ["auc", "f1", "precision", "recall"]
    metric_labels = ["AUC", "F1", "Precisão", "Recall"]

    specs = [
        [{"type": "xy"}, {"type": "xy"}, {"type": "xy"}],
        [{"type": "xy"}, {"type": "xy"}, {"type": "polar"}],
    ]
    fig = make_subplots(
        rows=2, cols=3, specs=specs, vertical_spacing=0.22, horizontal_spacing=0.10,
        subplot_titles=[
            "Convergência AUC-ROC", "Métricas Comparativas", "Tempo de Treino (s)",
            "Pesos de Atenção (Rotas)", "Δ RelGNN vs GraphSAGE (%)", "Radar de Performance",
        ],
    )

    # (1,1) AUC per epoch for the two models that report a history.
    fig.add_trace(
        go.Scatter(
            x=[h["epoch"] for h in rh], y=[h["auc"] for h in rh], name="RelGNN",
            line=dict(color=CYAN, width=3), fill="tozeroy", fillcolor="rgba(0,212,255,0.07)",
        ),
        row=1, col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=[h["epoch"] for h in gh], y=[h["auc"] for h in gh], name="GraphSAGE",
            line=dict(color=PURP, width=2, dash="dash"),
        ),
        row=1, col=1,
    )

    # (1,2) Grouped bars: one trace per model across the four metrics.
    for name, metrics, color in models:
        fig.add_trace(
            go.Bar(
                x=metric_labels, y=[metrics[k] for k in metric_keys], name=name,
                marker_color=color, opacity=0.85, showlegend=False,
            ),
            row=1, col=2,
        )

    # (1,3) Training time per model.
    times = [metrics["train_time"] for _, metrics, _ in models]
    fig.add_trace(
        go.Bar(
            x=[name for name, _, _ in models], y=times,
            marker_color=[color for _, _, color in models], opacity=0.85, showlegend=False,
            text=[f"{t:.1f}s" for t in times], textposition="outside",
        ),
        row=1, col=3,
    )

    # (2,1) Attention weight per route. Labels keep only the last two path
    # elements; slicing with [-2:] already returns the whole path when it has
    # two or fewer elements.
    route_labels = [" → ".join(r.path[-2:]) for r in routes]
    route_weights = [r.attention_weight for r in routes]
    fig.add_trace(
        go.Bar(
            x=route_weights, y=route_labels, orientation="h",
            marker_color=[GREEN if r.active else GRAY for r in routes],
            opacity=0.85, showlegend=False,
            text=[f"α={w:.3f}" for w in route_weights], textposition="outside",
        ),
        row=2, col=1,
    )

    # (2,2) RelGNN minus GraphSAGE, scaled by 100; green when not negative.
    deltas = [(rm[k] - gm[k]) * 100 for k in metric_keys]
    fig.add_trace(
        go.Bar(
            x=metric_labels, y=deltas,
            marker_color=[GREEN if d >= 0 else "#ef4444" for d in deltas],
            opacity=0.85, showlegend=False,
            text=[f"+{d:.1f}%" if d >= 0 else f"{d:.1f}%" for d in deltas],
            textposition="outside",
        ),
        row=2, col=2,
    )
    fig.add_hline(y=0, line_color=GRAY, line_width=1, row=2, col=2)

    # (2,3) Radar. "Velocidade" is 1 - time / slowest time, so the slowest
    # model scores 0. Falling back to 1.0 when every time is 0 avoids a
    # ZeroDivisionError and leaves all other inputs unchanged.
    radar_axes = ["AUC", "F1", "Precisão", "Recall", "Velocidade"]
    slowest = max(times) or 1.0
    for name, metrics, color in models:
        values = [metrics[k] for k in metric_keys] + [1 - metrics["train_time"] / slowest]
        fig.add_trace(
            go.Scatterpolar(
                # Repeat the first point so the polygon is closed.
                r=values + [values[0]], theta=radar_axes + [radar_axes[0]], name=name,
                fill="toself", line_color=color, opacity=0.55, showlegend=False,
            ),
            row=2, col=3,
        )

    fig.update_layout(
        height=680, paper_bgcolor=BG, plot_bgcolor=PANEL, barmode="group",
        font=dict(color="#e2e8f0", family="monospace", size=11),
        title=dict(text="RelGNN · TPC-H Fraud Detection", font=dict(size=14, color=CYAN), x=0.5),
        legend=dict(bgcolor="#141c33", bordercolor="#1e2d4a"),
    )
    fig.update_xaxes(gridcolor="#1e2d4a")
    fig.update_yaxes(gridcolor="#1e2d4a")
    fig.update_yaxes(range=[0.35, 1.05], row=1, col=1)
    fig.update_yaxes(range=[0.35, 1.05], row=1, col=2)
    return fig
492
 
493
+ # ══════════════════════════════════════════════════════════════════════════════
494
+ # GRADIO UI
495
+ # ══════════════════════════════════════════════════════════════════════════════
496
 
497
  CSS = ".gradio-container{max-width:1100px!important} footer{display:none!important}"
498
 
499
  with gr.Blocks(css=CSS, title="RelGNN") as demo:
500
  gr.Markdown("# ⬑ RelGNN β€” Deep Relational Learning\n### Do SQL ao Graph AI sem Engenharia Manual Β· TPC-H Fraud Detection")
 
501
  with gr.Row():
502
  with gr.Column(scale=1, min_width=230):
503
  gr.Markdown("### βš™οΈ Dataset")
 
505
  n_orders = gr.Slider(500, 10000, value=2000, step=500, label="NΒΊ Pedidos")
506
  fraud_rate = gr.Slider(1, 20, value=5, step=1, label="Fraude (%)")
507
  gr.Markdown("### 🧠 Modelo")
508
+ hidden_dim = gr.Slider(16, 128, value=64, step=16, label="Hidden Dim")
509
+ num_epochs = gr.Slider(10, 100, value=50, step=10, label="Γ‰pocas")
510
+ max_hops = gr.Slider(1, 4, value=3, step=1, label="Max Hops")
511
  btn = gr.Button("πŸš€ Rodar Pipeline", variant="primary", size="lg")
 
512
  with gr.Column(scale=3):
513
  with gr.Tabs():
514
+ with gr.Tab("πŸ“Š VisualizaΓ§Γ΅es"): plot_out = gr.Plot()
 
515
  with gr.Tab("πŸ“‹ MΓ©tricas"):
516
  metrics_out = gr.Dataframe(label="ComparaΓ§Γ£o de Modelos")
517
  routes_out = gr.Dataframe(label="Rotas AtΓ΄micas")
518
+ with gr.Tab("πŸ“ Resumo"): summary_out = gr.Markdown()
519
+ with gr.Tab("πŸ”§ Log"): log_out = gr.Textbox(lines=22, max_lines=35)
520
+ btn.click(fn=run_pipeline,
521
+ inputs=[n_customers,n_orders,fraud_rate,hidden_dim,num_epochs,max_hops],
522
+ outputs=[plot_out,metrics_out,routes_out,summary_out,log_out])
 
 
 
 
523
 
524
  if __name__ == "__main__":
525
  demo.launch()