Danielfonseca1212 committed on
Commit
d4d8ec1
·
verified ·
1 Parent(s): c20c869

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +101 -75
app.py CHANGED
@@ -1,8 +1,25 @@
1
  """
2
- RelGNN — Deep Relational Learning para Detecção de Fraude
3
  Projeto 8: Do SQL ao Graph AI sem Engenharia Manual
4
  """
5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6
  import gradio as gr
7
  import pandas as pd
8
  import numpy as np
@@ -17,9 +34,10 @@ from relgnn.model import RelGNN, RelGNNConfig
17
  from baseline.graphsage_baseline import GraphSAGEBaseline
18
  from baseline.xgboost_baseline import XGBoostBaseline
19
 
 
20
  def run_full_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops, progress=gr.Progress()):
21
  logs = []
22
- def log(msg): logs.append(msg)
23
 
24
  progress(0.05, desc="Gerando dataset TPC-H...")
25
  tables = generate_tpch_data(int(n_customers), int(n_orders), float(fraud_rate)/100.0, seed=42)
@@ -29,24 +47,23 @@ def run_full_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs,
29
  routes = discover_atomic_routes(tables, RouteConfig(max_hops=int(max_hops)))
30
  log(f"✅ {len(routes)} rotas atômicas descobertas")
31
  for r in routes:
32
- log(f" → {' → '.join(r.path)} (α={r.attention_weight:.3f})")
33
 
34
  progress(0.30, desc="Treinando RelGNN...")
35
  relgnn = RelGNN(RelGNNConfig(hidden_dim=int(hidden_dim), num_epochs=int(num_epochs)))
36
  rm, rh = relgnn.fit(tables, routes, log_fn=log, progress_fn=progress)
37
- log(f"✅ RelGNN AUC={rm['auc']:.4f} F1={rm['f1']:.4f} {rm['train_time']:.1f}s")
38
 
39
  progress(0.70, desc="Treinando GraphSAGE...")
40
  gs = GraphSAGEBaseline(hidden_dim=int(hidden_dim), num_epochs=int(num_epochs))
41
  gm, gh = gs.fit(tables, log_fn=log)
42
- log(f"✅ GraphSAGE AUC={gm['auc']:.4f} F1={gm['f1']:.4f} {gm['train_time']:.1f}s")
43
 
44
  progress(0.85, desc="Treinando XGBoost...")
45
- xgb_metrics = XGBoostBaseline().fit(tables, log_fn=log)
46
- xm = xgb_metrics
47
- log(f"✅ XGBoost AUC={xm['auc']:.4f} F1={xm['f1']:.4f} {xm['train_time']:.1f}s")
48
 
49
- progress(0.93, desc="Plotando...")
50
  fig = build_figure(rm, gm, xm, rh, gh, routes)
51
 
52
  metrics_df = pd.DataFrame([
@@ -60,90 +77,98 @@ def run_full_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs,
60
  "Peso α": round(r.attention_weight, 4), "Ativa": "✅" if r.active else "—",
61
  } for r in routes])
62
 
63
- da = (rm["auc"]-gm["auc"])*100
64
- dt = (1-rm["train_time"]/max(gm["train_time"],0.1))*100
 
65
  summary = (
66
  f"## 🎯 Resultado Final\n\n"
67
  f"| | RelGNN | GraphSAGE | Δ |\n|---|---|---|---|\n"
68
- f"| AUC | **{rm['auc']:.4f}** | {gm['auc']:.4f} | **+{da:.1f}%** |\n"
69
- f"| F1 | **{rm['f1']:.4f}** | {gm['f1']:.4f} | **+{(rm['f1']-gm['f1'])*100:.1f}%** |\n"
70
  f"| Tempo | **{rm['train_time']:.1f}s** | {gm['train_time']:.1f}s | **−{dt:.0f}%** |\n\n"
71
- f"🚀 **{len(routes)} rotas atômicas** · Zero conversão para grafo · Zero feature engineering"
 
72
  )
73
 
74
  progress(1.0)
75
- log("🏁 Concluído!")
76
  return fig, metrics_df, routes_df, summary, "\n".join(logs)
77
 
78
 
def build_figure(rm, gm, xm, rh, gh, routes):
    """Build the 2x3 Plotly dashboard comparing RelGNN, GraphSAGE and XGBoost.

    Panels, row by row: AUC per epoch, grouped metric bars, training time,
    route attention weights, RelGNN-minus-GraphSAGE deltas, and a radar chart.

    Args:
        rm, gm, xm: metric dicts for RelGNN, GraphSAGE and XGBoost. The keys
            read here are "auc", "f1", "precision", "recall" and "train_time".
        rh, gh: training histories for RelGNN and GraphSAGE; every entry is
            indexed with "epoch" and "auc".
        routes: objects exposing ``path``, ``attention_weight`` and ``active``.

    Returns:
        The figure created by ``make_subplots`` with every trace added.
    """
    bg, panel, grid = "#0a0e1a", "#0f1629", "#1e2d4a"
    cyan, purple, amber = "#00d4ff", "#7c3aed", "#f59e0b"
    green, gray, red = "#10b981", "#64748b", "#ef4444"

    metric_keys = ["auc", "f1", "precision", "recall"]
    metric_names = ["AUC", "F1", "Precisão", "Recall"]
    # One row per model keeps name, metrics and colour aligned in every panel.
    models = [("RelGNN", rm, cyan), ("GraphSAGE", gm, purple), ("XGBoost", xm, amber)]

    # Fresh dict per cell so no spec object is shared between subplots.
    specs = [
        [{"type": kind} for kind in row]
        for row in (("xy", "xy", "xy"), ("xy", "xy", "polar"))
    ]
    fig = make_subplots(
        rows=2,
        cols=3,
        subplot_titles=[
            "Convergência AUC-ROC", "Métricas Comparativas", "Tempo de Treino (s)",
            "Pesos Atenção (Rotas)", "Δ RelGNN vs GraphSAGE", "Radar de Performance",
        ],
        vertical_spacing=0.20,
        horizontal_spacing=0.10,
        specs=specs,
    )

    # 1. Convergence curves (AUC per epoch).
    fig.add_trace(
        go.Scatter(
            x=[h["epoch"] for h in rh],
            y=[h["auc"] for h in rh],
            name="RelGNN",
            line=dict(color=cyan, width=3),
            fill="tozeroy",
            fillcolor="rgba(0,212,255,0.07)",
        ),
        row=1, col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=[h["epoch"] for h in gh],
            y=[h["auc"] for h in gh],
            name="GraphSAGE",
            line=dict(color=purple, width=2, dash="dash"),
            showlegend=True,
        ),
        row=1, col=1,
    )

    # 2. Grouped metric bars, one trace per model.
    for name, metrics, color in models:
        fig.add_trace(
            go.Bar(
                x=metric_names,
                y=[metrics[key] for key in metric_keys],
                name=name,
                marker_color=color,
                opacity=0.85,
                showlegend=False,
            ),
            row=1, col=2,
        )

    # 3. Training time per model.
    train_times = [metrics["train_time"] for _, metrics, _ in models]
    fig.add_trace(
        go.Bar(
            x=[name for name, _, _ in models],
            y=train_times,
            marker_color=[color for _, _, color in models],
            showlegend=False,
            opacity=0.85,
            text=[f"{seconds:.1f}s" for seconds in train_times],
            textposition="outside",
        ),
        row=1, col=3,
    )

    # 4. Route attention weights; long paths are labelled by their last two hops.
    route_labels = [" ".join(r.path[-2:] if len(r.path) > 2 else r.path) for r in routes]
    route_weights = [r.attention_weight for r in routes]
    fig.add_trace(
        go.Bar(
            x=route_weights,
            y=route_labels,
            orientation="h",
            marker_color=[green if r.active else gray for r in routes],
            opacity=0.85,
            showlegend=False,
            text=[f"α={w:.3f}" for w in route_weights],
            textposition="outside",
        ),
        row=2, col=1,
    )

    # 5. RelGNN minus GraphSAGE, in percentage points.
    deltas = [(rm[key] - gm[key]) * 100 for key in metric_keys]
    fig.add_trace(
        go.Bar(
            x=metric_names,
            y=deltas,
            marker_color=[green if d >= 0 else red for d in deltas],
            showlegend=False,
            opacity=0.85,
            text=[f"+{d:.1f}%" if d >= 0 else f"{d:.1f}%" for d in deltas],
            textposition="outside",
        ),
        row=2, col=2,
    )
    fig.add_hline(y=0, line_color=gray, line_width=1, row=2, col=2)

    # 6. Radar: the four metrics plus a speed score relative to the slowest model.
    radar_axes = ["AUC", "F1", "Precisão", "Recall", "Velocidade"]
    slowest = max(train_times)
    for name, metrics, color in models:
        # The slowest model scores 0. When every time is 0 the models tie, so
        # they all score 0 instead of raising ZeroDivisionError.
        speed = 1 - metrics["train_time"] / slowest if slowest > 0 else 0.0
        values = [metrics[key] for key in metric_keys] + [speed]
        fig.add_trace(
            go.Scatterpolar(
                r=values + [values[0]],  # repeat the first point to close the polygon
                theta=radar_axes + [radar_axes[0]],
                name=name,
                fill="toself",
                line_color=color,
                opacity=0.55,
                showlegend=False,
            ),
            row=2, col=3,
        )

    fig.update_layout(
        height=680,
        paper_bgcolor=bg,
        plot_bgcolor=panel,
        font=dict(color="#e2e8f0", family="monospace", size=11),
        title=dict(text="RelGNN · TPC-H Fraud Detection", font=dict(size=14, color=cyan), x=0.5),
        legend=dict(bgcolor="#141c33", bordercolor=grid),
        barmode="group",
    )
    fig.update_xaxes(gridcolor=grid)
    fig.update_yaxes(gridcolor=grid)
    fig.update_yaxes(range=[0.35, 1.05], row=1, col=1)
    fig.update_yaxes(range=[0.35, 1.05], row=1, col=2)
    return fig
148
 
149
 
@@ -161,7 +186,7 @@ with gr.Blocks(css=CSS, title="RelGNN") as demo:
161
  gr.Markdown("### 🧠 Modelo")
162
  hidden_dim = gr.Slider(16, 128, value=64, step=16, label="Hidden Dim")
163
  num_epochs = gr.Slider(10, 100, value=50, step=10, label="Épocas")
164
- max_hops = gr.Slider(1, 4, value=3, step=1, label="Max Hops")
165
  btn = gr.Button("🚀 Rodar Pipeline", variant="primary", size="lg")
166
 
167
  with gr.Column(scale=3):
@@ -169,14 +194,15 @@ with gr.Blocks(css=CSS, title="RelGNN") as demo:
169
  with gr.Tab("📊 Visualizações"):
170
  plot_out = gr.Plot()
171
  with gr.Tab("📋 Métricas"):
172
- metrics_out = gr.Dataframe(label="Comparação")
173
  routes_out = gr.Dataframe(label="Rotas Atômicas")
174
  with gr.Tab("📝 Resumo"):
175
  summary_out = gr.Markdown()
176
  with gr.Tab("🔧 Log"):
177
  log_out = gr.Textbox(lines=22, max_lines=35)
178
 
179
- btn.click(fn=run_full_pipeline,
 
180
  inputs=[n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops],
181
  outputs=[plot_out, metrics_out, routes_out, summary_out, log_out])
182
 
 
1
  """
2
+ RelGNN — Deep Relational Learning
3
  Projeto 8: Do SQL ao Graph AI sem Engenharia Manual
4
  """
5
 
6
+ # ── Auto-install (garante dependências no HF Spaces) ──────────────────────────
7
+ import subprocess, sys
8
+
def install(pkg):
    """Install ``pkg`` with pip, run through the current interpreter.

    Raises ``subprocess.CalledProcessError`` when pip exits with a
    non-zero status.
    """
    pip_command = [sys.executable, "-m", "pip", "install", pkg, "-q"]
    subprocess.check_call(pip_command)
11
+
# Install missing third-party packages on the fly, then make them importable.
# A package installed after interpreter start-up may not be visible to the
# import system's finder caches, so they are invalidated before retrying.
# The retry also binds the module name on both paths.
import importlib

try:
    import plotly
except ImportError:
    install("plotly")
    importlib.invalidate_caches()
    import plotly

try:
    import sklearn
except ImportError:
    install("scikit-learn")
    importlib.invalidate_caches()
    import sklearn
21
+
22
+ # ── Imports principais ─────────────────────────────────────────────────────────
23
  import gradio as gr
24
  import pandas as pd
25
  import numpy as np
 
34
  from baseline.graphsage_baseline import GraphSAGEBaseline
35
  from baseline.xgboost_baseline import XGBoostBaseline
36
 
37
+
38
  def run_full_pipeline(n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops, progress=gr.Progress()):
39
  logs = []
40
+ def log(msg): logs.append(str(msg))
41
 
42
  progress(0.05, desc="Gerando dataset TPC-H...")
43
  tables = generate_tpch_data(int(n_customers), int(n_orders), float(fraud_rate)/100.0, seed=42)
 
47
  routes = discover_atomic_routes(tables, RouteConfig(max_hops=int(max_hops)))
48
  log(f"✅ {len(routes)} rotas atômicas descobertas")
49
  for r in routes:
50
+ log(f" → {' → '.join(r.path)} (hops={r.n_hops}, α={r.attention_weight:.3f})")
51
 
52
  progress(0.30, desc="Treinando RelGNN...")
53
  relgnn = RelGNN(RelGNNConfig(hidden_dim=int(hidden_dim), num_epochs=int(num_epochs)))
54
  rm, rh = relgnn.fit(tables, routes, log_fn=log, progress_fn=progress)
55
+ log(f"✅ RelGNN AUC={rm['auc']:.4f} F1={rm['f1']:.4f} Tempo={rm['train_time']:.1f}s")
56
 
57
  progress(0.70, desc="Treinando GraphSAGE...")
58
  gs = GraphSAGEBaseline(hidden_dim=int(hidden_dim), num_epochs=int(num_epochs))
59
  gm, gh = gs.fit(tables, log_fn=log)
60
+ log(f"✅ GraphSAGE AUC={gm['auc']:.4f} F1={gm['f1']:.4f} Tempo={gm['train_time']:.1f}s")
61
 
62
  progress(0.85, desc="Treinando XGBoost...")
63
+ xm = XGBoostBaseline().fit(tables, log_fn=log)
64
+ log(f"✅ XGBoost AUC={xm['auc']:.4f} F1={xm['f1']:.4f} Tempo={xm['train_time']:.1f}s")
 
65
 
66
+ progress(0.93, desc="Plotando resultados...")
67
  fig = build_figure(rm, gm, xm, rh, gh, routes)
68
 
69
  metrics_df = pd.DataFrame([
 
77
  "Peso α": round(r.attention_weight, 4), "Ativa": "✅" if r.active else "—",
78
  } for r in routes])
79
 
80
+ da = (rm["auc"] - gm["auc"]) * 100
81
+ df1 = (rm["f1"] - gm["f1"]) * 100
82
+ dt = (1 - rm["train_time"] / max(gm["train_time"], 0.1)) * 100
83
  summary = (
84
  f"## 🎯 Resultado Final\n\n"
85
  f"| | RelGNN | GraphSAGE | Δ |\n|---|---|---|---|\n"
86
+ f"| AUC-ROC | **{rm['auc']:.4f}** | {gm['auc']:.4f} | **+{da:.1f}%** |\n"
87
+ f"| F1-Score | **{rm['f1']:.4f}** | {gm['f1']:.4f} | **+{df1:.1f}%** |\n"
88
  f"| Tempo | **{rm['train_time']:.1f}s** | {gm['train_time']:.1f}s | **−{dt:.0f}%** |\n\n"
89
+ f"🚀 **{len(routes)} rotas atômicas** descobertas automaticamente via FK paths\n\n"
90
+ f"🔑 Zero conversão para grafo · Zero feature engineering manual"
91
  )
92
 
93
  progress(1.0)
94
+ log("🏁 Pipeline completo!")
95
  return fig, metrics_df, routes_df, summary, "\n".join(logs)
96
 
97
 
def build_figure(rm, gm, xm, rh, gh, routes):
    """Build the 2x3 Plotly dashboard comparing RelGNN, GraphSAGE and XGBoost.

    Panels, row by row: AUC per epoch, grouped metric bars, training time,
    route attention weights, RelGNN-minus-GraphSAGE deltas, and a radar chart.

    Args:
        rm, gm, xm: metric dicts for RelGNN, GraphSAGE and XGBoost. The keys
            read here are "auc", "f1", "precision", "recall" and "train_time".
        rh, gh: training histories for RelGNN and GraphSAGE; every entry is
            indexed with "epoch" and "auc".
        routes: objects exposing ``path``, ``attention_weight`` and ``active``.

    Returns:
        The figure created by ``make_subplots`` with every trace added.
    """
    bg, panel, grid = "#0a0e1a", "#0f1629", "#1e2d4a"
    cyan, purple, amber = "#00d4ff", "#7c3aed", "#f59e0b"
    green, gray, red = "#10b981", "#64748b", "#ef4444"

    metric_keys = ["auc", "f1", "precision", "recall"]
    metric_names = ["AUC", "F1", "Precisão", "Recall"]
    # One row per model keeps name, metrics and colour aligned in every panel.
    models = [("RelGNN", rm, cyan), ("GraphSAGE", gm, purple), ("XGBoost", xm, amber)]

    # Fresh dict per cell so no spec object is shared between subplots.
    specs = [
        [{"type": kind} for kind in row]
        for row in (("xy", "xy", "xy"), ("xy", "xy", "polar"))
    ]
    fig = make_subplots(
        rows=2,
        cols=3,
        subplot_titles=[
            "Convergência AUC-ROC", "Métricas Comparativas", "Tempo de Treino (s)",
            "Pesos de Atenção (Rotas)", "Δ RelGNN vs GraphSAGE (%)", "Radar de Performance",
        ],
        specs=specs,
        vertical_spacing=0.20,
        horizontal_spacing=0.10,
    )

    # 1. Convergence curves (AUC per epoch).
    fig.add_trace(
        go.Scatter(
            x=[h["epoch"] for h in rh],
            y=[h["auc"] for h in rh],
            name="RelGNN",
            line=dict(color=cyan, width=3),
            fill="tozeroy",
            fillcolor="rgba(0,212,255,0.07)",
        ),
        row=1, col=1,
    )
    fig.add_trace(
        go.Scatter(
            x=[h["epoch"] for h in gh],
            y=[h["auc"] for h in gh],
            name="GraphSAGE",
            line=dict(color=purple, width=2, dash="dash"),
        ),
        row=1, col=1,
    )

    # 2. Grouped metric bars, one trace per model.
    for name, metrics, color in models:
        fig.add_trace(
            go.Bar(
                x=metric_names,
                y=[metrics[key] for key in metric_keys],
                name=name,
                marker_color=color,
                opacity=0.85,
                showlegend=False,
            ),
            row=1, col=2,
        )

    # 3. Training time per model.
    train_times = [metrics["train_time"] for _, metrics, _ in models]
    fig.add_trace(
        go.Bar(
            x=[name for name, _, _ in models],
            y=train_times,
            marker_color=[color for _, _, color in models],
            opacity=0.85,
            showlegend=False,
            text=[f"{seconds:.1f}s" for seconds in train_times],
            textposition="outside",
        ),
        row=1, col=3,
    )

    # 4. Route attention weights; long paths are labelled by their last two hops.
    route_labels = [" → ".join(r.path[-2:] if len(r.path) > 2 else r.path) for r in routes]
    route_weights = [r.attention_weight for r in routes]
    fig.add_trace(
        go.Bar(
            x=route_weights,
            y=route_labels,
            orientation="h",
            marker_color=[green if r.active else gray for r in routes],
            opacity=0.85,
            showlegend=False,
            text=[f"α={w:.3f}" for w in route_weights],
            textposition="outside",
        ),
        row=2, col=1,
    )

    # 5. RelGNN minus GraphSAGE, in percentage points.
    deltas = [(rm[key] - gm[key]) * 100 for key in metric_keys]
    fig.add_trace(
        go.Bar(
            x=metric_names,
            y=deltas,
            marker_color=[green if d >= 0 else red for d in deltas],
            opacity=0.85,
            showlegend=False,
            text=[f"+{d:.1f}%" if d >= 0 else f"{d:.1f}%" for d in deltas],
            textposition="outside",
        ),
        row=2, col=2,
    )
    fig.add_hline(y=0, line_color=gray, line_width=1, row=2, col=2)

    # 6. Radar: the four metrics plus a speed score relative to the slowest model.
    radar_axes = ["AUC", "F1", "Precisão", "Recall", "Velocidade"]
    slowest = max(train_times)
    for name, metrics, color in models:
        # The slowest model scores 0. When every time is 0 the models tie, so
        # they all score 0 instead of raising ZeroDivisionError.
        speed = 1 - metrics["train_time"] / slowest if slowest > 0 else 0.0
        values = [metrics[key] for key in metric_keys] + [speed]
        fig.add_trace(
            go.Scatterpolar(
                r=values + [values[0]],  # repeat the first point to close the polygon
                theta=radar_axes + [radar_axes[0]],
                name=name,
                fill="toself",
                line_color=color,
                opacity=0.55,
                showlegend=False,
            ),
            row=2, col=3,
        )

    fig.update_layout(
        height=680,
        paper_bgcolor=bg,
        plot_bgcolor=panel,
        barmode="group",
        font=dict(color="#e2e8f0", family="monospace", size=11),
        title=dict(text="RelGNN · TPC-H Fraud Detection", font=dict(size=14, color=cyan), x=0.5),
        legend=dict(bgcolor="#141c33", bordercolor=grid),
    )
    fig.update_xaxes(gridcolor=grid)
    fig.update_yaxes(gridcolor=grid)
    fig.update_yaxes(range=[0.35, 1.05], row=1, col=1)
    fig.update_yaxes(range=[0.35, 1.05], row=1, col=2)
    return fig
173
 
174
 
 
186
  gr.Markdown("### 🧠 Modelo")
187
  hidden_dim = gr.Slider(16, 128, value=64, step=16, label="Hidden Dim")
188
  num_epochs = gr.Slider(10, 100, value=50, step=10, label="Épocas")
189
+ max_hops = gr.Slider(1, 4, value=3, step=1, label="Max Hops")
190
  btn = gr.Button("🚀 Rodar Pipeline", variant="primary", size="lg")
191
 
192
  with gr.Column(scale=3):
 
194
  with gr.Tab("📊 Visualizações"):
195
  plot_out = gr.Plot()
196
  with gr.Tab("📋 Métricas"):
197
+ metrics_out = gr.Dataframe(label="Comparação de Modelos")
198
  routes_out = gr.Dataframe(label="Rotas Atômicas")
199
  with gr.Tab("📝 Resumo"):
200
  summary_out = gr.Markdown()
201
  with gr.Tab("🔧 Log"):
202
  log_out = gr.Textbox(lines=22, max_lines=35)
203
 
204
+ btn.click(
205
+ fn=run_full_pipeline,
206
  inputs=[n_customers, n_orders, fraud_rate, hidden_dim, num_epochs, max_hops],
207
  outputs=[plot_out, metrics_out, routes_out, summary_out, log_out])
208