Spaces:

uyen1109
/

DemoGraph

Sleeping

App Files Files Community

uyen1109 committed on Nov 20, 2025

Commit

466a439

verified ·

1 Parent(s): d6f6917

Upload app.py

Browse files

Files changed (1) hide show

app.py +135 -148

app.py CHANGED Viewed

@@ -1,177 +1,164 @@
 import gradio as gr
 import pandas as pd
-import networkx as nx
-import matplotlib.pyplot as plt
 import os
 from huggingface_hub import hf_hub_download
-# --- 1. SETUP & DATA LOADING ---
 REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"
 TOKEN = os.getenv("HF_TOKEN")
-print("Loading data from Hugging Face Hub...")
-# Global variables
-df_scores = pd.DataFrame()
-df_edges = pd.DataFrame()
-# 1.1 Tải file điểm số (Scores)
 try:
-    scores_path = hf_hub_download(repo_id=REPO_ID, filename="scores/node_scores_with_labels.csv", repo_type="model", token=TOKEN)
-    df_scores = pd.read_csv(scores_path)
-    if "address" in df_scores.columns:
-        df_scores["address"] = df_scores["address"].astype(str).str.lower().str.strip()
-        df_scores.set_index("address", inplace=True)
-    print(f"✅ Loaded {len(df_scores)} node scores.")
 except Exception as e:
-    print(f"⚠️ Error loading scores: {e}")
-# 1.2 Tải file cạnh (Edges)
 try:
-    edges_path = hf_hub_download(repo_id=REPO_ID, filename="graph/edges_all.csv", repo_type="model", token=TOKEN)
-    # Chỉ tải các cột cần thiết để tiết kiệm RAM
-    df_edges = pd.read_csv(edges_path, usecols=["src", "dst", "edge_type"])
-    df_edges["src"] = df_edges["src"].astype(str).str.lower().str.strip()
-    df_edges["dst"] = df_edges["dst"].astype(str).str.lower().str.strip()
-    print(f"✅ Loaded {len(df_edges)} edges.")
 except Exception as e:
-    print(f"⚠️ Error loading edges: {e}")
-# --- 2. HELPER FUNCTIONS ---
-def get_node_info(address):
-    """Lấy thông tin điểm số và nhãn"""
-    address = address.lower().strip()
-    # Case 1: Có trong bảng điểm (Model đã chấm điểm)
-    if address in df_scores.index:
-        row = df_scores.loc[address]
-        score = float(row.get("prob_criminal", row.get("susp", 0.0)))
-        label_map = {0: "Benign (0)", 1: "Criminal (1)"}
-        label_val = row.get("label", float('nan'))
-        label_str = label_map.get(label_val, "Unknown")
-        return score, label_str, "SCORED"
-    # Case 2: Không có điểm, kiểm tra xem có trong giao dịch không
-    # (Lưu ý: Kiểm tra này hơi chậm nếu df lớn, nhưng chấp nhận được cho demo)
-    is_in_edges = ((df_edges["src"] == address) | (df_edges["dst"] == address)).any()
-    if is_in_edges:
-        return None, "Unknown", "UNSCORED_BUT_FOUND"
-    return None, "Unknown", "NOT_FOUND"
-def draw_ego_graph(address):
-    """Vẽ đồ thị 1-hop"""
-    address = address.lower().strip()
-    # Lọc các giao dịch liên quan
-    subset = df_edges[(df_edges["src"] == address) | (df_edges["dst"] == address)].head(30)
-    if subset.empty:
-        fig, ax = plt.subplots(figsize=(6, 6))
-        ax.text(0.5, 0.5, "No transactions found", ha='center')
-        ax.axis('off')
-        return fig
-    # Tạo đồ thị
-    G = nx.from_pandas_edgelist(subset, source="src", target="dst", edge_attr="edge_type", create_using=nx.DiGraph())
-    pos = nx.spring_layout(G, seed=42, k=0.8)
-    plt.figure(figsize=(8, 8))
-    # Tô màu node
-    node_colors = []
-    node_sizes = []
-    for node in G.nodes():
-        if node == address:
-            node_colors.append("#FF4500") # Target: OrangeRed
-            node_sizes.append(400)
-        else:
-            node_colors.append("#1E90FF") # Neighbor: DodgerBlue
-            node_sizes.append(150)
-    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes, alpha=0.9)
-    nx.draw_networkx_edges(G, pos, alpha=0.4, arrowstyle='->', arrowsize=15, edge_color="gray")
-    # Label ngắn gọn
-    labels = {n: (n[:5] + ".." if n != address else "TARGET") for n in G.nodes()}
-    nx.draw_networkx_labels(G, pos, labels=labels, font_size=9, font_color="black")
-    plt.title(f"Ego Graph: {address[:10]}...\n(Showing top {len(subset)} txs)")
-    plt.axis('off')
-    return plt.gcf()
-# --- 3. CORE LOGIC ---
-def analyze_wallet(address):
-    if not address:
-        return "Please enter an address.", "N/A", None
-    address = address.strip()
-    score, label_str, status = get_node_info(address)
-    # Xử lý kết quả hiển thị
-    if status == "NOT_FOUND":
-        return f"❌ Address {address} not found in any transaction data.", "Unknown", None
-    plot = draw_ego_graph(address)
-    if status == "UNSCORED_BUT_FOUND":
-        return (
-            f"⚠️ **Not Scored via GNN**\n\n"
-            f"This address exists in the transaction list (`edges_all.csv`) but was filtered out during the GNN training graph construction (likely an isolated node or missing features).\n"
-            f"Therefore, the model did not assign a risk score.",
-            "Not Scored",
-            plot
         )
-    # Nếu có điểm (SCORED)
-    risk_level = "LOW 🟢"
-    if score > 0.8: risk_level = "CRITICAL 🔴"
-    elif score > 0.5: risk_level = "HIGH 🟠"
-    elif score > 0.2: risk_level = "MEDIUM 🟡"
-    result_text = (
-        f"### 🎯 Risk Score: {score:.4f}\n"
-        f"**Label:** {label_str}\n"
-        f"**Status:** Analyzed by GraphSAGE\n"
-    )
-    return result_text, risk_level, plot
-# --- 4. UI ---
-with gr.Blocks(title="ETH Fraud Inspector") as demo:
-    gr.Markdown("# 🕵️‍♀️ Ethereum Fraud Inspector (GraphSAGE v3)")
-    gr.Markdown("Investigate Ethereum wallets using Graph Neural Networks. Even if a wallet wasn't scored by the model, we will visualize its transaction history.")
     with gr.Row():
-        with gr.Column(scale=1):
-            inp_addr = gr.Textbox(label="Ethereum Address", placeholder="0x...", lines=1)
-            btn = gr.Button("🔍 Analyze", variant="primary")
-            gr.Markdown("### 💡 Try these addresses:")
-            # Lấy mẫu 1 ví có điểm (Criminal) và 1 ví chỉ có trong edges
-            examples = []
-            if not df_scores.empty:
-                 # Lấy 1 ví criminal
-                crim_example = df_scores[df_scores['label'] == 1].index[0] if 1 in df_scores['label'].values else df_scores.index[0]
-                examples.append(crim_example)
-            gr.Examples(examples=examples, inputs=inp_addr)
-        with gr.Column(scale=2):
-            with gr.Row():
-                lbl_risk = gr.Label(label="Risk Level")
-                out_text = gr.Markdown(label="Analysis Report")
-            out_plot = gr.Plot(label="Transaction Graph")
-    btn.click(fn=analyze_wallet, inputs=inp_addr, outputs=[out_text, lbl_risk, out_plot])
 if __name__ == "__main__":
     demo.launch()

 import gradio as gr
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from torch_geometric.nn import SAGEConv, BatchNorm
+import json
 import pandas as pd
+import numpy as np
 import os
 from huggingface_hub import hf_hub_download
+# --- 1. MODEL ARCHITECTURE ---
+# Must match exactly the architecture used for training in the notebook (Cell 16, page 25).
+class SAGE(nn.Module):
+    """Two-layer GraphSAGE node classifier.
+
+    Each layer applies SAGEConv -> BatchNorm -> ReLU -> Dropout, and a final
+    linear head maps the hidden representation to ``out_dim`` logits.
+    The attribute names (conv1, bn1, conv2, bn2, head, drop) are kept
+    unchanged so the module's state-dict keys stay the same.
+    """
+    def __init__(self, in_dim, h=128, out_dim=2, p_drop=0.3):
+        super().__init__()
+        # First message-passing layer and its normalisation.
+        self.conv1 = SAGEConv(in_dim, h, bias=True)
+        self.bn1 = BatchNorm(h)
+        # Second message-passing layer and its normalisation.
+        self.conv2 = SAGEConv(h, h, bias=True)
+        self.bn2 = BatchNorm(h)
+        # Classification head and the dropout shared by both layers.
+        self.head = nn.Linear(h, out_dim)
+        self.drop = nn.Dropout(p_drop)
+    def forward(self, x, edge_index):
+        """Return the logits produced for node features ``x`` and ``edge_index``."""
+        hidden = x
+        # Both layers share the same conv -> norm -> relu -> dropout pipeline.
+        for conv, norm in ((self.conv1, self.bn1), (self.conv2, self.bn2)):
+            hidden = self.drop(F.relu(norm(conv(hidden, edge_index))))
+        return self.head(hidden)
+# --- 2. SETUP & LOAD MODEL ---
 REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"
 TOKEN = os.getenv("HF_TOKEN")
+print("⏳ Downloading model artifacts...")
+# 2.1 Load the feature list (it defines the order of the model inputs)
 try:
+    cols_path = hf_hub_download(repo_id=REPO_ID, filename="hf_export/feature_columns.json", token=TOKEN)
+    # Read the JSON as UTF-8 explicitly instead of relying on the platform default encoding.
+    with open(cols_path, 'r', encoding='utf-8') as f:
+        FEATURE_COLS = json.load(f)
+    print(f"✅ Loaded {len(FEATURE_COLS)} feature columns.")
 except Exception as e:
+    print(f"⚠️ Could not load feature_columns.json. Using default fallback list. Error: {e}")
+    # Fallback feature list based on the notebook (Cells 8, 11, 12)
+    FEATURE_COLS = [
+        'out_deg', 'in_deg', 'eth_out_sum', 'eth_in_sum',
+        'unique_dst_cnt', 'unique_src_cnt', 'first_seen_ts', 'last_seen_ts',
+        'pr', 'clust_coef', 'betw'
+    ]
+# 2.2 Load the model weights (pytorch_model.bin)
 try:
+    model_path = hf_hub_download(repo_id=REPO_ID, filename="hf_export/pytorch_model.bin", token=TOKEN)
+    # in_dim must equal the number of features
+    model = SAGE(in_dim=len(FEATURE_COLS), h=128, out_dim=2, p_drop=0.3)
+    # torch.load unpickles the file. weights_only=True restricts unpickling to
+    # tensors and plain containers, so a tampered download cannot run arbitrary code.
+    # map_location keeps everything on CPU so the app also runs without a GPU.
+    state_dict = torch.load(model_path, map_location=torch.device('cpu'), weights_only=True)
+    model.load_state_dict(state_dict)
+    model.eval() # Inference mode (disables Dropout, BatchNorm uses running statistics)
+    print("✅ Model loaded successfully!")
 except Exception as e:
+    # Keep the app importable; predict_custom_node reports the failure to the user.
+    print(f"❌ Critical Error loading model: {e}")
+    model = None
+# --- 3. INFERENCE FUNCTION ---
+def predict_custom_node(*features):
+    """Score one manually entered wallet as an isolated node.
+
+    Args:
+        *features: One numeric value per entry of FEATURE_COLS, in that
+            order (the values of the number inputs in the UI).
+
+    Returns:
+        A ``(markdown_report, label)`` tuple. On any failure the report
+        holds the error message and the label is ``"Error"``.
+    """
+    if model is None:
+        return "Model not loaded correctly.", "Error"
+    # The model was built with in_dim == len(FEATURE_COLS); reject any other width up front.
+    if len(features) != len(FEATURE_COLS):
+        return f"Expected {len(FEATURE_COLS)} feature values, got {len(features)}.", "Error"
+    # An emptied number field can arrive as None; name the affected fields
+    # instead of surfacing a bare float() TypeError.
+    missing = [str(col) for col, val in zip(FEATURE_COLS, features) if val is None]
+    if missing:
+        return f"Missing value for: {', '.join(missing)}", "Error"
+    try:
+        # 1. Turn the UI values into a tensor of shape [1, num_features]
+        feat_values = [float(f) for f in features]
+        x = torch.tensor([feat_values], dtype=torch.float)
+        # NaN/inf inputs would yield a NaN probability, and `nan > 0.5` is False,
+        # which would silently be reported as BENIGN.
+        if not torch.isfinite(x).all():
+            return "All feature values must be finite numbers.", "Error"
+        # 2. Dummy edge index
+        # A manually entered node has no neighbour information, so an empty
+        # edge list of shape [2, 0] is passed. With no edges there are no
+        # neighbour messages to aggregate, so the prediction rests on the
+        # node's own features (presumably via SAGEConv's root-weight term;
+        # confirm against the PyG documentation).
+        edge_index = torch.empty((2, 0), dtype=torch.long)
+        # 3. Forward pass
+        with torch.no_grad():
+            logits = model(x, edge_index)
+            probs = torch.softmax(logits, dim=1)
+            prob_criminal = probs[0][1].item()
+        # 4. Format the result
+        label = "CRIMINAL 🔴" if prob_criminal > 0.5 else "BENIGN 🟢"
+        score_percent = f"{prob_criminal * 100:.2f}%"
+        explanation = (
+            f"### Prediction Result\n"
+            f"- **Probability of Fraud:** {score_percent}\n"
+            f"- **Verdict:** {label}\n\n"
+            f"### Debug Info\n"
+            f"- Input Shape: {x.shape}\n"
+            f"- Raw Logits: {logits.numpy()}\n"
+            f"- Model Architecture: GraphSAGE (2 layers, 128 hidden units)"
+        )
+        return explanation, label
+    except Exception as e:
+        # UI boundary: report the failure in the output panel instead of raising.
+        return f"Error during inference: {str(e)}", "Error"
+# --- 4. GRADIO UI ---
+with gr.Blocks(title="Inductive Fraud Prediction") as demo:
+    gr.Markdown("# 🧠 Inductive GraphSAGE Prediction")
+    gr.Markdown(
+        """
+        Demo này thể hiện tính **Inductive** của mô hình: Bạn có thể nhập thông số của một ví **hoàn toàn mới** (không có trong tập dữ liệu cũ) và mô hình sẽ dự đoán dựa trên những gì nó đã học được.
+        *Lưu ý: Vì nhập liệu thủ công, ta đang mô phỏng node này như một node cô lập (không có thông tin hàng xóm).*
+        """
+    )
     with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 1. Nhập Features (Đặc trưng) của Ví")
+            # One numeric input per feature column, in FEATURE_COLS order.
+            # Columns whose name contains "ts" get a sample epoch timestamp
+            # as default so the form is easy to test; all others start at 0.
+            inputs = [
+                gr.Number(label=col, value=1600000000 if "ts" in col else 0.0)
+                for col in FEATURE_COLS
+            ]
+        with gr.Column():
+            gr.Markdown("### 2. Kết quả Dự đoán")
+            btn_predict = gr.Button("Run Inference", variant="primary")
+            lbl_result = gr.Label(label="Prediction")
+            out_log = gr.Markdown()
+    # Clear button that resets the form
+    btn_clear = gr.Button("Clear Inputs")
+    def clear_fn():
+        """Return a zero for every feature input (resets all fields to 0)."""
+        return [0.0 for _ in inputs]
+    # Event wiring: run inference, then the reset handler
+    btn_predict.click(fn=predict_custom_node, inputs=inputs, outputs=[out_log, lbl_result])
+    btn_clear.click(fn=clear_fn, inputs=None, outputs=inputs)
 if __name__ == "__main__":
     demo.launch()