File size: 9,825 Bytes
2d320dd
466a439
 
 
 
2d320dd
466a439
672f23f
 
2d320dd
672f23f
2d320dd
 
672f23f
d4b1511
672f23f
466a439
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
672f23f
4051c4e
672f23f
d6f6917
 
2d320dd
672f23f
 
 
 
4051c4e
d4b1511
672f23f
 
 
4051c4e
 
 
 
 
 
 
 
d4b1511
672f23f
4051c4e
672f23f
4051c4e
 
d4b1511
4051c4e
d4b1511
4051c4e
 
d4b1511
4051c4e
 
672f23f
 
 
d4b1511
 
672f23f
d4b1511
4051c4e
 
 
672f23f
4051c4e
 
 
 
 
 
 
d4b1511
 
672f23f
4051c4e
 
 
 
 
672f23f
d4b1511
4051c4e
 
 
 
 
 
 
d4b1511
4051c4e
 
 
 
672f23f
4051c4e
 
 
 
672f23f
 
 
 
 
 
 
4051c4e
672f23f
4051c4e
672f23f
 
 
 
4051c4e
 
 
 
 
 
 
672f23f
 
4051c4e
d4b1511
4051c4e
 
 
 
 
 
 
 
 
 
d4b1511
 
4051c4e
672f23f
 
 
 
d4b1511
672f23f
 
d4b1511
672f23f
 
 
4051c4e
672f23f
 
 
4051c4e
 
466a439
d4b1511
4051c4e
d4b1511
4051c4e
d4b1511
 
672f23f
 
 
d4b1511
4051c4e
d4b1511
4051c4e
672f23f
 
4051c4e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d4b1511
4051c4e
d4b1511
672f23f
 
d4b1511
4051c4e
 
d6f6917
466a439
d4b1511
466a439
 
4051c4e
 
466a439
4051c4e
2d320dd
672f23f
d4b1511
672f23f
d4b1511
4051c4e
2d320dd
4051c4e
 
d4b1511
672f23f
4051c4e
672f23f
4051c4e
 
d4b1511
4051c4e
 
 
672f23f
4051c4e
 
672f23f
4051c4e
d4b1511
4051c4e
d4b1511
4051c4e
 
d4b1511
4051c4e
 
d4b1511
4051c4e
 
 
2d320dd
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
import gradio as gr
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv, BatchNorm
import pandas as pd
import numpy as np
import networkx as nx
import matplotlib.pyplot as plt
import os
import json
from huggingface_hub import hf_hub_download

# ==========================================
# 1. MODEL DEFINITION
# ==========================================
class SAGE(nn.Module):
    """Two-layer GraphSAGE network that emits per-node class logits.

    Each layer applies SAGEConv -> BatchNorm -> ReLU -> Dropout; a linear
    head then maps the ``h``-dimensional hidden state to ``out_dim`` logits.

    Args:
        in_dim: Number of input features per node.
        h: Hidden width shared by both convolution layers.
        out_dim: Number of output logits per node.
        p_drop: Dropout probability applied after each layer.
    """

    def __init__(self, in_dim, h=128, out_dim=2, p_drop=0.3):
        super().__init__()
        # Attribute names double as state_dict key prefixes, so they must
        # stay as they are for saved checkpoints to load.
        self.conv1 = SAGEConv(in_dim, h, bias=True)
        self.bn1 = BatchNorm(h)
        self.conv2 = SAGEConv(h, h, bias=True)
        self.bn2 = BatchNorm(h)
        self.head = nn.Linear(h, out_dim)
        self.drop = nn.Dropout(p_drop)

    def forward(self, x, edge_index):
        """Return the logits produced for node features ``x`` over ``edge_index``."""
        hidden = x
        stages = ((self.conv1, self.bn1), (self.conv2, self.bn2))
        for conv, norm in stages:
            hidden = conv(hidden, edge_index)
            hidden = self.drop(F.relu(norm(hidden)))
        return self.head(hidden)

# ==========================================
# 2. RESOURCE MANAGEMENT
# ==========================================
# Hugging Face Hub repository that smart_load_file() downloads artifacts from.
REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"
# Access token taken from the HF_TOKEN environment variable (None when unset).
TOKEN = os.getenv("HF_TOKEN")

# Module-wide state: filled in by load_resources(), read by the handlers and UI.
GLOBAL_DATA = {
    "model": None,  # SAGE instance once loaded; None means prediction is unavailable
    "df_scores": pd.DataFrame(),  # score table, indexed by address after loading
    "df_edges": pd.DataFrame(),  # edge list with src / dst / edge_type columns
    "feature_cols": [],  # feature names used to build the Predict inputs
    "status": "Initializing..."  # human-readable load log shown in the UI
}

def smart_load_file(filename):
    """Download ``filename`` from the Hub repo and return its local path.

    The repo root is tried first, then the ``hf_export/`` sub-folder. Every
    location is attempted with the configured token and, if that fails,
    with an explicitly anonymous request.

    Args:
        filename: File name relative to the repo root.

    Returns:
        The local path returned by ``hf_hub_download``, or ``None`` when
        every attempt failed (the collected errors are printed).
    """
    # Root first, then the legacy export folder.
    candidates = [filename, f"hf_export/{filename}"]
    # token=False forces an anonymous request. token=None would let
    # huggingface_hub fall back to its implicit credential (stored login or
    # HF_TOKEN), i.e. it would just repeat the first attempt.
    attempts = [("Token", TOKEN), ("No-Token", False)]

    errs = []
    for path in candidates:
        for label, token in attempts:
            try:
                return hf_hub_download(repo_id=REPO_ID, filename=path, token=token)
            except Exception as exc:  # best-effort: record and try the next option
                errs.append(f"{label} fail {path}: {exc}")

    print(f"⚠️ Failed to load {filename}. Details: {errs}")
    return None

# Feature names used to build the Predict form when no model could be set up,
# so the UI still renders its inputs.
_FALLBACK_FEATURE_COLS = (
    'out_deg', 'in_deg', 'eth_out_sum', 'eth_in_sum',
    'unique_dst_cnt', 'unique_src_cnt', 'first_seen_ts', 'last_seen_ts',
    'pr', 'clust_coef', 'betw', 'feat_11', 'feat_12', 'feat_13', 'feat_14',
)


def _load_scores(logs):
    """Load the score table into GLOBAL_DATA["df_scores"], indexed by address."""
    path = smart_load_file("node_scores_with_labels.csv")
    if not path:
        logs.append("❌ 'node_scores_with_labels.csv' download failed.")
        return
    try:
        df = pd.read_csv(path)
        # Find the address column case-insensitively; default to the first column.
        lowered = [str(c).lower() for c in df.columns]
        if "address" in lowered:
            addr_col = df.columns[lowered.index("address")]
        else:
            addr_col = df.columns[0]

        df[addr_col] = df[addr_col].astype(str).str.lower().str.strip()
        GLOBAL_DATA["df_scores"] = df.set_index(addr_col)
        logs.append(f"✅ Loaded Scores: {len(df)} rows.")
    except Exception as e:
        logs.append(f"❌ Error parsing scores csv: {e}")


def _load_edges(logs):
    """Load the edge list into GLOBAL_DATA["df_edges"] with normalised endpoints."""
    path = smart_load_file("edges_all.csv")
    if not path:
        logs.append("⚠️ 'edges_all.csv' download failed.")
        return
    try:
        edges = pd.read_csv(path, usecols=["src", "dst", "edge_type"])
        # Normalise endpoints the same way as the score index so lookups match.
        for col in ("src", "dst"):
            edges[col] = edges[col].astype(str).str.lower().str.strip()
        GLOBAL_DATA["df_edges"] = edges
        logs.append("✅ Loaded Edges.")
    except Exception as e:
        logs.append(f"⚠️ Edge parsing error: {e}")


def _resolve_feature_cols(detected_dim, logs):
    """Return exactly ``detected_dim`` feature names for the Predict form."""
    dummy = [f"Feature_{i}" for i in range(detected_dim)]
    cols_path = smart_load_file("feature_columns.json")
    if not cols_path:
        logs.append("⚠️ Using Dummy Feature Names (json missing)")
        return dummy
    try:
        with open(cols_path, "r", encoding="utf-8") as f:
            cols = json.load(f)
        if not isinstance(cols, list):
            raise TypeError(f"expected a JSON list, got {type(cols).__name__}")
    except (OSError, ValueError, TypeError) as e:
        # A bad names file must not leave the loaded model without inputs.
        logs.append(f"⚠️ Using Dummy Feature Names (json unreadable: {e})")
        return dummy
    # Truncate or pad so the name count matches the model's input width.
    cols = cols[:detected_dim]
    return cols + [f"Feat_{i}" for i in range(len(cols), detected_dim)]


def _load_model(logs):
    """Build the SAGE model from the checkpoint and pick the feature names."""
    model_path = smart_load_file("pytorch_model.bin")
    if not model_path:
        logs.append("❌ 'pytorch_model.bin' NOT FOUND. Please upload it to Repo Root.")
        GLOBAL_DATA["feature_cols"] = list(_FALLBACK_FEATURE_COLS)
        return
    try:
        # NOTE(security): torch.load unpickles the downloaded file. Only point
        # REPO_ID at a trusted repo, or pass weights_only=True where the
        # installed torch version supports it.
        state_dict = torch.load(model_path, map_location=torch.device('cpu'))
        # The first layer's weight gives the input width the checkpoint expects.
        detected_dim = state_dict['conv1.lin_l.weight'].shape[1]

        model = SAGE(in_dim=detected_dim, h=128, out_dim=2, p_drop=0.3)
        model.load_state_dict(state_dict)
        model.eval()
    except Exception as e:
        logs.append(f"❌ Model Init Error: {e}")
        # Same fallback as the "not found" path so the Predict tab keeps its inputs.
        GLOBAL_DATA["feature_cols"] = list(_FALLBACK_FEATURE_COLS)
        return

    GLOBAL_DATA["model"] = model
    logs.append(f"✅ Model Loaded (Input Dim: {detected_dim})")
    GLOBAL_DATA["feature_cols"] = _resolve_feature_cols(detected_dim, logs)


def load_resources():
    """Download and load scores, edges and the model into GLOBAL_DATA.

    Each step is best-effort: a failure is recorded in the status log instead
    of being raised. The combined log is stored in GLOBAL_DATA["status"] and
    printed.
    """
    logs = []
    print("⏳ Starting Resource Loading...")

    _load_scores(logs)
    _load_edges(logs)
    _load_model(logs)

    GLOBAL_DATA["status"] = "\n".join(logs)
    print(GLOBAL_DATA["status"])

# Populate GLOBAL_DATA once at import time: the UI built further down reads
# GLOBAL_DATA["feature_cols"] to decide which Predict inputs to render.
load_resources()

# ==========================================
# 3. PROCESSING LOGIC
# ==========================================

def draw_graph(address):
    """Draw the ego graph around ``address`` from the loaded edge list.

    At most the first 20 edges touching the address are drawn. The queried
    node is highlighted and every node is labelled with the first four
    characters of its identifier.

    Args:
        address: Normalised (lower-cased, stripped) address to centre on.

    Returns:
        A matplotlib ``Figure``, or ``None`` when no edges are loaded or none
        touch ``address``.
    """
    # Local import: build the figure without pyplot's global registry, which
    # would keep every figure alive (one leak per request) and is shared
    # between concurrent handler calls.
    from matplotlib.figure import Figure

    df = GLOBAL_DATA["df_edges"]
    if df.empty:
        return None

    subset = df[(df["src"] == address) | (df["dst"] == address)].head(20)
    if subset.empty:
        return None

    G = nx.from_pandas_edgelist(subset, "src", "dst", edge_attr="edge_type", create_using=nx.DiGraph())
    # Fixed seed keeps the layout stable across repeated lookups.
    pos = nx.spring_layout(G, k=0.9, seed=42)

    fig = Figure(figsize=(6, 6))
    ax = fig.subplots()

    node_colors = ["#FF4500" if n == address else "#1E90FF" for n in G.nodes()]
    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=200, alpha=0.9, ax=ax)
    nx.draw_networkx_edges(G, pos, alpha=0.3, arrowstyle='->', ax=ax)
    nx.draw_networkx_labels(G, pos, labels={n: n[:4] for n in G.nodes()}, font_size=8, ax=ax)

    ax.set_title(f"Ego Graph: {address[:6]}...")
    ax.axis('off')
    return fig

def lookup_handler(address):
    """Look up an address in the score table and draw its ego graph.

    The address is lower-cased and stripped, then matched against the score
    index as typed, without a leading ``0x``, and with a leading ``0x``.

    Args:
        address: Raw text from the address box.

    Returns:
        A ``(markdown, figure_or_None)`` pair for the two output components.
    """
    raw_addr = str(address).strip().lower() if address else ""
    # Whitespace-only input counts as empty too.
    if not raw_addr:
        return "Please enter an address.", None

    df = GLOBAL_DATA["df_scores"]

    # Accept the address with or without its 0x prefix, whichever form the
    # table uses. Only a leading 0x is treated as a prefix.
    bare = raw_addr[2:] if raw_addr.startswith("0x") else raw_addr
    key = None
    if not df.empty:
        key = next((c for c in (raw_addr, bare, "0x" + bare) if c in df.index), None)

    if key is None:
        return (
            f"### ❌ Not Found\nAddress `{raw_addr}` not in database.\nStatus Logs:\n{GLOBAL_DATA['status']}",
            None
        )

    found = df.loc[key]
    # Duplicate index entries come back as a frame; use the first row.
    if isinstance(found, pd.DataFrame):
        found = found.iloc[0]
    score = float(found.get("prob_criminal", found.get("susp", 0.0)))

    # Try the typed form first, then the form that matched the score table.
    plot = draw_graph(raw_addr)
    if plot is None and key != raw_addr:
        plot = draw_graph(key)

    return (
        f"### ✅ Found\n**Score:** {score:.4f}\n**Status:** {'CRITICAL 🔴' if score > 0.5 else 'BENIGN 🟢'}",
        plot
    )

def predict_handler(*features):
    """Score one manually entered feature vector with the loaded model.

    The features form a single isolated node (empty edge list). The result is
    a Markdown string with the softmax probability of class index 1, or an
    error message when the model is missing or inference fails.
    """
    model = GLOBAL_DATA["model"]
    if model is None:
        return f"❌ Model Error: pytorch_model.bin missing.\nPlease check 'System Status' below."

    try:
        row = [float(value) for value in features]
        x = torch.tensor([row], dtype=torch.float)
        # No neighbours: the node is scored on its own features only.
        no_edges = torch.tensor([[], []], dtype=torch.long)
        with torch.no_grad():
            logits = model(x, no_edges)
            probabilities = torch.softmax(logits, dim=1)
            prob = probabilities[0][1].item()
    except Exception as e:
        return f"Error: {e}"
    return f"### Result\n**Fraud Probability:** {prob*100:.2f}%"

# ==========================================
# 4. UI SETUP
# ==========================================
# Build the Gradio app: a collapsible status panel plus Lookup and Predict tabs.
with gr.Blocks(title="ETH Fraud GNN") as demo:
    gr.Markdown("# 🕵️‍♀️ Ethereum Fraud Inspector")
    
    with gr.Accordion("System Status (Click to Debug)", open=False):
        # A callable is passed so Gradio computes the status text itself
        # instead of freezing a string here.
        gr.Markdown(lambda: GLOBAL_DATA["status"]) # Dynamic update

    with gr.Tabs():
        # Tab 1: look up a known address and show its score and ego graph.
        with gr.TabItem("🔍 Lookup"):
            with gr.Row():
                inp = gr.Textbox(label="Address")
                btn = gr.Button("Search", variant="primary")
            with gr.Row():
                out_txt = gr.Markdown()
                out_plt = gr.Plot()
            btn.click(lookup_handler, inputs=inp, outputs=[out_txt, out_plt])

        # Tab 2: score a manually entered feature vector.
        with gr.TabItem("🧠 Predict"):
            gr.Markdown("### Inductive Prediction (Simulated)")
            
            # Render one numeric input per loaded feature column
            cols = GLOBAL_DATA["feature_cols"]
            inputs = []
            with gr.Row():
                # Split the inputs across two columns, alternating by index
                c1, c2 = gr.Column(), gr.Column()
                for i, c in enumerate(cols):
                    with (c1 if i % 2 == 0 else c2):
                        inputs.append(gr.Number(label=c, value=0.0))
            
            btn2 = gr.Button("Predict", variant="primary")
            out2 = gr.Markdown()
            # The inputs are passed positionally, in feature_cols order, to predict_handler(*features).
            btn2.click(predict_handler, inputs=inputs, outputs=out2)

# Start the server only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()