Spaces:

uyen1109
/

DemoGraph

Sleeping

App Files Files Community

uyen1109 committed on Nov 20, 2025

Commit

4051c4e

verified ·

1 Parent(s): d4b1511

Upload app.py

Browse files

Files changed (1) hide show

app.py +118 -151

app.py CHANGED Viewed

@@ -36,38 +36,42 @@ class SAGE(nn.Module):
         return self.head(x)
 # ==========================================
-# 2. QUẢN LÝ RESOURCE (DATA & MODEL)
 # ==========================================
 REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"
 TOKEN = os.getenv("HF_TOKEN")
-# Danh sách feature mặc định (Fallback) để UI không bị trống nếu lỗi load file
-DEFAULT_FEATURES = [
-    'out_deg', 'in_deg', 'eth_out_sum', 'eth_in_sum',
-    'unique_dst_cnt', 'unique_src_cnt', 'first_seen_ts', 'last_seen_ts',
-    'pr', 'clust_coef', 'betw', 'feat_11', 'feat_12', 'feat_13', 'feat_14'
-]
 GLOBAL_DATA = {
     "model": None,
     "df_scores": pd.DataFrame(),
     "df_edges": pd.DataFrame(),
-    "feature_cols": DEFAULT_FEATURES, # Luôn có giá trị mặc định
     "status": "Initializing..."
 }
 def smart_load_file(filename):
-    """Thử tải file từ repo, ưu tiên hf_export"""
-    paths = [f"hf_export/{filename}", filename]
     for p in paths:
         try:
-            # Thử tải với token trước, nếu lỗi thử không token (public repo)
             return hf_hub_download(repo_id=REPO_ID, filename=p, token=TOKEN)
-        except:
             try:
                 return hf_hub_download(repo_id=REPO_ID, filename=p, token=None)
-            except:
                 continue
     return None
 def load_resources():
@@ -75,72 +79,83 @@ def load_resources():
     print("⏳ Starting Resource Loading...")
     # 1. Load Scores
-    try:
-        path = smart_load_file("scores/node_scores_with_labels.csv") or smart_load_file("node_scores_with_labels.csv")
-        if path:
             df = pd.read_csv(path)
-            # Chuẩn hóa cột địa chỉ: tìm cột chứa chữ 'address' hoặc 'id'
-            addr_col = next((c for c in df.columns if 'addr' in c.lower() or 'id' in c.lower()), df.columns[0])
             df[addr_col] = df[addr_col].astype(str).str.lower().str.strip()
-            # Đặt index là địa chỉ để tra cứu nhanh
             df.set_index(addr_col, inplace=True)
             GLOBAL_DATA["df_scores"] = df
-            msg = f"✅ Loaded Scores: {len(df)} rows (Index col: {addr_col})"
-            print(msg)
-            logs.append(msg)
-        else:
-            logs.append("⚠️ Scores CSV not found.")
-    except Exception as e:
-        logs.append(f"❌ Error loading scores: {str(e)}")
     # 2. Load Edges
-    try:
-        path = smart_load_file("graph/edges_all.csv") or smart_load_file("edges_all.csv")
-        if path:
-            df = pd.read_csv(path, usecols=["src", "dst", "edge_type"])
-            df["src"] = df["src"].astype(str).str.lower().str.strip()
-            df["dst"] = df["dst"].astype(str).str.lower().str.strip()
-            GLOBAL_DATA["df_edges"] = df
             print("✅ Loaded Edges.")
-    except:
-        print("⚠️ Edges CSV not found (Graph viz will be disabled).")
-    # 3. Load Model
-    try:
-        model_path = smart_load_file("pytorch_model.bin")
-        if model_path:
             state_dict = torch.load(model_path, map_location=torch.device('cpu'))
-            # Tự động phát hiện input dim
             detected_dim = state_dict['conv1.lin_l.weight'].shape[1]
             model = SAGE(in_dim=detected_dim, h=128, out_dim=2, p_drop=0.3)
             model.load_state_dict(state_dict)
             model.eval()
             GLOBAL_DATA["model"] = model
-            # Cập nhật danh sách feature cột nếu có
             cols_path = smart_load_file("feature_columns.json")
             if cols_path:
                 with open(cols_path, 'r') as f:
                     cols = json.load(f)
-                    # Điều chỉnh cho khớp detected_dim
-                    if len(cols) >= detected_dim:
-                        GLOBAL_DATA["feature_cols"] = cols[:detected_dim]
-                    else:
-                        GLOBAL_DATA["feature_cols"] = cols + [f"F_{i}" for i in range(len(cols), detected_dim)]
             else:
-                # Nếu không có file json, tạo dummy name cho đủ số lượng
                 GLOBAL_DATA["feature_cols"] = [f"Feature_{i}" for i in range(detected_dim)]
-            logs.append(f"✅ Model Loaded (Input Dim: {detected_dim})")
-        else:
-            logs.append("❌ pytorch_model.bin not found.")
-    except Exception as e:
-        logs.append(f"❌ Model Load Error: {str(e)}")
     GLOBAL_DATA["status"] = "\n".join(logs)
-# Chạy load ngay lập tức
 load_resources()
 # ==========================================
@@ -151,145 +166,97 @@ def draw_graph(address):
     df = GLOBAL_DATA["df_edges"]
     if df.empty: return None
-    # Tìm giao dịch liên quan (cả in và out)
-    subset = df[(df["src"] == address) | (df["dst"] == address)].head(30)
     if subset.empty: return None
     G = nx.from_pandas_edgelist(subset, "src", "dst", edge_attr="edge_type", create_using=nx.DiGraph())
-    plt.figure(figsize=(8, 8))
-    pos = nx.spring_layout(G, k=0.8, seed=42)
-    # Màu sắc: Target màu đỏ, Neighbor màu xanh
     node_colors = ["#FF4500" if n == address else "#1E90FF" for n in G.nodes()]
-    node_sizes = [400 if n == address else 150 for n in G.nodes()]
-    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=node_sizes, alpha=0.9)
     nx.draw_networkx_edges(G, pos, alpha=0.3, arrowstyle='->')
-    # Label ngắn gọn
-    nx.draw_networkx_labels(G, pos, labels={n: n[:4] + ".." for n in G.nodes()}, font_size=8)
     plt.title(f"Ego Graph: {address[:6]}...")
     plt.axis('off')
     return plt.gcf()
 def lookup_handler(address):
-    # Chuẩn hóa input cực mạnh để khớp với index CSV
-    raw_addr = str(address).strip().lower()
     df = GLOBAL_DATA["df_scores"]
-    found_row = None
-    # Thử các trường hợp khớp
-    if raw_addr in df.index:
-        found_row = df.loc[raw_addr]
-    elif raw_addr.startswith("0x") and raw_addr[2:] in df.index: # Thử bỏ 0x
-        found_row = df.loc[raw_addr[2:]]
-    elif ("0x" + raw_addr) in df.index: # Thử thêm 0x
-        found_row = df.loc["0x" + raw_addr]
-    if found_row is not None:
-        # Lấy điểm số
-        try:
-            # Xử lý trường hợp duplicate index hoặc series
-            if isinstance(found_row, pd.DataFrame):
-                found_row = found_row.iloc[0]
-            score = float(found_row.get("prob_criminal", found_row.get("susp", 0.0)))
-            label = int(found_row.get("label", -1))
-            risk = "CRITICAL 🔴" if score > 0.8 else ("HIGH 🟠" if score > 0.5 else "LOW 🟢")
-            label_text = "Unknown"
-            if label == 1: label_text = "Criminal (True Label)"
-            elif label == 0: label_text = "Benign (True Label)"
-            info = (
-                f"### ✅ Address Found\n"
-                f"- **Risk Score:** {score:.4f}\n"
-                f"- **Risk Level:** {risk}\n"
-                f"- **Dataset Label:** {label_text}"
-            )
-            return info, draw_graph(raw_addr)
-        except Exception as e:
-            return f"Error parsing row: {e}", None
-    # Nếu không tìm thấy
     return (
-        f"### ❌ Not Found in Database\n"
-        f"Address `{raw_addr}` does not exist in `node_scores_with_labels.csv`.\n"
-        f"Please verify the address or use the **Inductive Prediction** tab.",
         None
     )
 def predict_handler(*features):
-    model = GLOBAL_DATA["model"]
-    if model is None:
-        return f"❌ Model failed to load properly.\n\nLogs:\n{GLOBAL_DATA['status']}"
     try:
         x = torch.tensor([[float(f) for f in features]], dtype=torch.float)
         edge_index = torch.tensor([[], []], dtype=torch.long)
         with torch.no_grad():
-            logits = model(x, edge_index)
-            prob = torch.softmax(logits, dim=1)[0][1].item()
-        verdict = "CRIMINAL 🔴" if prob > 0.5 else "BENIGN 🟢"
-        return (
-            f"### 🧠 Prediction Result\n"
-            f"- **Fraud Probability:** {prob*100:.2f}%\n"
-            f"- **Verdict:** {verdict}"
-        )
     except Exception as e:
-        return f"Prediction Error: {str(e)}"
 # ==========================================
 # 4. UI SETUP
 # ==========================================
 with gr.Blocks(title="ETH Fraud GNN") as demo:
-    gr.Markdown("# 🕵️‍♀️ Ethereum Fraud GNN (Hybrid V3)")
-    # Hiển thị trạng thái load hệ thống (ẩn đi nếu muốn gọn)
-    with gr.Accordion("System Status / Logs", open=False):
-        gr.Markdown(GLOBAL_DATA["status"])
     with gr.Tabs():
-        # TAB 1: LOOKUP
-        with gr.TabItem("🔍 Lookup Address"):
             with gr.Row():
-                inp_addr = gr.Textbox(label="Enter Address", placeholder="0x...")
-                btn_search = gr.Button("Search", variant="primary")
             with gr.Row():
-                out_info = gr.Markdown()
-                out_plot = gr.Plot()
-            btn_search.click(lookup_handler, inputs=inp_addr, outputs=[out_info, out_plot])
-        # TAB 2: INDUCTIVE
-        with gr.TabItem("🧠 Inductive Prediction"):
-            gr.Markdown("### Predict New Address")
-            gr.Markdown("Enter extracted features manually:")
-            # TẠO INPUT ĐỘNG: Dù model có load được hay không, UI vẫn sẽ render dựa trên GLOBAL_DATA["feature_cols"]
-            # Điều này fix lỗi giao diện trống trơn.
-            feat_inputs = []
             cols = GLOBAL_DATA["feature_cols"]
-            # Chia layout thành 3 cột
             with gr.Row():
-                col1, col2, col3 = gr.Column(), gr.Column(), gr.Column()
-                # Phân phối input vào 3 cột
                 for i, c in enumerate(cols):
-                    target_col = col1 if i % 3 == 0 else (col2 if i % 3 == 1 else col3)
-                    with target_col:
-                        feat_inputs.append(gr.Number(label=c, value=0.0))
-            btn_predict = gr.Button("Run Inference", variant="primary")
-            out_pred = gr.Markdown()
-            btn_predict.click(predict_handler, inputs=feat_inputs, outputs=out_pred)
 if __name__ == "__main__":
     demo.launch()

         return self.head(x)
 # ==========================================
+# 2. RESOURCE MANAGEMENT
 # ==========================================
 # Hugging Face Hub repository the app downloads its artifacts from.
 REPO_ID = "uyen1109/eth-fraud-gnn-uyenuyen-v3"
 # Access token taken from the HF_TOKEN environment variable (None when unset).
 TOKEN = os.getenv("HF_TOKEN")
 # Module-wide state: filled in by load_resources(), read by the handlers and the UI.
 GLOBAL_DATA = {
     "model": None,  # SAGE instance once the weights have been loaded
     "df_scores": pd.DataFrame(),  # node scores, indexed by the normalized address column
     "df_edges": pd.DataFrame(),  # edge list with columns src, dst, edge_type
+    "feature_cols": [],  # feature names used to build the prediction inputs
     "status": "Initializing..."  # newline-joined load log shown in the status panel
 }
 def smart_load_file(filename):
     """Download ``filename`` from the Hub repo and return its local path.

     Candidate locations are tried in order: the repository root first, then
     the ``hf_export/`` sub-folder. For each location the download is first
     attempted with ``TOKEN`` and then as an explicitly anonymous request.

     Args:
         filename: Path of the file relative to the repository root.

     Returns:
         The local path returned by ``hf_hub_download``, or ``None`` when
         every attempt failed (the collected errors are printed).
     """
     candidates = (filename, f"hf_export/{filename}")
     # token=False is what actually disables authentication. token=None makes
     # huggingface_hub fall back to HF_TOKEN / the cached login, which would
     # simply repeat the first attempt with the same credential.
     attempts = (("Token", TOKEN), ("No-Token", False))
     errs = []
     for p in candidates:
         for label, tok in attempts:
             try:
                 return hf_hub_download(repo_id=REPO_ID, filename=p, token=tok)
             except Exception as exc:
                 # Best-effort loader: record the failure and try the next option.
                 errs.append(f"{label} fail {p}: {exc}")
     print(f"⚠️ Failed to load {filename}. Details: {errs}")
     return None
 # Download the score table, edge list, model weights and feature-name list,
 # store them in GLOBAL_DATA, and record a human-readable load log in
 # GLOBAL_DATA["status"]. Failures are logged rather than raised.
 def load_resources():
     # NOTE(review): `logs` is appended to below but is not assigned in the lines
     # visible here — presumably initialised in diff context that is not shown; confirm.
     print("⏳ Starting Resource Loading...")
     # 1. Load Scores
+    path = smart_load_file("node_scores_with_labels.csv")
+    if path:
+        try:
             df = pd.read_csv(path)
+            # Find the address column: case-insensitive match on "address",
+            # otherwise fall back to the first column.
+            cols_lower = [c.lower() for c in df.columns]
+            if "address" in cols_lower:
+                addr_col = df.columns[cols_lower.index("address")]
+            else:
+                addr_col = df.columns[0]
             # Normalize addresses (string, lower-case, trimmed) and use them as the index.
             df[addr_col] = df[addr_col].astype(str).str.lower().str.strip()
             df.set_index(addr_col, inplace=True)
             GLOBAL_DATA["df_scores"] = df
+            logs.append(f"✅ Loaded Scores: {len(df)} rows.")
+        except Exception as e:
+            logs.append(f"❌ Error parsing scores csv: {e}")
+    else:
+        logs.append("❌ 'node_scores_with_labels.csv' download failed.")
     # 2. Load Edges
     # Edge failures are only printed; they are not added to the status log.
+    path = smart_load_file("edges_all.csv")
+    if path:
+        try:
+            GLOBAL_DATA["df_edges"] = pd.read_csv(path, usecols=["src", "dst", "edge_type"])
+            # Light normalization so endpoints compare equal to normalized addresses when drawing.
+            GLOBAL_DATA["df_edges"]["src"] = GLOBAL_DATA["df_edges"]["src"].astype(str).str.lower().str.strip()
+            GLOBAL_DATA["df_edges"]["dst"] = GLOBAL_DATA["df_edges"]["dst"].astype(str).str.lower().str.strip()
             print("✅ Loaded Edges.")
+        except Exception as e:
+            print(f"⚠️ Edge parsing error: {e}")
+    else:
+        print("⚠️ 'edges_all.csv' download failed.")
+    # 3. Load Model & Features
+    model_path = smart_load_file("pytorch_model.bin")
+    if model_path:
+        try:
             # NOTE(review): torch.load unpickles the file; consider weights_only=True
             # if the installed torch version supports it.
             state_dict = torch.load(model_path, map_location=torch.device('cpu'))
             # The second axis of the first layer's weight is used as the model's in_dim.
             detected_dim = state_dict['conv1.lin_l.weight'].shape[1]
             model = SAGE(in_dim=detected_dim, h=128, out_dim=2, p_drop=0.3)
             model.load_state_dict(state_dict)
             model.eval()
             GLOBAL_DATA["model"] = model
+            logs.append(f"✅ Model Loaded (Input Dim: {detected_dim})")
+            # Load Feature Columns
             cols_path = smart_load_file("feature_columns.json")
             if cols_path:
                 with open(cols_path, 'r') as f:
                     cols = json.load(f)
+                # Make the number of feature names match detected_dim:
+                # truncate extras, or pad with generated "Feat_i" names.
+                if len(cols) == detected_dim:
+                    GLOBAL_DATA["feature_cols"] = cols
+                elif len(cols) > detected_dim:
+                    GLOBAL_DATA["feature_cols"] = cols[:detected_dim]
+                else:
+                    GLOBAL_DATA["feature_cols"] = cols + [f"Feat_{i}" for i in range(len(cols), detected_dim)]
             else:
                 GLOBAL_DATA["feature_cols"] = [f"Feature_{i}" for i in range(detected_dim)]
+                logs.append("⚠️ Using Dummy Feature Names (json missing)")
+        except Exception as e:
+            # NOTE(review): on this path "feature_cols" is not assigned here, so it
+            # keeps whatever value it had before (the fallback list below is not applied).
+            logs.append(f"❌ Model Init Error: {e}")
+    else:
+        logs.append("❌ 'pytorch_model.bin' NOT FOUND. Please upload it to Repo Root.")
+        # Fallback feature list so the UI does not break (based on your logs).
+        GLOBAL_DATA["feature_cols"] = [
+            'out_deg', 'in_deg', 'eth_out_sum', 'eth_in_sum',
+            'unique_dst_cnt', 'unique_src_cnt', 'first_seen_ts', 'last_seen_ts',
+            'pr', 'clust_coef', 'betw', 'feat_11', 'feat_12', 'feat_13', 'feat_14'
+        ]
     GLOBAL_DATA["status"] = "\n".join(logs)
+    print(GLOBAL_DATA["status"])
 # Run the load at import time so GLOBAL_DATA is populated before the UI below is built.
 load_resources()
 # ==========================================
     df = GLOBAL_DATA["df_edges"]
     if df.empty: return None
+    subset = df[(df["src"] == address) | (df["dst"] == address)].head(20)
     if subset.empty: return None
     G = nx.from_pandas_edgelist(subset, "src", "dst", edge_attr="edge_type", create_using=nx.DiGraph())
+    plt.figure(figsize=(6, 6))
+    pos = nx.spring_layout(G, k=0.9, seed=42)
     node_colors = ["#FF4500" if n == address else "#1E90FF" for n in G.nodes()]
+    nx.draw_networkx_nodes(G, pos, node_color=node_colors, node_size=200, alpha=0.9)
     nx.draw_networkx_edges(G, pos, alpha=0.3, arrowstyle='->')
+    nx.draw_networkx_labels(G, pos, labels={n: n[:4] for n in G.nodes()}, font_size=8)
     plt.title(f"Ego Graph: {address[:6]}...")
     plt.axis('off')
     return plt.gcf()
 def lookup_handler(address):
     """Look up an address in the scores table and draw its ego graph.

     The input is trimmed and lower-cased, then matched against the index of
     ``GLOBAL_DATA["df_scores"]`` in three forms, in this order: as typed,
     without a leading ``0x``, and with a ``0x`` prefix added.

     Args:
         address: Address text from the UI textbox (may be empty or ``None``).

     Returns:
         A ``(markdown, figure)`` tuple. ``figure`` is the result of
         ``draw_graph`` for the matched address, or ``None`` when the input is
         empty or the address is not in the table.
     """
     raw_addr = str(address).strip().lower() if address else ""
     # Whitespace-only input is treated the same as an empty textbox.
     if not raw_addr:
         return "Please enter an address.", None
     df = GLOBAL_DATA["df_scores"]
     # Strip only a leading "0x"; str.replace would also remove later occurrences.
     bare = raw_addr[2:] if raw_addr.startswith("0x") else raw_addr
     matched = None
     if not df.empty:
         matched = next((key for key in (raw_addr, bare, "0x" + bare) if key in df.index), None)
     if matched is None:
         return (
             f"### ❌ Not Found\nAddress `{raw_addr}` not in database.\nStatus Logs:\n{GLOBAL_DATA['status']}",
             None
         )
     found = df.loc[matched]
     # Duplicate index entries make .loc return a DataFrame; keep the first row.
     if isinstance(found, pd.DataFrame):
         found = found.iloc[0]
     score = float(found.get("prob_criminal", found.get("susp", 0.0)))
     # Draw with the key form that matched the scores table; fall back to the
     # raw input in case the edge list stores the other form.
     fig = draw_graph(matched)
     if fig is None and matched != raw_addr:
         fig = draw_graph(raw_addr)
     return (
         f"### ✅ Found\n**Score:** {score:.4f}\n**Status:** {'CRITICAL 🔴' if score > 0.5 else 'BENIGN 🟢'}",
         fig
     )
 def predict_handler(*features):
     """Score one manually entered feature vector with the loaded model.

     Args:
         *features: One numeric value per feature input, in UI order.

     Returns:
         A Markdown string with the fraud probability, or an error message
         when the model is unavailable, a field is empty, or inference fails.
     """
     model = GLOBAL_DATA["model"]
     if model is None:
         return "❌ Model Error: pytorch_model.bin missing.\nPlease check 'System Status' below."
     # An empty numeric field presumably arrives as None (confirm against the
     # Gradio version in use); report it instead of surfacing float(None)'s TypeError text.
     if any(value is None for value in features):
         return "Error: every feature field must contain a number."
     try:
         x = torch.tensor([[float(value) for value in features]], dtype=torch.float)
         # The node is scored in isolation: an empty edge list of shape (2, 0).
         edge_index = torch.tensor([[], []], dtype=torch.long)
         with torch.no_grad():
             prob = torch.softmax(model(x, edge_index), dim=1)[0][1].item()
         return f"### Result\n**Fraud Probability:** {prob*100:.2f}%"
     except Exception as e:
         # UI boundary: show the failure to the user rather than crash the callback.
         return f"Error: {e}"
 # ==========================================
 # 4. UI SETUP
 # ==========================================
 # Build the Gradio interface: a collapsible status panel plus two tabs,
 # one for address lookup and one for manual feature prediction.
 with gr.Blocks(title="ETH Fraud GNN") as demo:
+    gr.Markdown("# 🕵️‍♀️ Ethereum Fraud Inspector")
+    with gr.Accordion("System Status (Click to Debug)", open=False):
+        gr.Markdown(lambda: GLOBAL_DATA["status"]) # Dynamic update
     with gr.Tabs():
         # Tab 1: look up a known address and show its score and ego graph.
+        with gr.TabItem("🔍 Lookup"):
             with gr.Row():
+                inp = gr.Textbox(label="Address")
+                btn = gr.Button("Search", variant="primary")
             with gr.Row():
+                out_txt = gr.Markdown()
+                out_plt = gr.Plot()
+            btn.click(lookup_handler, inputs=inp, outputs=[out_txt, out_plt])
         # Tab 2: run the model on feature values entered by hand.
+        with gr.TabItem("🧠 Predict"):
+            gr.Markdown("### Inductive Prediction (Simulated)")
+            # Render one numeric input per loaded feature column.
             cols = GLOBAL_DATA["feature_cols"]
+            inputs = []
             with gr.Row():
+                # Split the inputs across two columns, alternating by index.
+                c1, c2 = gr.Column(), gr.Column()
                 for i, c in enumerate(cols):
+                    with (c1 if i % 2 == 0 else c2):
+                        inputs.append(gr.Number(label=c, value=0.0))
+            btn2 = gr.Button("Predict", variant="primary")
+            out2 = gr.Markdown()
+            btn2.click(predict_handler, inputs=inputs, outputs=out2)
 # Start the app only when the file is run as a script.
 if __name__ == "__main__":
     demo.launch()