Spaces:
Sleeping
Sleeping
Added files
Browse files- README.md +69 -14
- export_graph_state.py +63 -0
- model.py +46 -0
README.md
CHANGED
|
@@ -1,14 +1,69 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# AML Detection – Gradio App
|
| 2 |
+
|
| 3 |
+
GraphSAGE-based Anti-Money Laundering transaction classifier.
|
| 4 |
+
|
| 5 |
+
## File Structure
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
aml_app/
|
| 9 |
+
├── app.py                  # Gradio UI
|
| 10 |
+
├── inference.py            # Feature engineering + inference engine
|
| 11 |
+
├── model.py                # EdgeGNN architecture (matches training notebook exactly)
|
| 12 |
+
├── requirements.txt
|
| 13 |
+
├── export_graph_state.py   # Run in notebook to export artifacts
|
| 14 |
+
└── artifacts/              # ← place your trained files here
|
| 15 |
+
    ├── best_model.pt       # trained model weights
|
| 16 |
+
    ├── config.json         # training config (threshold, hidden_dim, etc.)
|
| 17 |
+
    └── graph_state.pt      # optional: historical graph for richer predictions
|
| 18 |
+
```
|
| 19 |
+
|
| 20 |
+
## Quick Start
|
| 21 |
+
|
| 22 |
+
### 1. Install dependencies
|
| 23 |
+
```bash
|
| 24 |
+
pip install -r requirements.txt
|
| 25 |
+
# PyTorch Geometric also needs:
|
| 26 |
+
pip install torch-scatter torch-sparse -f https://data.pyg.org/whl/torch-2.1.0+cpu.html
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
### 2. Export artifacts from your notebook
|
| 30 |
+
After training, run `export_graph_state.py` inside your Kaggle/Lightning AI notebook:
|
| 31 |
+
```python
|
| 32 |
+
exec(open('export_graph_state.py').read())
|
| 33 |
+
```
|
| 34 |
+
Then download the `artifacts/` folder and place it next to `app.py`.
|
| 35 |
+
|
| 36 |
+
### 3. Run
|
| 37 |
+
```bash
|
| 38 |
+
python app.py
|
| 39 |
+
# → http://localhost:7860
|
| 40 |
+
```
|
| 41 |
+
|
| 42 |
+
## Running without trained weights (Demo Mode)
|
| 43 |
+
|
| 44 |
+
The app runs in **demo mode** if `artifacts/best_model.pt` is missing —
|
| 45 |
+
it uses random weights so the interface is fully functional but predictions
|
| 46 |
+
are meaningless. Good for testing the UI.
|
| 47 |
+
|
| 48 |
+
## Inference Modes
|
| 49 |
+
|
| 50 |
+
| Mode | When | Quality |
|
| 51 |
+
|---|---|---|
|
| 52 |
+
| `graph-lookup` | Account exists in `graph_state.pt` | Best — uses full neighbourhood |
|
| 53 |
+
| `cold-start` | New account / no `graph_state.pt` | Reduced — single edge only |
|
| 54 |
+
|
| 55 |
+
## Environment Variables
|
| 56 |
+
|
| 57 |
+
| Variable | Default | Description |
|
| 58 |
+
|---|---|---|
|
| 59 |
+
| `MODEL_PATH` | `artifacts/best_model.pt` | Path to model weights |
|
| 60 |
+
| `CONFIG_PATH` | `artifacts/config.json` | Path to config JSON |
|
| 61 |
+
| `GRAPH_PATH` | `artifacts/graph_state.pt` | Path to graph state |
|
| 62 |
+
| `PORT` | `7860` | Server port |
|
| 63 |
+
|
| 64 |
+
## Deploying to Hugging Face Spaces
|
| 65 |
+
|
| 66 |
+
1. Create a new Space (Gradio SDK)
|
| 67 |
+
2. Upload all files in this directory
|
| 68 |
+
3. Upload your `artifacts/` folder
|
| 69 |
+
4. The Space will auto-install requirements and launch `app.py`
|
export_graph_state.py
ADDED
|
@@ -0,0 +1,63 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
export_graph_state.py
─────────────────────
Run this inside your Kaggle / Lightning AI notebook AFTER Cell 7
to export everything the Gradio app needs for full graph-lookup inference.

Usage (paste into a new notebook cell after c07-graph):
    %run export_graph_state.py

Or inline:
    exec(open('export_graph_state.py').read())

Expects these notebook globals to exist:
    best_state, data, account_to_id, HIDDEN_DIM, DROPOUT,
    best_thr, USE_FOCAL_LOSS, FOCAL_GAMMA, SAMPLE_FRAC
(and optionally edge_feat_df).
"""
import json
import os
from datetime import datetime, timezone
from pathlib import Path

import torch

# Output directory for all artifacts; override with the EXPORT_DIR env var.
EXPORT_DIR = Path(os.environ.get("EXPORT_DIR", "/kaggle/working/artifacts/aml-gnn"))
EXPORT_DIR.mkdir(parents=True, exist_ok=True)

# ── 1. Best model weights ─────────────────────────────────────────────────────
# (already saved by c09-train if you ran it — just copy)
model_src = EXPORT_DIR / "best_model.pt"
if model_src.exists():
    print(f"Model weights already at {model_src}")
elif globals().get("best_state") is not None:
    torch.save(best_state, model_src)
    print(f"Saved model weights → {model_src}")
else:
    # BUG FIX: the original fell into the "already at ..." message here even
    # though the file does not exist and there is no in-memory state to save.
    print(f"WARNING: {model_src} not found and `best_state` is unavailable — "
          "no model weights were exported.")

# ── 2. Graph state (for graph-lookup inference) ───────────────────────────────
graph_path = EXPORT_DIR / "graph_state.pt"
torch.save({
    "data": data,                    # PyG Data object (node_x, edge_index, edge_attr)
    "account_to_id": account_to_id,  # dict: account_str → node_id int
    # `dir()` only lists the *local* scope; the notebook variables live in
    # globals(), so check there explicitly.
    "edge_columns": list(edge_feat_df.columns) if "edge_feat_df" in globals() else [],
}, graph_path)
print(f"Saved graph state → {graph_path}")
print(f"  Nodes : {data.x.shape[0]:,}")
print(f"  Edges : {data.edge_index.shape[1]:,}")

# ── 3. Config ─────────────────────────────────────────────────────────────────
config_path = EXPORT_DIR / "config.json"
cfg = {
    "model_class"    : "EdgeGNN",
    "in_dim"         : int(data.x.shape[1]),
    "edge_dim"       : int(data.edge_attr.shape[1]),
    "hidden_dim"     : int(HIDDEN_DIM),
    "dropout"        : float(DROPOUT),
    "best_threshold" : float(best_thr),
    "use_focal_loss" : bool(USE_FOCAL_LOSS),
    "focal_gamma"    : float(FOCAL_GAMMA),
    "sample_frac"    : float(SAMPLE_FRAC),
    "timestamp_utc"  : datetime.now(timezone.utc).isoformat(),
}
config_path.write_text(json.dumps(cfg, indent=2))
print(f"Saved config → {config_path}")

print("\n✅ Export complete. Copy the artifacts/ folder to your Gradio app directory.")
print(f"  {EXPORT_DIR}/")
print("  ├── best_model.pt")
print("  ├── graph_state.pt")
print("  └── config.json")
|
model.py
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""
model.py — EdgeGNN definition, exactly matching the training notebook.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import SAGEConv


class EdgeGNN(nn.Module):
    """2-layer GraphSAGE node encoder + MLP edge classifier."""

    def __init__(self, in_dim: int, edge_dim: int, hidden_dim: int = 64, dropout: float = 0.2):
        """Build the two SAGE layers and the edge-scoring MLP.

        Args:
            in_dim:     node feature dimension.
            edge_dim:   edge (transaction) feature dimension.
            hidden_dim: width of the node embeddings and MLP hidden layer.
            dropout:    dropout probability used in both encoder and MLP.
        """
        super().__init__()
        self.conv1 = SAGEConv(in_dim, hidden_dim)
        self.conv2 = SAGEConv(hidden_dim, hidden_dim)
        self.dropout = dropout
        # Scores one edge from the concatenation [src_emb | dst_emb | edge_attr].
        self.edge_mlp = nn.Sequential(
            nn.Linear(hidden_dim * 2 + edge_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 1),
        )

    def encode_nodes(self, x, edge_index):
        """Return hidden_dim node embeddings after two rounds of message passing."""
        hidden = self.conv1(x, edge_index)
        hidden = F.dropout(F.relu(hidden), p=self.dropout, training=self.training)
        return self.conv2(hidden, edge_index)

    def edge_logits(self, node_emb, edge_index, edge_attr, local_idx=None):
        """Compute one raw logit per edge.

        When `local_idx` is given, only that subset of edges is scored;
        otherwise every edge in `edge_index` gets a logit.
        """
        if local_idx is None:
            src, dst, feats = edge_index[0], edge_index[1], edge_attr
        else:
            src = edge_index[0, local_idx]
            dst = edge_index[1, local_idx]
            feats = edge_attr[local_idx]
        pair = torch.cat([node_emb[src], node_emb[dst], feats], dim=1)
        return self.edge_mlp(pair).squeeze(-1)

    def forward(self, x, edge_index, edge_attr):
        """End-to-end pass: encode all nodes, then score every edge."""
        emb = self.encode_nodes(x, edge_index)
        return self.edge_logits(emb, edge_index, edge_attr)