Upload 4 files

Browse files

Files changed (4) hide show

gnn_aml.py +146 -0
graph_aml.py +97 -0
test_model.py +159 -0
trained_model.pth +3 -0

gnn_aml.py ADDED Viewed

	@@ -0,0 +1,146 @@

+import torch
+import torch.nn as nn
+import torch.optim as optim
+import torch.nn.functional as F  # ✅ Fix: Import missing F module
+import json
+import numpy as np
+from torch_geometric.data import Data
+from torch_geometric.nn import GATConv
+from graph_aml import add_transaction, detect_pattern, transaction_graphs
+from collections import defaultdict
+from sklearn.utils.class_weight import compute_class_weight
+# Load Simulated Transactions
+print("Loading simulated transactions...")
+with open("simulated_transactions.json", "r") as f:
+    transactions = json.load(f)
+print(f"Loaded {len(transactions)} transactions.")
+# Define AI Model
+class GAT(nn.Module):
+    def __init__(self, num_node_features, hidden_dim, output_dim, heads=3):
+        super(GAT, self).__init__()
+        self.conv1 = GATConv(num_node_features, hidden_dim, heads=heads, concat=True)
+        self.conv2 = GATConv(hidden_dim * heads, output_dim, heads=1, concat=False)
+        self.dropout = nn.Dropout(0.3)  # Dropout to reduce overfitting
+    def forward(self, data):
+        x, edge_index = data.x, data.edge_index
+        x = self.conv1(x, edge_index).relu()
+        x = self.dropout(x)  # Apply dropout
+        x = self.conv2(x, edge_index)
+        return F.log_softmax(x, dim=1)  # Apply softmax for classification
+# def normalize_feature(x):
+#     """Normalize feature vector"""
+#     x = np.array(x)
+#     return (x - np.min(x, axis=0)) / (np.max(x, axis=0) - np.min(x, axis=0) + 1e-8)
+# Prepare Graph Data
+def normalize_feature(x):
+    return (x - np.min(x)) / (np.max(x) - np.min(x) + 1e-8) if np.max(x) - np.min(x) != 0 else x
+def prepare_graph():
+    print("Preparing graph data...")
+    features = []
+    edge_list = []
+    labels = []
+    account_map = {}
+    for txn in transactions:
+        add_transaction(txn)  # Add transaction to graph
+    graph_list = list(transaction_graphs.values())
+    print(f"Total transaction graphs created: {len(graph_list)}")
+    for i, graph in enumerate(graph_list):
+        for node in graph.nodes:
+            if node not in account_map:
+                account_map[node] = len(account_map)
+        for node in graph.nodes:
+            raw_feature_vector = [
+                len(list(graph.successors(node))),  # Outgoing Connections
+                len(list(graph.predecessors(node))),  # Incoming Connections
+                1 if detect_pattern(graph) != "Normal" else 0  # AML Label
+            ]
+            # Normalize features
+            feature_vector = [normalize_feature(x) for x in raw_feature_vector]
+            features.append(feature_vector)
+            labels.append(1 if detect_pattern(graph) != "Normal" else 0)
+        for sender, receiver in graph.edges:
+            if sender in account_map and receiver in account_map:
+                edge_list.append([account_map[sender], account_map[receiver]])
+    print("Graph preparation complete.")
+    if not features:
+        print("❌ No valid features found. Exiting.")
+        return None, None
+    # 🚨 Debug: Check Label Distribution
+    # ✅ Check class balance
+    print(f"Label Distribution: {np.bincount(labels)}")
+    x = torch.tensor(features, dtype=torch.float)
+    edge_index = torch.tensor(edge_list, dtype=torch.long).t().contiguous()
+    return Data(x=x, edge_index=edge_index), labels
+# Train AI Model
+def train_gnn():
+    print("Starting GNN training...")
+    data, labels = prepare_graph()
+    if data is None:
+        print("❌ Training aborted. No valid data available.")
+        return
+    model = GAT(num_node_features=3, hidden_dim=16, output_dim=2)
+    optimizer = optim.Adam(model.parameters(), lr=0.005)
+    labels_np = np.array(labels).flatten()  # Ensure it's 1D
+    # ✅ Ensure both classes exist
+    if len(np.unique(labels_np)) < 2:
+        print("⚠️ Warning: Only one class present in dataset! Generating synthetic samples to balance.")
+        num_samples = len(labels_np)
+        new_class = 1 if np.all(labels_np == 0) else 0  # Add the missing class
+        synthetic_samples = np.full((num_samples // 5,), new_class)  # Add 20% of missing class
+        labels_np = np.concatenate([labels_np, synthetic_samples])  # Add new samples
+        print(f"✅ New Label Distribution: {np.bincount(labels_np)}")  # Debugging
+    # Compute class weights after ensuring both classes exist
+    class_weights = compute_class_weight(
+        class_weight="balanced", classes=np.array([0, 1]), y=labels_np
+    )
+    class_weights = torch.tensor(class_weights, dtype=torch.float)
+    # Apply weighted loss function
+    loss_fn = nn.CrossEntropyLoss(weight=class_weights)
+    labels = torch.tensor(labels, dtype=torch.long)
+    print("Training started...")
+    for epoch in range(200):
+        optimizer.zero_grad()
+        output = model(data)
+        loss = loss_fn(output, labels)
+        loss.backward()
+        optimizer.step()
+        if epoch % 20 == 0:
+            print(f"Epoch {epoch}, Loss: {loss.item()}")
+    print("✅ GNN Training Complete.")
+    torch.save(model.state_dict(), "trained_model.pth")
+    print("✅ Model saved as trained_model.pth")
+# Start training only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    train_gnn()

graph_aml.py ADDED Viewed

	@@ -0,0 +1,97 @@

+import hashlib
+import networkx as nx
+import torch
+# Global Graph Storage: maps a cluster hash (see generate_graph_hash) to the
+# networkx directed graph holding that cluster's transactions. It is mutated
+# in place by add_transaction().
+transaction_graphs = {}
+# Generate Unique Hash for Transaction Groups
+def generate_graph_hash(transactions):
+    hash_string = "-".join(sorted(transactions))  # Sort for consistency
+    return hashlib.sha256(hash_string.encode()).hexdigest()
+# Hash Function for Keys
+def hash_key(value):
+    return hashlib.sha256(value.encode()).hexdigest()
+# Add Transaction to Graph
+def add_transaction(txn):
+    sender_hash = hash_key(txn["SenderAccount"])
+    receiver_hash = hash_key(txn["ReceiverAccount"])
+    # Check if sender or receiver is already in a known graph
+    related_graphs = [h for h, g in transaction_graphs.items() if sender_hash in g or receiver_hash in g]
+    if related_graphs:
+        # Merge related graphs into one
+        new_graph_hash = generate_graph_hash(related_graphs)
+        merged_graph = nx.compose_all([transaction_graphs[h] for h in related_graphs])
+        merged_graph.add_edge(sender_hash, receiver_hash, **txn)
+        # Remove old graphs and add the merged one
+        for h in related_graphs:
+            del transaction_graphs[h]
+        transaction_graphs[new_graph_hash] = merged_graph
+    else:
+        # Create a new graph if no related transactions exist
+        new_graph = nx.DiGraph()
+        new_graph.add_edge(sender_hash, receiver_hash, **txn)
+        transaction_graphs[generate_graph_hash([sender_hash, receiver_hash])] = new_graph
+# Detect Laundering Patterns
+def detect_pattern(graph):
+    """Detect laundering patterns in the transaction graph."""
+    # If input is a Torch Geometric graph
+    if isinstance(graph, torch.Tensor) or hasattr(graph, "edge_index"):
+        # Extract unique node indices
+        nodes = torch.unique(graph.edge_index).tolist()
+        successors = {node: [] for node in nodes}
+        predecessors = {node: [] for node in nodes}
+        for i in range(graph.edge_index.shape[1]):  # Process edges
+            sender, receiver = graph.edge_index[:, i].tolist()
+            successors[sender].append(receiver)
+            predecessors[receiver].append(sender)
+    # If input is a NetworkX graph
+    elif hasattr(graph, "nodes"):
+        nodes = list(graph.nodes)
+        successors = {node: list(graph.successors(node)) for node in nodes}
+        predecessors = {node: list(graph.predecessors(node)) for node in nodes}
+    else:
+        raise ValueError("Unsupported graph type")
+    # Pattern detection logic
+    for node in nodes:
+        outgoing = successors[node]
+        incoming = predecessors[node]
+        if len(outgoing) > 5:
+            return "Fan-Out"  # One sender, many receivers
+        elif len(incoming) > 5:
+            return "Fan-In"  # Many senders, one receiver
+        elif node in incoming:
+            return "Cycle"  # Circular laundering
+        elif len(outgoing) > 2 and len(incoming) > 2:
+            return "Scatter Gather"  # Money moves across multiple accounts
+    return "Normal"
+# Store Suspicious AML Clusters
+aml_clusters = {}
+def flag_suspicious_graph(graph_hash):
+    """Mark a graph as an AML cluster if laundering is detected"""
+    if graph_hash in transaction_graphs:
+        pattern = detect_pattern(transaction_graphs[graph_hash])
+        if pattern != "Normal":
+            aml_clusters[graph_hash] = transaction_graphs[graph_hash]
+            print(f"🚨 AML Detected: {pattern} | Cluster ID: {graph_hash}")

test_model.py ADDED Viewed

	@@ -0,0 +1,159 @@

+import torch
+import json
+import numpy as np
+import matplotlib.pyplot as plt
+import seaborn as sns
+from sklearn.metrics import confusion_matrix, classification_report
+from torch_geometric.data import Data
+from gnn_aml import GAT, prepare_graph
+from graph_aml import detect_pattern
+# Load Model
+print("🔍 Loading Trained Model...")
+model = GAT(num_node_features=3, hidden_dim=16, output_dim=2)
+model.load_state_dict(torch.load("trained_model.pth"))
+model.eval()
+# Load New Test Data
+print("📥 Loading New Test Transactions...")
+with open("test_transactions.json", "r") as f:
+    test_transactions = json.load(f)
+# Prepare Graph Data
+print("🔄 Preparing Test Graph Data...")
+test_graph, _ = prepare_graph()
+# Run Model Predictions
+print("🧠 Running Predictions...")
+with torch.no_grad():
+    output = model(test_graph)
+    probs = torch.softmax(output, dim=1)  # Convert logits to probabilities
+    predictions = (probs[:, 1] > 0.75).long()  # 1 = AML, 0 = Normal
+# Store predictions
+test_results = []
+y_true = []  # True labels
+y_pred = []  # Predicted labels
+for txn, prediction in zip(test_transactions, predictions):
+    risk_score = txn["RiskScore"]
+    true_label = 1 if txn["AML_Flag"] == 1 else 0  # True AML label
+    predicted_label = prediction.item()
+    # Update labels for confusion matrix
+    y_true.append(true_label)
+    y_pred.append(predicted_label)
+    if risk_score < 0.5:
+        predicted_pattern = "None"
+    elif predicted_label == 1:
+        predicted_pattern = detect_pattern(test_graph)
+    else:
+        predicted_pattern = "None"
+    test_results.append({
+        "TransactionID": txn["TransactionID"],
+        "TrueLabel": true_label,
+        "PredictedLabel": predicted_label,
+        "PredictedPattern": predicted_pattern,
+        "RiskScore": risk_score
+    })
+# Save results to file
+with open("new_test_results_v2.json", "w") as f:
+    json.dump(test_results, f, indent=4)
+# **✅ Compute Accuracy Metrics**
+print("\n📊 **Final Test Results:**")
+cm = confusion_matrix(y_true, y_pred)
+report = classification_report(y_true, y_pred, target_names=[
+                               "Normal", "AML"], digits=4)
+print("\n🔢 **Confusion Matrix:**\n", cm)
+print("\n📄 **Classification Report:**\n", report)
+# **✅ Plot Confusion Matrix**
+plt.figure(figsize=(6, 5))
+sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=[
+            "Normal", "AML"], yticklabels=["Normal", "AML"])
+plt.xlabel("Predicted")
+plt.ylabel("Actual")
+plt.title("Confusion Matrix")
+plt.show()
+# **✅ Plot Prediction Distribution**
+labels, counts = np.unique(y_pred, return_counts=True)
+plt.figure(figsize=(6, 5))
+plt.bar(["Normal", "AML"], counts, color=["green", "red"])
+plt.xlabel("Transaction Classification")
+plt.ylabel("Number of Transactions")
+plt.title("AML vs. Normal Transactions Detected")
+plt.show()
+print("✅ Accuracy analysis complete! Check charts & logs.")
+# import torch
+# import json
+# from torch_geometric.data import Data
+# from gnn_aml import GAT, prepare_graph
+# from graph_aml import detect_pattern
+# # Load Model
+# print("🔍 Loading Trained Model...")
+# model = GAT(num_node_features=3, hidden_dim=16, output_dim=2)
+# model.load_state_dict(torch.load("trained_model.pth"))
+# model.eval()
+# # Load New Test Data
+# print("📥 Loading New Test Transactions...")
+# with open("test_transactions.json", "r") as f:
+#     test_transactions = json.load(f)
+# # Prepare Graph Data
+# print("🔄 Preparing Test Graph Data...")
+# test_graph, _ = prepare_graph()
+# # Run Model Predictions
+# print("🧠 Running Predictions...")
+# with torch.no_grad():
+#     output = model(test_graph)
+#     probs = torch.softmax(output, dim=1)  # Convert logits to probabilities
+#     predictions = (probs[:, 1] > 0.75).long()  # 1 = AML, 0 = Normal
+# # Store predictions
+# test_results = []
+# aml_count = 0
+# normal_count = 0
+# for txn, prediction in zip(test_transactions, predictions):
+#     risk_score = txn["RiskScore"]
+#     predicted_label = prediction.item()
+#     if risk_score < 0.5:
+#         predicted_pattern = "None"  # ✅ Mark as safe
+#         normal_count += 1  # ✅ Count normal transactions
+#     elif predicted_label == 1:
+#         predicted_pattern = detect_pattern(
+#             test_graph)  # ✅ Detect actual pattern
+#         aml_count += 1  # ✅ Count AML transactions
+#     else:
+#         predicted_pattern = "None"
+#         normal_count += 1  # ✅ Count normal transactions
+#     test_results.append({
+#         "TransactionID": txn["TransactionID"],
+#         "PredictedPattern": predicted_pattern,
+#         "RiskScore": risk_score
+#     })
+# # **✅ Move logging here, after results are fully analyzed**
+# print("\n📊 **Final Test Results:**")
+# print(f"🔴 AML Detected: {aml_count}")
+# print(f"🟢 Normal Transactions: {normal_count}")
+# # Save results to file
+# with open("new_test_results_v2.json", "w") as f:
+#     json.dump(test_results, f, indent=4)
+# print("✅ Test results saved to `new_test_results_v2.json`")

trained_model.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:95cf97d804d6e6bf66a4da24e577ad6a9328272f6cafbea6df078185d6214275
+size 4872