# File size: 5,858 Bytes
# bca978b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
import gradio as gr
import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification, BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F

# Load models
# Checkpoint identifiers handed to `from_pretrained`; each model shares its
# identifier with the matching tokenizer.
ALBERT_CHECKPOINT = "Deepaksai1/albert-fraud-detector-v2"
FINBERT_CHECKPOINT = "Deepaksai1/finbert-fraud-detector-v2"

# Both classifiers are only used for inference in this file, so each one is
# switched to evaluation mode immediately after it is loaded.
albert_model = AlbertForSequenceClassification.from_pretrained(ALBERT_CHECKPOINT)
albert_model.eval()
albert_tokenizer = AlbertTokenizer.from_pretrained(ALBERT_CHECKPOINT)

finbert_model = BertForSequenceClassification.from_pretrained(FINBERT_CHECKPOINT)
finbert_model.eval()
finbert_tokenizer = BertTokenizer.from_pretrained(FINBERT_CHECKPOINT)

# Feature engineering function
def engineer_features(step, tx_type, amount, old_org, new_org, old_dest, new_dest):
    """Build the model input text and the metadata for one transaction.

    Four derived values are computed from the raw fields:

    * ``OrigDiff``: ``amount - (old_org - new_org)``.
    * ``DestDiff``: ``(new_dest - old_dest) - amount``.
    * ``ZeroBalance``: 1 when ``new_org == 0``, otherwise 0.
    * ``AmountFraction``: ``amount / old_org``, or 0 when ``old_org`` is
      not strictly positive (which also avoids dividing by zero).

    Returns:
        A ``(text, metadata)`` tuple. ``text`` is a single comma-separated
        ``"Label: value"`` string containing the raw and derived fields,
        and ``metadata`` is a dict with the keys ``'amount'``,
        ``'zero_balance'`` and ``'orig_diff'``.
    """
    origin_drop = old_org - new_org
    dest_gain = new_dest - old_dest
    orig_diff = amount - origin_drop
    dest_diff = dest_gain - amount

    if new_org == 0:
        zero_balance = 1
    else:
        zero_balance = 0

    if old_org > 0:
        amount_fraction = amount / old_org
    else:
        amount_fraction = 0

    # Field order and labels define the exact text the classifiers receive,
    # so they must stay as they are.
    labelled_fields = (
        ("Step", step),
        ("Type", tx_type),
        ("Amount", amount),
        ("OldBalOrig", old_org),
        ("NewBalOrig", new_org),
        ("OldBalDest", old_dest),
        ("NewBalDest", new_dest),
        ("OrigDiff", orig_diff),
        ("DestDiff", dest_diff),
        ("ZeroBalance", zero_balance),
        ("AmountFraction", amount_fraction),
    )
    text = ", ".join(f"{label}: {value}" for label, value in labelled_fields)

    metadata = {
        'amount': amount,
        'zero_balance': zero_balance,
        'orig_diff': orig_diff,
    }
    return text, metadata

# Individual model prediction
def predict_single_model(text, model_name):
    """Score ``text`` with one of the two module-level classifiers.

    Args:
        text: The transaction description to classify.
        model_name: ``"ALBERT"`` selects the ALBERT tokenizer/model pair;
            any other value selects the FinBERT pair.

    Returns:
        The softmax probability at class index 1 for the single input
        row, as a Python float. Callers treat it as the fraud score.
    """
    if model_name == "ALBERT":
        tokenizer, model = albert_tokenizer, albert_model
    else:
        tokenizer, model = finbert_tokenizer, finbert_model

    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # Inference only, so no gradient tracking is needed for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits

    class_probs = F.softmax(logits, dim=1)
    return class_probs[0][1].item()

# Ensemble prediction with adaptive thresholding
def ensemble_predict(step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble=True):
    """Classify one transaction with both models and an adaptive threshold.

    Both models are always run on the engineered text. When
    ``use_ensemble`` is falsy, only the individual scores are reported.
    Otherwise the scores are blended (0.6 ALBERT, 0.4 FinBERT) and compared
    against a threshold that starts at 0.5 and is lowered by the first
    matching rule:

    * amount above 1,000,000: lowered by 0.1
    * originating balance emptied (``zero_balance == 1``): lowered by 0.15
    * ``abs(orig_diff)`` above 1000: lowered by 0.08

    Returns:
        A 5-tuple ``(label, ensemble_score, albert_score, finbert_score,
        threshold)``. With the ensemble enabled, ``label`` is ``"Fraud"``
        or ``"Not Fraud"``. With it disabled, ``label`` is
        ``"See individual scores"``, ``ensemble_score`` is 0 and
        ``threshold`` is 0.5.
    """
    text, metadata = engineer_features(
        step, tx_type, amount, old_org, new_org, old_dest, new_dest
    )

    albert_score = predict_single_model(text, "ALBERT")
    finbert_score = predict_single_model(text, "FinBERT")

    # Comparison mode: hand back the raw per-model scores with placeholders.
    if not use_ensemble:
        return "See individual scores", 0, albert_score, finbert_score, 0.5

    # ALBERT is weighted higher than FinBERT in the blend.
    ensemble_score = 0.6 * albert_score + 0.4 * finbert_score

    # Only the first matching rule applies; the rules are not cumulative.
    base_threshold = 0.5
    if metadata['amount'] > 1000000:
        reduction = 0.1
    elif metadata['zero_balance'] == 1:
        reduction = 0.15
    elif abs(metadata['orig_diff']) > 1000:
        reduction = 0.08
    else:
        reduction = 0
    threshold = base_threshold - reduction

    if ensemble_score > threshold:
        result = "Fraud"
    else:
        result = "Not Fraud"

    return result, ensemble_score, albert_score, finbert_score, threshold

# Gradio Interface
# Options offered by the transaction-type dropdown.
TX_TYPE_CHOICES = ["CASH_OUT", "TRANSFER", "PAYMENT", "DEBIT", "CASH_IN"]

# Sample rows for the examples widget. Column order matches the input
# components: step, type, amount, old/new origin balance, old/new
# destination balance, use-ensemble flag.
examples = [
    [151, "CASH_OUT", 1633227.0, 1633227.0, 0.0, 2865353.22, 4498580.23, True],
    [353, "CASH_OUT", 174566.53, 174566.53, 0.0, 1191715.74, 1366282.27, True],
    [357, "TRANSFER", 484493.06, 484493.06, 0.0, 0.0, 0.0, True],
    [43, "CASH_OUT", 81571.63, 0.0, 0.0, 176194.2, 257765.83, True],
    [307, "DEBIT", 247.82, 11544.0, 11296.18, 3550535.53, 3550783.36, True],
    [350, "DEBIT", 4330.57, 3766.0, 0.0, 239435.41, 243765.98, True],
]

with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Advanced Hybrid Fraud Detection System")

    # --- Inputs: one row per group of related fields ---
    with gr.Row():
        step = gr.Number(label="Step", value=1)
        tx_type = gr.Dropdown(choices=TX_TYPE_CHOICES, label="Transaction Type")
        amount = gr.Number(label="Amount", value=0.0)

    with gr.Row():
        old_org = gr.Number(label="Old Balance Orig", value=0.0)
        new_org = gr.Number(label="New Balance Orig", value=0.0)

    with gr.Row():
        old_dest = gr.Number(label="Old Balance Dest", value=0.0)
        new_dest = gr.Number(label="New Balance Dest", value=0.0)

    with gr.Row():
        use_ensemble = gr.Checkbox(label="Use Ensemble Model", value=True)

    with gr.Row():
        predict_btn = gr.Button("Predict")

    # --- Outputs ---
    with gr.Row():
        pred_label = gr.Label(label="Final Prediction")
        ensemble_score = gr.Number(label="Ensemble Score")

    with gr.Row():
        albert_score = gr.Number(label="ALBERT Score")
        finbert_score = gr.Number(label="FinBERT Score")
        threshold = gr.Number(label="Applied Threshold")

    # Component order mirrors the parameter order and the return tuple of
    # `ensemble_predict`.
    input_components = [
        step,
        tx_type,
        amount,
        old_org,
        new_org,
        old_dest,
        new_dest,
        use_ensemble,
    ]
    output_components = [
        pred_label,
        ensemble_score,
        albert_score,
        finbert_score,
        threshold,
    ]

    predict_btn.click(
        fn=ensemble_predict,
        inputs=input_components,
        outputs=output_components,
    )

    gr.Examples(examples=examples, inputs=input_components)

# Launch the app only when this file is executed as a script, so importing
# the module (for example to reuse `demo`) does not start it.
if __name__ == "__main__":
    demo.launch()