# NOTE: The lines originally here were residue from the hosting site's file
# viewer, not part of the program: Space status "Sleeping", file size
# 5,858 bytes, identifier bca978b (presumably the commit hash), followed by
# the viewer's line-number gutter (1-137).
import gradio as gr
import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification, BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F
# Load the two fine-tuned sequence classifiers and their tokenizers once at
# import time. Each model is switched to evaluation mode right after loading.
_ALBERT_CHECKPOINT = "Deepaksai1/albert-fraud-detector-v2"
_FINBERT_CHECKPOINT = "Deepaksai1/finbert-fraud-detector-v2"

albert_model = AlbertForSequenceClassification.from_pretrained(_ALBERT_CHECKPOINT)
albert_model.eval()
albert_tokenizer = AlbertTokenizer.from_pretrained(_ALBERT_CHECKPOINT)

finbert_model = BertForSequenceClassification.from_pretrained(_FINBERT_CHECKPOINT)
finbert_model.eval()
finbert_tokenizer = BertTokenizer.from_pretrained(_FINBERT_CHECKPOINT)
# Feature engineering function
def engineer_features(step, tx_type: str, amount, old_org, new_org, old_dest, new_dest) -> tuple:
    """Build the model prompt and threshold metadata for one transaction.

    Args:
        step: Step value of the transaction (passed through to the text).
        tx_type: Transaction type label (passed through to the text).
        amount: Transaction amount.
        old_org: Originator balance before the transaction.
        new_org: Originator balance after the transaction.
        old_dest: Destination balance before the transaction.
        new_dest: Destination balance after the transaction.

    Returns:
        A ``(text, metadata)`` tuple. ``text`` is the comma-separated
        "Key: value" string fed to the tokenizers; ``metadata`` is a dict
        with the keys ``'amount'``, ``'zero_balance'`` and ``'orig_diff'``.
    """
    # Derived features: how far each side's balance change deviates from
    # the stated amount.
    orig_diff = amount - (old_org - new_org)
    dest_diff = (new_dest - old_dest) - amount
    # Flag set when the originator account ends at exactly zero.
    zero_balance = 1 if new_org == 0 else 0
    # Guard against division by zero (or a negative balance); the fallback
    # stays the integer 0 so the rendered text is unchanged.
    amount_fraction = amount / old_org if old_org > 0 else 0

    # Enhanced text representation with engineered features. The exact
    # wording is the model input format and must not be altered.
    text = (
        f"Step: {step}, Type: {tx_type}, Amount: {amount}, "
        f"OldBalOrig: {old_org}, NewBalOrig: {new_org}, "
        f"OldBalDest: {old_dest}, NewBalDest: {new_dest}, "
        f"OrigDiff: {orig_diff}, DestDiff: {dest_diff}, "
        f"ZeroBalance: {zero_balance}, AmountFraction: {amount_fraction}"
    )

    # Transaction metadata returned alongside the text for the caller.
    metadata = {
        'amount': amount,
        'zero_balance': zero_balance,
        'orig_diff': orig_diff,
    }
    return text, metadata
# Individual model prediction
def predict_single_model(text, model_name):
    """Score one transaction text with a single classifier.

    Args:
        text: Transaction description string to classify.
        model_name: ``"ALBERT"`` selects the ALBERT model and tokenizer;
            any other value selects the FinBERT pair.

    Returns:
        The softmax probability at class index 1 of the first (only) row,
        as a Python float. The code treats this value as the fraud score.
    """
    if model_name == "ALBERT":
        tokenizer, model = albert_tokenizer, albert_model
    else:
        tokenizer, model = finbert_tokenizer, finbert_model

    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)

    # Inference only: no gradient tracking needed.
    with torch.no_grad():
        outputs = model(**inputs)
        probs = F.softmax(outputs.logits, dim=1)
        fraud_score = probs[0][1].item()
    return fraud_score
# Ensemble prediction with adaptive thresholding
def ensemble_predict(step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble=True):
    """Classify a transaction using both models, optionally as an ensemble.

    Args:
        step, tx_type, amount, old_org, new_org, old_dest, new_dest:
            Raw transaction fields, forwarded to ``engineer_features``.
        use_ensemble: When true, combine both model scores and compare the
            result against an adaptive threshold. When false, only the
            individual scores are reported.

    Returns:
        A 5-tuple ``(result, ensemble_score, albert_score, finbert_score,
        threshold)``. In ensemble mode ``result`` is ``"Fraud"`` or
        ``"Not Fraud"``. Otherwise it is ``"See individual scores"`` with
        an ensemble score of ``0`` and a threshold of ``0.5``.
    """
    # Engineer features
    text, metadata = engineer_features(step, tx_type, amount, old_org, new_org, old_dest, new_dest)

    # Get individual model predictions
    albert_score = predict_single_model(text, "ALBERT")
    finbert_score = predict_single_model(text, "FinBERT")

    if use_ensemble:
        # Weighted ensemble (ALBERT performs better so weighted higher)
        weights = {"ALBERT": 0.6, "FinBERT": 0.4}
        ensemble_score = weights["ALBERT"] * albert_score + weights["FinBERT"] * finbert_score

        # Adaptive thresholding based on transaction characteristics.
        # Only the first matching rule applies; the order is significant.
        base_threshold = 0.5
        if metadata['amount'] > 1000000:  # High-value transaction
            threshold = base_threshold - 0.1  # Lower threshold for high-risk
        elif metadata['zero_balance'] == 1:  # Account emptying
            threshold = base_threshold - 0.15
        elif abs(metadata['orig_diff']) > 1000:  # Suspicious balance difference
            threshold = base_threshold - 0.08
        else:
            threshold = base_threshold

        is_fraud = ensemble_score > threshold
        result = "Fraud" if is_fraud else "Not Fraud"

        # Return individual scores as well for transparency
        return result, ensemble_score, albert_score, finbert_score, threshold
    else:
        # For comparison, return individual model results
        return "See individual scores", 0, albert_score, finbert_score, 0.5
# Gradio Interface
# Layout: input rows for the transaction fields, an ensemble toggle, a predict
# button, then output rows for the verdict, the scores and the threshold used.
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Advanced Hybrid Fraud Detection System")

    # --- Inputs ---
    with gr.Row():
        step = gr.Number(label="Step", value=1)
        tx_type = gr.Dropdown(choices=["CASH_OUT", "TRANSFER", "PAYMENT", "DEBIT", "CASH_IN"],
                              label="Transaction Type")
        amount = gr.Number(label="Amount", value=0.0)
    with gr.Row():
        old_org = gr.Number(label="Old Balance Orig", value=0.0)
        new_org = gr.Number(label="New Balance Orig", value=0.0)
    with gr.Row():
        old_dest = gr.Number(label="Old Balance Dest", value=0.0)
        new_dest = gr.Number(label="New Balance Dest", value=0.0)
    with gr.Row():
        use_ensemble = gr.Checkbox(label="Use Ensemble Model", value=True)
    with gr.Row():
        predict_btn = gr.Button("Predict")

    # --- Outputs ---
    with gr.Row():
        pred_label = gr.Label(label="Final Prediction")
        ensemble_score = gr.Number(label="Ensemble Score")
    with gr.Row():
        albert_score = gr.Number(label="ALBERT Score")
        finbert_score = gr.Number(label="FinBERT Score")
        threshold = gr.Number(label="Applied Threshold")

    # Bind function: input order matches ensemble_predict's parameters and
    # output order matches the 5-tuple it returns.
    predict_btn.click(
        fn=ensemble_predict,
        inputs=[step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble],
        outputs=[pred_label, ensemble_score, albert_score, finbert_score, threshold]
    )

    # Example transactions (columns follow the `inputs` list below)
    examples = [
        [151, "CASH_OUT", 1633227.0, 1633227.0, 0.0, 2865353.22, 4498580.23, True],
        [353, "CASH_OUT", 174566.53, 174566.53, 0.0, 1191715.74, 1366282.27, True],
        [357, "TRANSFER", 484493.06, 484493.06, 0.0, 0.0, 0.0, True],
        [43, "CASH_OUT", 81571.63, 0.0, 0.0, 176194.2, 257765.83, True],
        [307, "DEBIT", 247.82, 11544.0, 11296.18, 3550535.53, 3550783.36, True],
        [350, "DEBIT", 4330.57, 3766.0, 0.0, 239435.41, 243765.98, True]
    ]
    gr.Examples(examples=examples,
                inputs=[step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble])
# Launch app
# Only start the Gradio server when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()