# fraud_detection / app.py
# Deepaksai1's picture
# Create app.py
# bca978b verified
import gradio as gr
import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification, BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F
# Load models
# Hub repository ids of the two fine-tuned sequence classifiers.
_ALBERT_CHECKPOINT = "Deepaksai1/albert-fraud-detector-v2"
_FINBERT_CHECKPOINT = "Deepaksai1/finbert-fraud-detector-v2"

# Each model is loaded once at import time and put into evaluation mode;
# the matching tokenizer comes from the same repository.
albert_model = AlbertForSequenceClassification.from_pretrained(_ALBERT_CHECKPOINT)
albert_model.eval()
albert_tokenizer = AlbertTokenizer.from_pretrained(_ALBERT_CHECKPOINT)

finbert_model = BertForSequenceClassification.from_pretrained(_FINBERT_CHECKPOINT)
finbert_model.eval()
finbert_tokenizer = BertTokenizer.from_pretrained(_FINBERT_CHECKPOINT)
# Feature engineering function
def engineer_features(step, tx_type, amount, old_org, new_org, old_dest, new_dest):
    """Build the text fed to the classifiers plus a few derived values.

    Args:
        step: Value rendered under the ``Step`` label.
        tx_type: Value rendered under the ``Type`` label.
        amount: Transaction amount.
        old_org: Origin balance before the transaction.
        new_org: Origin balance after the transaction.
        old_dest: Destination balance before the transaction.
        new_dest: Destination balance after the transaction.

    Returns:
        tuple: ``(text, metadata)``. ``text`` is a single comma-separated
        ``"Label: value"`` string containing the raw inputs followed by the
        derived features. ``metadata`` is a dict with the keys ``'amount'``,
        ``'zero_balance'`` and ``'orig_diff'``.
    """
    # Amount minus the drop in the origin balance.
    orig_diff = amount - (old_org - new_org)
    # Rise in the destination balance minus the amount.
    dest_diff = (new_dest - old_dest) - amount
    # 1 when the origin account ends at exactly zero, otherwise 0.
    zero_balance = int(new_org == 0)

    # Share of the origin balance being moved; falls back to the integer 0
    # (rendered as "0" in the text) when there is no positive balance.
    if old_org > 0:
        amount_fraction = amount / old_org
    else:
        amount_fraction = 0

    # Order and labels define the model input format, so keep them fixed.
    labelled_values = [
        ("Step", step),
        ("Type", tx_type),
        ("Amount", amount),
        ("OldBalOrig", old_org),
        ("NewBalOrig", new_org),
        ("OldBalDest", old_dest),
        ("NewBalDest", new_dest),
        ("OrigDiff", orig_diff),
        ("DestDiff", dest_diff),
        ("ZeroBalance", zero_balance),
        ("AmountFraction", amount_fraction),
    ]
    text = ", ".join(f"{label}: {value}" for label, value in labelled_values)

    metadata = {
        'amount': amount,
        'zero_balance': zero_balance,
        'orig_diff': orig_diff,
    }
    return text, metadata
# Individual model prediction
def predict_single_model(text, model_name):
    """Score ``text`` with one of the two loaded classifiers.

    Args:
        text: Transaction description to classify.
        model_name: ``"ALBERT"`` selects the ALBERT model and tokenizer;
            any other value selects the FinBERT pair.

    Returns:
        float: Softmax probability at class index 1 for the single input
        sequence. Callers in this file use it as the fraud score.
    """
    if model_name == "ALBERT":
        tokenizer, model = albert_tokenizer, albert_model
    else:
        tokenizer, model = finbert_tokenizer, finbert_model

    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        class_probs = F.softmax(logits, dim=1)

    return class_probs[0][1].item()
# Ensemble prediction with adaptive thresholding
def ensemble_predict(step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble=True):
    """Score a transaction with both models and optionally combine them.

    Args:
        step, tx_type, amount, old_org, new_org, old_dest, new_dest:
            Transaction fields, forwarded unchanged to ``engineer_features``.
        use_ensemble: When truthy, combine the two scores and classify
            against an adaptive threshold; otherwise only report the
            individual scores.

    Returns:
        tuple: ``(label, ensemble_score, albert_score, finbert_score,
        threshold)``. With ``use_ensemble`` the label is ``"Fraud"`` or
        ``"Not Fraud"``. Without it the label is ``"See individual scores"``,
        the ensemble score is ``0`` and the threshold is ``0.5``.
    """
    description, traits = engineer_features(
        step, tx_type, amount, old_org, new_org, old_dest, new_dest
    )

    # Both models are always evaluated; each branch reports both scores.
    albert_score = predict_single_model(description, "ALBERT")
    finbert_score = predict_single_model(description, "FinBERT")

    if not use_ensemble:
        # Comparison mode: no combined verdict, placeholder score/threshold.
        return "See individual scores", 0, albert_score, finbert_score, 0.5

    # Weighted average (ALBERT performs better so it is weighted higher).
    albert_weight, finbert_weight = 0.6, 0.4
    ensemble_score = albert_weight * albert_score + finbert_weight * finbert_score

    # Adaptive threshold: the first matching rule lowers the 0.5 baseline.
    # NOTE(review): rules are first-match, so a transaction above 1,000,000
    # that also empties the account gets the 0.1 reduction, not the larger
    # 0.15 one -- confirm this ordering is intended.
    base_threshold = 0.5
    if traits['amount'] > 1000000:
        reduction = 0.1  # high-value transaction
    elif traits['zero_balance'] == 1:
        reduction = 0.15  # origin account emptied
    elif abs(traits['orig_diff']) > 1000:
        reduction = 0.08  # suspicious balance difference
    else:
        reduction = 0.0
    threshold = base_threshold - reduction

    result = "Fraud" if ensemble_score > threshold else "Not Fraud"

    # Individual scores are returned as well for transparency.
    return result, ensemble_score, albert_score, finbert_score, threshold
# Gradio Interface
# Components are created in display order; each gr.Row groups the fields
# that sit side by side.
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Advanced Hybrid Fraud Detection System")

    # --- Transaction inputs -------------------------------------------------
    with gr.Row():
        step = gr.Number(label="Step", value=1)
        tx_type = gr.Dropdown(
            choices=["CASH_OUT", "TRANSFER", "PAYMENT", "DEBIT", "CASH_IN"],
            label="Transaction Type",
        )
        amount = gr.Number(label="Amount", value=0.0)

    with gr.Row():
        old_org = gr.Number(label="Old Balance Orig", value=0.0)
        new_org = gr.Number(label="New Balance Orig", value=0.0)

    with gr.Row():
        old_dest = gr.Number(label="Old Balance Dest", value=0.0)
        new_dest = gr.Number(label="New Balance Dest", value=0.0)

    with gr.Row():
        use_ensemble = gr.Checkbox(label="Use Ensemble Model", value=True)

    with gr.Row():
        predict_btn = gr.Button("Predict")

    # --- Prediction outputs -------------------------------------------------
    with gr.Row():
        pred_label = gr.Label(label="Final Prediction")
        ensemble_score = gr.Number(label="Ensemble Score")

    with gr.Row():
        albert_score = gr.Number(label="ALBERT Score")
        finbert_score = gr.Number(label="FinBERT Score")
        threshold = gr.Number(label="Applied Threshold")

    # Input order matches the positional parameters of ensemble_predict;
    # output order matches the tuple it returns.
    prediction_inputs = [
        step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble,
    ]
    prediction_outputs = [
        pred_label, ensemble_score, albert_score, finbert_score, threshold,
    ]

    predict_btn.click(
        fn=ensemble_predict,
        inputs=prediction_inputs,
        outputs=prediction_outputs,
    )

    # Sample transactions; each row has one value per input component, in the
    # same order as the inputs list above.
    examples = [
        [151, "CASH_OUT", 1633227.0, 1633227.0, 0.0, 2865353.22, 4498580.23, True],
        [353, "CASH_OUT", 174566.53, 174566.53, 0.0, 1191715.74, 1366282.27, True],
        [357, "TRANSFER", 484493.06, 484493.06, 0.0, 0.0, 0.0, True],
        [43, "CASH_OUT", 81571.63, 0.0, 0.0, 176194.2, 257765.83, True],
        [307, "DEBIT", 247.82, 11544.0, 11296.18, 3550535.53, 3550783.36, True],
        [350, "DEBIT", 4330.57, 3766.0, 0.0, 239435.41, 243765.98, True],
    ]
    gr.Examples(examples=examples, inputs=prediction_inputs)

# Start the web server only when the file is run as a script.
if __name__ == "__main__":
    demo.launch()