# Source: Hugging Face Spaces - Deepaksai1/fraud_detection (status: Sleeping)
# File size: 5,858 Bytes, revision bca978b

import gradio as gr
import torch
from transformers import AlbertTokenizer, AlbertForSequenceClassification, BertTokenizer, BertForSequenceClassification
import torch.nn.functional as F

# Load models
# Each checkpoint id is named once so the model and its tokenizer always
# come from the same Hub repository.
_ALBERT_REPO = "Deepaksai1/albert-fraud-detector-v2"
_FINBERT_REPO = "Deepaksai1/finbert-fraud-detector-v2"

# Models are loaded once at import time and switched to evaluation mode.
albert_model = AlbertForSequenceClassification.from_pretrained(_ALBERT_REPO)
albert_model.eval()
albert_tokenizer = AlbertTokenizer.from_pretrained(_ALBERT_REPO)

finbert_model = BertForSequenceClassification.from_pretrained(_FINBERT_REPO)
finbert_model.eval()
finbert_tokenizer = BertTokenizer.from_pretrained(_FINBERT_REPO)

# Feature engineering function
def engineer_features(step, tx_type, amount, old_org, new_org, old_dest, new_dest):
    """Build the text prompt and threshold metadata for one transaction.

    Args:
        step: Step value of the transaction (passed through to the text).
        tx_type: Transaction type label (passed through to the text).
        amount: Transaction amount.
        old_org: Originator balance before the transaction.
        new_org: Originator balance after the transaction.
        old_dest: Destination balance before the transaction.
        new_dest: Destination balance after the transaction.

    Returns:
        A ``(text, metadata)`` tuple. ``text`` is a comma-separated
        ``"Label: value"`` string containing the raw inputs followed by the
        derived features; ``metadata`` is a dict with the keys ``'amount'``,
        ``'zero_balance'`` and ``'orig_diff'``.
    """
    # Gap between the amount and the drop in the originator's balance.
    orig_diff = amount - (old_org - new_org)
    # Gap between the rise in the destination's balance and the amount.
    dest_diff = (new_dest - old_dest) - amount

    # Flag transactions that leave the originator with exactly zero.
    if new_org == 0:
        zero_balance = 1
    else:
        zero_balance = 0

    # Share of the originator's balance being moved; the integer 0 fallback
    # avoids dividing by a zero (or negative) opening balance.
    if old_org > 0:
        amount_fraction = amount / old_org
    else:
        amount_fraction = 0

    # Insertion order defines the field order in the generated text.
    labelled_values = {
        "Step": step,
        "Type": tx_type,
        "Amount": amount,
        "OldBalOrig": old_org,
        "NewBalOrig": new_org,
        "OldBalDest": old_dest,
        "NewBalDest": new_dest,
        "OrigDiff": orig_diff,
        "DestDiff": dest_diff,
        "ZeroBalance": zero_balance,
        "AmountFraction": amount_fraction,
    }
    text = ", ".join(f"{label}: {value}" for label, value in labelled_values.items())

    metadata = {
        'amount': amount,
        'zero_balance': zero_balance,
        'orig_diff': orig_diff,
    }
    return text, metadata

# Individual model prediction
def predict_single_model(text, model_name):
    """Score ``text`` with one of the two loaded classifiers.

    Args:
        text: Transaction description to classify.
        model_name: ``"ALBERT"`` selects the ALBERT model and tokenizer; any
            other value selects the FinBERT pair.

    Returns:
        The softmax probability of class index 1 for the single input, as a
        Python float. The rest of this file treats it as the fraud score.
    """
    if model_name == "ALBERT":
        tokenizer, model = albert_tokenizer, albert_model
    else:
        # Every other name falls through to FinBERT.
        tokenizer, model = finbert_tokenizer, finbert_model

    encoded = tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=128,
    )

    # Inference only: no gradients are needed.
    with torch.no_grad():
        logits = model(**encoded).logits
        class_probs = F.softmax(logits, dim=1)
        score = class_probs[0][1].item()

    return score

# Ensemble prediction with adaptive thresholding
def ensemble_predict(step, tx_type, amount, old_org, new_org, old_dest, new_dest, use_ensemble=True):
    """Classify a transaction with both models and an adaptive threshold.

    Args:
        step, tx_type, amount, old_org, new_org, old_dest, new_dest: Raw
            transaction fields, forwarded to ``engineer_features``.
        use_ensemble: When truthy, combine both model scores and apply the
            adaptive threshold; otherwise only report the individual scores.

    Returns:
        A 5-tuple ``(label, ensemble_score, albert_score, finbert_score,
        threshold)``. With the ensemble enabled, ``label`` is ``"Fraud"`` or
        ``"Not Fraud"``. With it disabled, the tuple is
        ``("See individual scores", 0, albert_score, finbert_score, 0.5)``.
    """
    text, metadata = engineer_features(step, tx_type, amount, old_org, new_org, old_dest, new_dest)

    # Both models are always evaluated, whichever mode is selected.
    albert_score = predict_single_model(text, "ALBERT")
    finbert_score = predict_single_model(text, "FinBERT")

    if not use_ensemble:
        # Comparison mode: no combined verdict, just the raw scores.
        return "See individual scores", 0, albert_score, finbert_score, 0.5

    # Weighted ensemble (ALBERT performs better so weighted higher)
    albert_weight = 0.6
    finbert_weight = 0.4
    ensemble_score = albert_weight * albert_score + finbert_weight * finbert_score

    # Adaptive thresholding: the first matching rule decides how far the
    # base threshold is lowered; later rules are not consulted.
    base_threshold = 0.5
    if metadata['amount'] > 1000000:
        # High-value transaction
        reduction = 0.1
    elif metadata['zero_balance'] == 1:
        # Account emptying
        reduction = 0.15
    elif abs(metadata['orig_diff']) > 1000:
        # Suspicious balance difference
        reduction = 0.08
    else:
        reduction = 0
    threshold = base_threshold - reduction

    result = "Fraud" if ensemble_score > threshold else "Not Fraud"

    # Individual scores are returned as well for transparency.
    return result, ensemble_score, albert_score, finbert_score, threshold

# Gradio Interface
with gr.Blocks() as demo:
    gr.Markdown("## 🔎 Advanced Hybrid Fraud Detection System")

    # --- Transaction inputs -------------------------------------------
    with gr.Row():
        step = gr.Number(label="Step", value=1)
        tx_type = gr.Dropdown(
            label="Transaction Type",
            choices=["CASH_OUT", "TRANSFER", "PAYMENT", "DEBIT", "CASH_IN"],
        )
        amount = gr.Number(label="Amount", value=0.0)

    with gr.Row():
        old_org = gr.Number(label="Old Balance Orig", value=0.0)
        new_org = gr.Number(label="New Balance Orig", value=0.0)

    with gr.Row():
        old_dest = gr.Number(label="Old Balance Dest", value=0.0)
        new_dest = gr.Number(label="New Balance Dest", value=0.0)

    with gr.Row():
        use_ensemble = gr.Checkbox(label="Use Ensemble Model", value=True)

    with gr.Row():
        predict_btn = gr.Button("Predict")

    # --- Prediction outputs -------------------------------------------
    with gr.Row():
        pred_label = gr.Label(label="Final Prediction")
        ensemble_score = gr.Number(label="Ensemble Score")

    with gr.Row():
        albert_score = gr.Number(label="ALBERT Score")
        finbert_score = gr.Number(label="FinBERT Score")
        threshold = gr.Number(label="Applied Threshold")

    # Component order must match the parameter order of ensemble_predict
    # and the order of the values it returns.
    form_inputs = [
        step, tx_type, amount,
        old_org, new_org,
        old_dest, new_dest,
        use_ensemble,
    ]
    result_outputs = [pred_label, ensemble_score, albert_score, finbert_score, threshold]

    # Bind function
    predict_btn.click(fn=ensemble_predict, inputs=form_inputs, outputs=result_outputs)

    # Example transactions; each row follows the same column order as
    # form_inputs.
    examples = [
        [151, "CASH_OUT", 1633227.0, 1633227.0, 0.0, 2865353.22, 4498580.23, True],
        [353, "CASH_OUT", 174566.53, 174566.53, 0.0, 1191715.74, 1366282.27, True],
        [357, "TRANSFER", 484493.06, 484493.06, 0.0, 0.0, 0.0, True],
        [43, "CASH_OUT", 81571.63, 0.0, 0.0, 176194.2, 257765.83, True],
        [307, "DEBIT", 247.82, 11544.0, 11296.18, 3550535.53, 3550783.36, True],
        [350, "DEBIT", 4330.57, 3766.0, 0.0, 239435.41, 243765.98, True],
    ]

    gr.Examples(examples=examples, inputs=form_inputs)

# Launch app
# Only start the Gradio app when this file is executed directly, so that
# importing the module does not call demo.launch().
if __name__ == "__main__":
    demo.launch()