Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -1,38 +1,22 @@
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from transformers import AlbertTokenizer, AlbertForSequenceClassification, BertTokenizer, BertForSequenceClassification
|
| 4 |
-
from catboost import CatBoostClassifier
|
| 5 |
-
from huggingface_hub import hf_hub_download
|
| 6 |
import numpy as np
|
| 7 |
|
| 8 |
# Load ALBERT model and tokenizer from Hugging Face Hub
|
| 9 |
-
albert_model = AlbertForSequenceClassification.from_pretrained("Deepaksai1/albert-fraud-detector")
|
| 10 |
-
albert_tokenizer = AlbertTokenizer.from_pretrained("Deepaksai1/albert-fraud-detector")
|
| 11 |
albert_model.eval()
|
| 12 |
|
| 13 |
# Load FinBERT model and tokenizer from Hugging Face Hub
|
| 14 |
-
finbert_model = BertForSequenceClassification.from_pretrained("Deepaksai1/finbert-fraud-detector")
|
| 15 |
-
finbert_tokenizer = BertTokenizer.from_pretrained("Deepaksai1/finbert-fraud-detector")
|
| 16 |
finbert_model.eval()
|
| 17 |
|
| 18 |
-
# Download and load CatBoost model from Hugging Face Hub
|
| 19 |
-
catboost_model_path = hf_hub_download(repo_id="Deepaksai1/catboost-fraud-detector", filename="catboost_fraud_model.cbm")
|
| 20 |
-
catboost_model = CatBoostClassifier()
|
| 21 |
-
catboost_model.load_model(catboost_model_path)
|
| 22 |
-
|
| 23 |
-
# CatBoost prediction (simple numeric extraction)
def predict_with_catboost(text):
    """Classify a transaction using only its numeric Amount field.

    Looks for the first comma-separated segment containing "Amount",
    parses the value after the colon, and feeds it to the CatBoost model.

    Returns:
        (label, probability) where label is "Fraud"/"Not Fraud", or
        ("Invalid input", 0.0) when no parsable Amount field is present.
    """
    try:
        # First "Amount: <value>" segment; float() of the text after the colon.
        amount = float([s for s in text.split(',') if 'Amount' in s][0].split(':')[1].strip())
    except (IndexError, ValueError):
        # IndexError: no segment mentions "Amount"; ValueError: value not numeric.
        # Narrowed from a bare `except:` so real faults are not silently swallowed.
        return "Invalid input", 0.0
    # Single-feature model: classify on the transaction amount alone.
    prediction = catboost_model.predict([[amount]])[0]
    proba = catboost_model.predict_proba([[amount]])[0][1]
    return ("Fraud" if prediction == 1 else "Not Fraud"), float(proba)
|
| 32 |
-
|
| 33 |
# ALBERT prediction
|
|
|
|
| 34 |
def predict_with_albert(text):
|
| 35 |
-
inputs = albert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=
|
| 36 |
with torch.no_grad():
|
| 37 |
outputs = albert_model(**inputs)
|
| 38 |
probs = torch.nn.functional.softmax(outputs.logits, dim=1)
|
|
@@ -41,8 +25,9 @@ def predict_with_albert(text):
|
|
| 41 |
return ("Fraud" if pred_class == 1 else "Not Fraud"), float(pred_prob)
|
| 42 |
|
| 43 |
# FinBERT prediction
|
|
|
|
| 44 |
def predict_with_finbert(text):
|
| 45 |
-
inputs = finbert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=
|
| 46 |
with torch.no_grad():
|
| 47 |
outputs = finbert_model(**inputs)
|
| 48 |
probs = torch.nn.functional.softmax(outputs.logits, dim=1)
|
|
@@ -51,23 +36,23 @@ def predict_with_finbert(text):
|
|
| 51 |
return ("Fraud" if pred_class == 1 else "Not Fraud"), float(pred_prob)
|
| 52 |
|
| 53 |
# Model selector
def predict(text, model_name):
    """Route the transaction text to the classifier picked in the UI.

    Returns whatever (label, probability) pair the chosen backend produces,
    or ("Unknown Model", 0.0) for an unrecognized model name.
    """
    # Guard-clause dispatch: each recognized name returns immediately.
    if model_name == "ALBERT":
        return predict_with_albert(text)
    if model_name == "FinBERT":
        return predict_with_finbert(text)
    if model_name == "CatBoost":
        return predict_with_catboost(text)
    return "Unknown Model", 0.0
|
| 63 |
|
| 64 |
-
#
|
| 65 |
examples = [
|
| 66 |
-
"Step:
|
| 67 |
-
"Step:
|
| 68 |
-
"Step:
|
| 69 |
-
"Step:
|
| 70 |
-
"Step:
|
|
|
|
| 71 |
]
|
| 72 |
|
| 73 |
# Gradio Interface
|
|
@@ -75,15 +60,15 @@ gui = gr.Interface(
|
|
| 75 |
fn=predict,
|
| 76 |
inputs=[
|
| 77 |
gr.Textbox(label="Enter Transaction Description"),
|
| 78 |
-
gr.Dropdown(choices=["ALBERT", "FinBERT"
|
| 79 |
],
|
| 80 |
outputs=[
|
| 81 |
gr.Label(label="Prediction"),
|
| 82 |
gr.Number(label="Fraud Probability")
|
| 83 |
],
|
| 84 |
-
examples=
|
| 85 |
title="💸 Fraud Detection Assistant",
|
| 86 |
-
description="Analyze transaction text using ALBERT
|
| 87 |
)
|
| 88 |
|
| 89 |
# Launch the app
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import torch
|
| 3 |
from transformers import AlbertTokenizer, AlbertForSequenceClassification, BertTokenizer, BertForSequenceClassification
|
|
|
|
|
|
|
| 4 |
import numpy as np
|
| 5 |
|
| 6 |
# Hub repositories holding the fine-tuned fraud classifiers.
_ALBERT_REPO = "Deepaksai1/albert-fraud-detector-v2"
_FINBERT_REPO = "Deepaksai1/finbert-fraud-detector-v2"

# Load ALBERT model and tokenizer from Hugging Face Hub.
albert_model = AlbertForSequenceClassification.from_pretrained(_ALBERT_REPO)
albert_tokenizer = AlbertTokenizer.from_pretrained(_ALBERT_REPO)
albert_model.eval()  # inference mode: disables dropout/batch-norm updates

# Load FinBERT model and tokenizer from Hugging Face Hub.
finbert_model = BertForSequenceClassification.from_pretrained(_FINBERT_REPO)
finbert_tokenizer = BertTokenizer.from_pretrained(_FINBERT_REPO)
finbert_model.eval()  # inference mode
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# ALBERT prediction
def predict_with_albert(text):
    """Classify a transaction description with the fine-tuned ALBERT model.

    Returns:
        (label, probability): label is "Fraud" when the argmax class is 1,
        otherwise "Not Fraud"; probability is that of the predicted class.
    """
    inputs = albert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():  # inference only: skip gradient bookkeeping
        outputs = albert_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # NOTE(review): the two lines below were elided from the visible diff
    # context (gutter jumps 22 -> 25); reconstructed from the return
    # statement's use of pred_class / pred_prob — confirm against the repo.
    pred_class = torch.argmax(probs, dim=1).item()
    pred_prob = probs[0][pred_class].item()
    return ("Fraud" if pred_class == 1 else "Not Fraud"), float(pred_prob)
|
| 26 |
|
| 27 |
# FinBERT prediction
def predict_with_finbert(text):
    """Classify a transaction description with the fine-tuned FinBERT model.

    Returns:
        (label, probability): label is "Fraud" when the argmax class is 1,
        otherwise "Not Fraud"; probability is that of the predicted class.
    """
    inputs = finbert_tokenizer(text, return_tensors="pt", truncation=True, padding=True, max_length=128)
    with torch.no_grad():  # inference only: skip gradient bookkeeping
        outputs = finbert_model(**inputs)
    probs = torch.nn.functional.softmax(outputs.logits, dim=1)
    # NOTE(review): the two lines below were elided from the visible diff
    # context (gutter jumps 33 -> 36); reconstructed from the return
    # statement's use of pred_class / pred_prob — confirm against the repo.
    pred_class = torch.argmax(probs, dim=1).item()
    pred_prob = probs[0][pred_class].item()
    return ("Fraud" if pred_class == 1 else "Not Fraud"), float(pred_prob)
|
| 37 |
|
| 38 |
# Model selector
def predict(text, model_name):
    """Route the transaction text to the classifier picked in the dropdown.

    Returns the backend's (label, probability) pair, or
    ("Unknown Model", 0.0) for an unrecognized model name.
    """
    # Guard-clause dispatch: each recognized name returns immediately.
    if model_name == "ALBERT":
        return predict_with_albert(text)
    if model_name == "FinBERT":
        return predict_with_finbert(text)
    return "Unknown Model", 0.0
|
| 47 |
|
| 48 |
+
# Updated examples: 3 fraud + 3 non-fraud, using training-format features.
# Each entry is [transaction_text, model_name] matching the Interface inputs.
examples = [
    ["Step: 151, Type: CASH_OUT, Amount: 1633227.0, OldBalOrig: 1633227.0, NewBalOrig: 0.0, OldBalDest: 2865353.22, NewBalDest: 4498580.23", "ALBERT"],
    ["Step: 353, Type: CASH_OUT, Amount: 174566.53, OldBalOrig: 174566.53, NewBalOrig: 0.0, OldBalDest: 1191715.74, NewBalDest: 1366282.27", "FinBERT"],
    ["Step: 357, Type: TRANSFER, Amount: 484493.06, OldBalOrig: 484493.06, NewBalOrig: 0.0, OldBalDest: 0.0, NewBalDest: 0.0", "ALBERT"],
    ["Step: 43, Type: CASH_OUT, Amount: 81571.63, OldBalOrig: 0.0, NewBalOrig: 0.0, OldBalDest: 176194.2, NewBalDest: 257765.83", "FinBERT"],
    ["Step: 307, Type: DEBIT, Amount: 247.82, OldBalOrig: 11544.0, NewBalOrig: 11296.18, OldBalDest: 3550535.53, NewBalDest: 3550783.36", "ALBERT"],
    ["Step: 350, Type: DEBIT, Amount: 4330.57, OldBalOrig: 3766.0, NewBalOrig: 0.0, OldBalDest: 239435.41, NewBalDest: 243765.98", "FinBERT"],
]
|
| 57 |
|
| 58 |
# Gradio Interface
# NOTE(review): the opening "gui = gr.Interface(" line is elided from the
# visible diff context (it appears only in the hunk header
# "@@ -75,15 +60,15 @@ gui = gr.Interface("); reconstructed here — confirm
# the variable name against the repo.
gui = gr.Interface(
    fn=predict,  # (text, model_name) -> (label, probability)
    inputs=[
        gr.Textbox(label="Enter Transaction Description"),
        gr.Dropdown(choices=["ALBERT", "FinBERT"], label="Select Model", value="ALBERT")
    ],
    outputs=[
        gr.Label(label="Prediction"),
        gr.Number(label="Fraud Probability")
    ],
    examples=examples,  # clickable sample transactions defined above
    title="💸 Fraud Detection Assistant",
    description="Analyze transaction text using ALBERT or FinBERT models. Format: Step, Type, Amount, OldBalOrig, NewBalOrig, OldBalDest, NewBalDest."
)
|
| 73 |
|
| 74 |
# Launch the app
|