|
|
import os |
|
|
import streamlit as st |
|
|
import pandas as pd |
|
|
import joblib |
|
|
import numpy as np |
|
|
from huggingface_hub import hf_hub_download |
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Module configuration
# ---------------------------------------------------------------------------

# Writable location used for Hugging Face downloads.
HF_CACHE_DIR = "/tmp/hf_cache"

# NOTE(review): huggingface_hub is already imported above this assignment and
# may have resolved its default cache path at import time -- confirm that
# downloads actually land in HF_CACHE_DIR (or pass ``cache_dir`` explicitly
# to ``hf_hub_download``).
os.environ["HF_HOME"] = HF_CACHE_DIR
os.makedirs(HF_CACHE_DIR, exist_ok=True)

# Hugging Face repository and file name of the serialized model.
REPO_ID = "Zishaan7/AI_financial_fraud_dataset"
FILENAME = "xgb_fraud_model2.joblib"

# NOTE(review): the leading emoji was restored from mis-decoded UTF-8 text;
# the exact glyph is a best guess -- confirm against the intended UI copy.
st.title("🚨 Financial Fraud Detection App (XGBoost)")
|
|
|
|
|
|
|
|
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive the engineered feature columns from raw transaction rows.

    The input frame is never modified; all work happens on a copy.

    Steps performed:

    * drop the ``nameOrig`` / ``nameDest`` columns when present;
    * coerce the six numeric columns (``step``, ``amount`` and the four
      balance columns) to numbers, replacing unparsable or missing values
      with ``0.0``; a column that is absent altogether is created as ``0.0``;
    * one-hot encode ``type`` into ``type_<value>`` columns (a missing
      ``type`` column is created with the constant ``"UNKNOWN"``);
    * add balance deltas, ``log1p`` of the (non-negative) amount, balance
      ratios, zero-balance flags and four rule-based consistency flags.

    Args:
        df: Raw transactions, one row per transaction.

    Returns:
        A new DataFrame holding the retained input columns plus the one-hot
        and engineered columns.
    """
    numeric_columns = (
        "step",
        "amount",
        "oldbalanceOrg",
        "newbalanceOrig",
        "oldbalanceDest",
        "newbalanceDest",
    )
    # Absolute tolerance used when comparing balances for (in)equality.
    tol = 1e-9

    d = df.copy()
    d.drop(columns=["nameOrig", "nameDest"], inplace=True, errors="ignore")

    for col in numeric_columns:
        if col in d.columns:
            d[col] = pd.to_numeric(d[col], errors="coerce").fillna(0.0)
        else:
            # A missing column cannot go through ``to_numeric(...).fillna``:
            # the scalar default comes back as a plain float with no
            # ``fillna`` method, so create the column directly.
            d[col] = 0.0

    if "type" in d.columns:
        # Encode the string form; the original ``type`` column is kept as-is.
        type_labels = d["type"].astype(str)
    else:
        d["type"] = "UNKNOWN"
        type_labels = d["type"]
    d = pd.concat([d, pd.get_dummies(type_labels, prefix="type")], axis=1)

    amount = d["amount"]
    old_orig, new_orig = d["oldbalanceOrg"], d["newbalanceOrig"]
    old_dest, new_dest = d["oldbalanceDest"], d["newbalanceDest"]

    # Residuals of the expected balance movement; zero when the books balance.
    d["orig_delta"] = old_orig - new_orig - amount
    d["dest_delta"] = new_dest - old_dest - amount

    d["logAmount"] = np.log1p(amount.clip(lower=0))

    # Ratios are defined as 0.0 where the starting balance is zero.
    d["origBalanceRatio"] = np.where(old_orig != 0, amount / old_orig, 0.0)
    d["destBalanceRatio"] = np.where(old_dest != 0, amount / old_dest, 0.0)

    d["origZeroBalance"] = (old_orig == 0).astype(int)
    d["destZeroBalance"] = (old_dest == 0).astype(int)

    d["rule_orig_inconsistent"] = (d["orig_delta"].abs() > tol).astype(int)
    d["rule_dest_inconsistent"] = (d["dest_delta"].abs() > tol).astype(int)

    # Origin ends at zero and the amount equals the whole starting balance.
    d["rule_zero_origin_drain"] = (
        (new_orig.abs() <= tol) & (old_orig.sub(amount).abs() <= tol)
    ).astype(int)
    # Destination starts at zero and ends holding exactly the amount.
    d["rule_zero_dest_firstload"] = (
        (old_dest.abs() <= tol) & (new_dest.sub(amount).abs() <= tol)
    ).astype(int)

    return d
|
|
|
|
|
|
|
|
# ---------------------------------------------------------------------------
# Page flow: upload a CSV, score every row with the downloaded model, then
# show the predictions and offer them as a CSV download.
#
# NOTE(review): the emoji in the user-facing strings below were restored from
# mis-decoded UTF-8 text (one literal had even been split across two lines).
# The exact glyphs are a best guess -- confirm against the intended UI copy.
# ---------------------------------------------------------------------------

# Numeric and engineered columns handed to the model. Used as the feature
# list when the loaded model does not expose the names it was fitted with.
_BASE_MODEL_FEATURES = [
    "step", "amount", "oldbalanceOrg", "newbalanceOrig",
    "oldbalanceDest", "newbalanceDest",
    "orig_delta", "dest_delta", "logAmount",
    "origBalanceRatio", "destBalanceRatio",
    "origZeroBalance", "destZeroBalance",
    "rule_orig_inconsistent", "rule_dest_inconsistent",
    "rule_zero_origin_drain", "rule_zero_dest_firstload",
]

# Columns shown in the predictions table (those absent from the upload are
# skipped instead of raising a KeyError).
_DISPLAY_COLUMNS = [
    "step", "type", "amount", "oldbalanceOrg", "newbalanceOrig",
    "oldbalanceDest", "newbalanceDest", "isFraud",
]


def _model_input_frame(model, processed: pd.DataFrame) -> pd.DataFrame:
    """Select and order the columns of ``processed`` that are fed to ``model``.

    The one-hot ``type_*`` columns depend on which transaction types occur in
    the uploaded file, so when the model exposes ``feature_names_in_`` those
    names are used as the authoritative column list: columns missing from the
    upload are added as ``0.0`` and unexpected ones are dropped. Otherwise
    the base feature list plus every ``type_*`` column present is used.
    """
    expected = getattr(model, "feature_names_in_", None)
    if expected is not None and len(expected) > 0:
        columns = [str(name) for name in expected]
    else:
        type_columns = [c for c in processed.columns if c.startswith("type_")]
        columns = _BASE_MODEL_FEATURES + type_columns
    return processed.reindex(columns=columns, fill_value=0.0)


uploaded_file = st.file_uploader("📂 Upload your transaction CSV file", type=["csv"])

if uploaded_file is not None:
    with st.spinner("⏳ Downloading model and processing file..."):
        # --- Load the model -------------------------------------------------
        try:
            model_path = hf_hub_download(
                repo_id=REPO_ID,
                filename=FILENAME,
                repo_type="dataset",
                # HF_HOME is assigned after huggingface_hub was imported, so
                # the library's default cache path may not reflect it; pass
                # the directory explicitly (None keeps the library default).
                cache_dir=os.environ.get("HF_HOME"),
            )
            # NOTE: joblib.load unpickles the file, which can execute
            # arbitrary code -- only load artifacts from a trusted repo.
            model = joblib.load(model_path)
        except Exception as e:
            st.error(f"❌ Error loading model: {e}")
            st.stop()

        # --- Score the uploaded file ----------------------------------------
        try:
            df = pd.read_csv(uploaded_file)
            st.subheader("🔍 Uploaded Data (First 5 rows)")
            st.write(df.head())

            df_processed = feature_engineering(df)
            df_model = _model_input_frame(model, df_processed)

            preds = model.predict(df_model)
            df["isFraud"] = np.where(preds == 1, "YES", "NO")

            st.subheader("✅ Predictions")
            st.dataframe(df[[c for c in _DISPLAY_COLUMNS if c in df.columns]])

            csv_download = df.to_csv(index=False).encode("utf-8")
            st.download_button(
                "⬇️ Download Predictions CSV",
                csv_download,
                "fraud_predictions.csv",
                "text/csv",
            )
            st.success("🎉 File processed successfully!")
        except Exception as e:
            # Top-level UI boundary: report any processing failure to the user.
            st.error(f"❌ Error processing file: {e}")
else:
    st.info("👆 Please upload a CSV file to start.")
|
|
|