# src/streamlit_app.py — uploaded by Zishaan7 (commit 90ca891, verified)
import os
import streamlit as st
import pandas as pd
import joblib
import numpy as np
from huggingface_hub import hf_hub_download
# --- Hugging Face Cache ---
# Redirect the HF download cache to /tmp, which is writable in the
# Streamlit/Spaces sandbox (the default home directory may not be).
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.makedirs("/tmp/hf_cache", exist_ok=True)
# Hub dataset repo and artifact name holding the trained XGBoost model.
REPO_ID = "Zishaan7/AI_financial_fraud_dataset"
FILENAME = "xgb_fraud_model2.joblib"
st.title("🚨 Financial Fraud Detection App (XGBoost)")
# --- Feature Engineering ---
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive the model's input features from raw PaySim-style transactions.

    Drops account identifiers, coerces the expected numeric columns
    (creating any that are missing, filled with 0.0), one-hot encodes the
    transaction ``type`` (defaulting to a single ``UNKNOWN`` category), and
    adds balance-delta, ratio, and rule-based flag features.

    Args:
        df: Raw transaction frame. Expected columns include ``step``,
            ``amount``, ``oldbalanceOrg``, ``newbalanceOrig``,
            ``oldbalanceDest``, ``newbalanceDest`` and optionally ``type``.

    Returns:
        A new DataFrame (the input is not mutated) with engineered columns.
    """
    d = df.copy()
    # Account identifiers carry no signal for the model.
    d.drop(columns=["nameOrig", "nameDest"], inplace=True, errors="ignore")

    numeric_cols = ['step', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
                    'oldbalanceDest', 'newbalanceDest']
    for c in numeric_cols:
        if c in d.columns:
            d[c] = pd.to_numeric(d[c], errors='coerce').fillna(0.0)
        else:
            # Fix: the previous code did pd.to_numeric(d.get(c, 0.0)), which
            # returns a *scalar* when the column is absent, so the chained
            # .fillna(0.0) raised AttributeError. Create the column instead.
            d[c] = 0.0

    # One-hot encode the transaction type; a missing column collapses to a
    # single UNKNOWN category (the two original branches were equivalent).
    if 'type' not in d.columns:
        d['type'] = "UNKNOWN"
    type_dummies = pd.get_dummies(d['type'].astype(str), prefix="type")
    d = pd.concat([d, type_dummies], axis=1)

    # Balance-delta features: exactly 0 when the ledger is self-consistent.
    d['orig_delta'] = d['oldbalanceOrg'] - d['newbalanceOrig'] - d['amount']
    d['dest_delta'] = d['newbalanceDest'] - d['oldbalanceDest'] - d['amount']
    # clip(lower=0) guards log1p against negative amounts in dirty data.
    d['logAmount'] = np.log1p(d['amount'].clip(lower=0))
    # Ratios default to 0.0 rather than dividing by a zero balance.
    d['origBalanceRatio'] = np.where(d['oldbalanceOrg'] != 0, d['amount'] / d['oldbalanceOrg'], 0.0)
    d['destBalanceRatio'] = np.where(d['oldbalanceDest'] != 0, d['amount'] / d['oldbalanceDest'], 0.0)
    d['origZeroBalance'] = (d['oldbalanceOrg'] == 0).astype(int)
    d['destZeroBalance'] = (d['oldbalanceDest'] == 0).astype(int)
    # Rule flags (1e-9 tolerance for float noise): 1 when the recorded
    # balances do not reconcile with the amount moved.
    d['rule_orig_inconsistent'] = (d['orig_delta'].abs() > 1e-9).astype(int)
    d['rule_dest_inconsistent'] = (d['dest_delta'].abs() > 1e-9).astype(int)
    # Origin account emptied by exactly this amount.
    d['rule_zero_origin_drain'] = ((d['newbalanceOrig'].abs() <= 1e-9) &
                                   (d['oldbalanceOrg'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    # Destination account first funded by exactly this amount.
    d['rule_zero_dest_firstload'] = ((d['oldbalanceDest'].abs() <= 1e-9) &
                                     (d['newbalanceDest'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    return d
# --- File uploader ---
uploaded_file = st.file_uploader("πŸ“‚ Upload your transaction CSV file", type=["csv"])

if uploaded_file is not None:
    with st.spinner("⏳ Downloading model and processing file..."):
        # Load the model lazily so the page renders before any download,
        # and only pays the cost when a file is actually uploaded.
        try:
            model_path = hf_hub_download(
                repo_id=REPO_ID,
                filename=FILENAME,
                repo_type="dataset",  # the artifact lives in a dataset repo
            )
            model = joblib.load(model_path)
        except Exception as e:
            st.error(f"❌ Error loading model: {e}")
            st.stop()

        try:
            df = pd.read_csv(uploaded_file)
            st.subheader("πŸ“‹ Uploaded Data (First 5 rows)")
            st.write(df.head())

            df_processed = feature_engineering(df)

            # Fixed feature order the model was trained on, plus whatever
            # one-hot "type_*" columns this upload produced.
            model_features = [
                'step', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
                'oldbalanceDest', 'newbalanceDest',
                'orig_delta', 'dest_delta', 'logAmount',
                'origBalanceRatio', 'destBalanceRatio',
                'origZeroBalance', 'destZeroBalance',
                'rule_orig_inconsistent', 'rule_dest_inconsistent',
                'rule_zero_origin_drain', 'rule_zero_dest_firstload',
            ]
            model_features.extend(
                c for c in df_processed.columns if c.startswith("type_")
            )
            # Backfill any feature this upload did not yield so the
            # column selection below cannot fail.
            for f in model_features:
                if f not in df_processed.columns:
                    df_processed[f] = 0.0

            preds = model.predict(df_processed[model_features])
            df["isFraud"] = np.where(preds == 1, "YES", "NO")

            st.subheader("βœ… Predictions")
            # Fix: only show columns actually present in the upload — the
            # previous hard-coded list raised KeyError (reported as a
            # generic processing error) when e.g. 'type' was absent.
            display_cols = [c for c in ["step", "type", "amount",
                                        "oldbalanceOrg", "newbalanceOrig",
                                        "oldbalanceDest", "newbalanceDest",
                                        "isFraud"]
                            if c in df.columns]
            st.dataframe(df[display_cols])

            csv_download = df.to_csv(index=False).encode("utf-8")
            st.download_button("⬇️ Download Predictions CSV", csv_download,
                               "fraud_predictions.csv", "text/csv")
            st.success("πŸŽ‰ File processed successfully!")
        except Exception as e:
            st.error(f"❌ Error processing file: {e}")
else:
    st.info("πŸ‘† Please upload a CSV file to start.")