"""Streamlit app: score uploaded transaction CSVs with an XGBoost fraud model.

The model artifact is fetched from the Hugging Face Hub (dataset repo) and the
uploaded file is expanded into the engineered feature set the model was
trained on before prediction.
"""

import os

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download

# --- Hugging Face cache ---
# Redirect the HF cache into /tmp so the app also runs on hosts with a
# read-only home directory (e.g. container-based Streamlit deployments).
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.makedirs("/tmp/hf_cache", exist_ok=True)

REPO_ID = "Zishaan7/AI_financial_fraud_dataset"
FILENAME = "xgb_fraud_model2.joblib"

# Raw numeric input columns the pipeline expects; absent ones are filled with 0.
NUMERIC_COLS = ['step', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
                'oldbalanceDest', 'newbalanceDest']

st.title("🚨 Financial Fraud Detection App (XGBoost)")


@st.cache_resource(show_spinner=False)
def load_model():
    """Download and deserialize the XGBoost model (cached across reruns).

    Previously the model was re-downloaded and re-loaded on every upload;
    ``st.cache_resource`` keeps the lazy first-use load but reuses the
    object afterwards.
    """
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="dataset")
    return joblib.load(model_path)


# --- Feature Engineering ---
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive the model feature set from raw transaction columns.

    Adds balance-delta, log-amount, balance-ratio, zero-balance and
    rule-based consistency features, plus one-hot columns for the
    transaction ``type``. Missing numeric columns become 0.0 and a missing
    ``type`` column is treated as "UNKNOWN". The input frame is not mutated.
    """
    d = df.copy()
    # Account identifiers are dropped; they are not model inputs.
    d.drop(columns=["nameOrig", "nameDest"], inplace=True, errors="ignore")

    for c in NUMERIC_COLS:
        if c in d.columns:
            d[c] = pd.to_numeric(d[c], errors='coerce').fillna(0.0)
        else:
            # BUGFIX: the old `pd.to_numeric(d.get(c, 0.0)).fillna(0.0)` path
            # raised AttributeError (float has no .fillna) when `c` was absent.
            d[c] = 0.0

    # One-hot encode the transaction type (single path; the original
    # duplicated the get_dummies call in an else-branch).
    if 'type' not in d.columns:
        d['type'] = "UNKNOWN"
    d = pd.concat([d, pd.get_dummies(d['type'].astype(str), prefix="type")], axis=1)

    # Balance-conservation deltas: 0 means the books balance for that side.
    d['orig_delta'] = d['oldbalanceOrg'] - d['newbalanceOrig'] - d['amount']
    d['dest_delta'] = d['newbalanceDest'] - d['oldbalanceDest'] - d['amount']
    d['logAmount'] = np.log1p(d['amount'].clip(lower=0))
    # Ratios guarded against division by zero.
    d['origBalanceRatio'] = np.where(d['oldbalanceOrg'] != 0,
                                     d['amount'] / d['oldbalanceOrg'], 0.0)
    d['destBalanceRatio'] = np.where(d['oldbalanceDest'] != 0,
                                     d['amount'] / d['oldbalanceDest'], 0.0)
    d['origZeroBalance'] = (d['oldbalanceOrg'] == 0).astype(int)
    d['destZeroBalance'] = (d['oldbalanceDest'] == 0).astype(int)
    # Rule features: flag bookkeeping inconsistencies and suspicious patterns
    # (drained origin account, first-ever load on a destination account).
    d['rule_orig_inconsistent'] = (d['orig_delta'].abs() > 1e-9).astype(int)
    d['rule_dest_inconsistent'] = (d['dest_delta'].abs() > 1e-9).astype(int)
    d['rule_zero_origin_drain'] = ((d['newbalanceOrig'].abs() <= 1e-9) &
                                   (d['oldbalanceOrg'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    d['rule_zero_dest_firstload'] = ((d['oldbalanceDest'].abs() <= 1e-9) &
                                     (d['newbalanceDest'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    return d


# --- File uploader / main flow ---
uploaded_file = st.file_uploader("📂 Upload your transaction CSV file",
                                 type=["csv"])

if uploaded_file is not None:
    with st.spinner("⏳ Downloading model and processing file..."):
        # Load model lazily here (cached after the first call).
        try:
            model = load_model()
        except Exception as e:
            st.error(f"❌ Error loading model: {e}")
            st.stop()

        try:
            df = pd.read_csv(uploaded_file)
            st.subheader("📋 Uploaded Data (First 5 rows)")
            st.write(df.head())

            df_processed = feature_engineering(df)

            # Fixed feature order, then whatever type_* dummies the upload produced.
            model_features = NUMERIC_COLS + [
                'orig_delta', 'dest_delta', 'logAmount',
                'origBalanceRatio', 'destBalanceRatio',
                'origZeroBalance', 'destZeroBalance',
                'rule_orig_inconsistent', 'rule_dest_inconsistent',
                'rule_zero_origin_drain', 'rule_zero_dest_firstload',
            ]
            model_features.extend(c for c in df_processed.columns
                                  if c.startswith("type_"))

            # Pad any expected feature the upload did not yield.
            for f in model_features:
                if f not in df_processed.columns:
                    df_processed[f] = 0.0

            preds = model.predict(df_processed[model_features])
            # NOTE: overwrites any pre-existing `isFraud` ground-truth column.
            df["isFraud"] = np.where(preds == 1, "YES", "NO")

            st.subheader("✅ Predictions")
            # ROBUSTNESS: only show columns actually present in the upload, so
            # a file without e.g. `type` no longer raises KeyError here.
            display_cols = [c for c in ["step", "type", "amount", "oldbalanceOrg",
                                        "newbalanceOrig", "oldbalanceDest",
                                        "newbalanceDest", "isFraud"]
                            if c in df.columns]
            st.dataframe(df[display_cols])

            csv_download = df.to_csv(index=False).encode("utf-8")
            st.download_button("⬇️ Download Predictions CSV", csv_download,
                               "fraud_predictions.csv", "text/csv")
            st.success("🎉 File processed successfully!")
        except Exception as e:
            st.error(f"❌ Error processing file: {e}")
else:
    st.info("👆 Please upload a CSV file to start.")