# src/streamlit_app.py — uploaded by Zishaan7 (commit 90ca891, verified)
import os
import streamlit as st
import pandas as pd
import joblib
import numpy as np
from huggingface_hub import hf_hub_download
# --- Hugging Face Cache ---
# Redirect the HF download cache to /tmp, which is writable in the
# Streamlit/Spaces sandbox (the default home directory may not be).
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.makedirs("/tmp/hf_cache", exist_ok=True)
# Hub dataset repo and artifact name holding the trained XGBoost model.
REPO_ID = "Zishaan7/AI_financial_fraud_dataset"
FILENAME = "xgb_fraud_model2.joblib"
st.title("🚨 Financial Fraud Detection App (XGBoost)")
# --- Feature Engineering ---
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive the model's input features from raw PaySim-style transactions.

    Drops account identifiers, coerces the expected numeric columns
    (creating any that are missing, filled with 0.0), one-hot encodes the
    transaction ``type`` (defaulting to a single ``UNKNOWN`` category), and
    adds balance-delta, ratio, and rule-based flag features.

    Args:
        df: Raw transaction frame. Expected columns include ``step``,
            ``amount``, ``oldbalanceOrg``, ``newbalanceOrig``,
            ``oldbalanceDest``, ``newbalanceDest`` and optionally ``type``.

    Returns:
        A new DataFrame (the input is not mutated) with engineered columns.
    """
    d = df.copy()
    # Account identifiers carry no signal for the model.
    d.drop(columns=["nameOrig", "nameDest"], inplace=True, errors="ignore")

    numeric_cols = ['step', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
                    'oldbalanceDest', 'newbalanceDest']
    for c in numeric_cols:
        if c in d.columns:
            d[c] = pd.to_numeric(d[c], errors='coerce').fillna(0.0)
        else:
            # Fix: the previous code did pd.to_numeric(d.get(c, 0.0)), which
            # returns a *scalar* when the column is absent, so the chained
            # .fillna(0.0) raised AttributeError. Create the column instead.
            d[c] = 0.0

    # One-hot encode the transaction type; a missing column collapses to a
    # single UNKNOWN category (the two original branches were equivalent).
    if 'type' not in d.columns:
        d['type'] = "UNKNOWN"
    type_dummies = pd.get_dummies(d['type'].astype(str), prefix="type")
    d = pd.concat([d, type_dummies], axis=1)

    # Balance-delta features: exactly 0 when the ledger is self-consistent.
    d['orig_delta'] = d['oldbalanceOrg'] - d['newbalanceOrig'] - d['amount']
    d['dest_delta'] = d['newbalanceDest'] - d['oldbalanceDest'] - d['amount']
    # clip(lower=0) guards log1p against negative amounts in dirty data.
    d['logAmount'] = np.log1p(d['amount'].clip(lower=0))
    # Ratios default to 0.0 rather than dividing by a zero balance.
    d['origBalanceRatio'] = np.where(d['oldbalanceOrg'] != 0, d['amount'] / d['oldbalanceOrg'], 0.0)
    d['destBalanceRatio'] = np.where(d['oldbalanceDest'] != 0, d['amount'] / d['oldbalanceDest'], 0.0)
    d['origZeroBalance'] = (d['oldbalanceOrg'] == 0).astype(int)
    d['destZeroBalance'] = (d['oldbalanceDest'] == 0).astype(int)
    # Rule flags (1e-9 tolerance for float noise): 1 when the recorded
    # balances do not reconcile with the amount moved.
    d['rule_orig_inconsistent'] = (d['orig_delta'].abs() > 1e-9).astype(int)
    d['rule_dest_inconsistent'] = (d['dest_delta'].abs() > 1e-9).astype(int)
    # Origin account emptied by exactly this amount.
    d['rule_zero_origin_drain'] = ((d['newbalanceOrig'].abs() <= 1e-9) &
                                   (d['oldbalanceOrg'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    # Destination account first funded by exactly this amount.
    d['rule_zero_dest_firstload'] = ((d['oldbalanceDest'].abs() <= 1e-9) &
                                     (d['newbalanceDest'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    return d
# --- File uploader ---
uploaded_file = st.file_uploader("πŸ“‚ Upload your transaction CSV file", type=["csv"])

if uploaded_file is not None:
    with st.spinner("⏳ Downloading model and processing file..."):
        # Load the model lazily so the page renders before any download,
        # and only pays the cost when a file is actually uploaded.
        try:
            model_path = hf_hub_download(
                repo_id=REPO_ID,
                filename=FILENAME,
                repo_type="dataset",  # the artifact lives in a dataset repo
            )
            model = joblib.load(model_path)
        except Exception as e:
            st.error(f"❌ Error loading model: {e}")
            st.stop()

        try:
            df = pd.read_csv(uploaded_file)
            st.subheader("πŸ“‹ Uploaded Data (First 5 rows)")
            st.write(df.head())

            df_processed = feature_engineering(df)

            # Fixed feature order the model was trained on, plus whatever
            # one-hot "type_*" columns this upload produced.
            model_features = [
                'step', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
                'oldbalanceDest', 'newbalanceDest',
                'orig_delta', 'dest_delta', 'logAmount',
                'origBalanceRatio', 'destBalanceRatio',
                'origZeroBalance', 'destZeroBalance',
                'rule_orig_inconsistent', 'rule_dest_inconsistent',
                'rule_zero_origin_drain', 'rule_zero_dest_firstload',
            ]
            model_features.extend(
                c for c in df_processed.columns if c.startswith("type_")
            )
            # Backfill any feature this upload did not yield so the
            # column selection below cannot fail.
            for f in model_features:
                if f not in df_processed.columns:
                    df_processed[f] = 0.0

            preds = model.predict(df_processed[model_features])
            df["isFraud"] = np.where(preds == 1, "YES", "NO")

            st.subheader("βœ… Predictions")
            # Fix: only show columns actually present in the upload — the
            # previous hard-coded list raised KeyError (reported as a
            # generic processing error) when e.g. 'type' was absent.
            display_cols = [c for c in ["step", "type", "amount",
                                        "oldbalanceOrg", "newbalanceOrig",
                                        "oldbalanceDest", "newbalanceDest",
                                        "isFraud"]
                            if c in df.columns]
            st.dataframe(df[display_cols])

            csv_download = df.to_csv(index=False).encode("utf-8")
            st.download_button("⬇️ Download Predictions CSV", csv_download,
                               "fraud_predictions.csv", "text/csv")
            st.success("πŸŽ‰ File processed successfully!")
        except Exception as e:
            st.error(f"❌ Error processing file: {e}")
else:
    st.info("πŸ‘† Please upload a CSV file to start.")