"""Streamlit app: score uploaded transaction CSVs with an XGBoost fraud model.

The model artifact is fetched from the Hugging Face Hub (dataset repo) and the
uploaded file is expanded into the engineered feature set the model was
trained on before prediction.
"""

import os

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from huggingface_hub import hf_hub_download

# --- Hugging Face cache ---
# Redirect the HF cache into /tmp so the app also runs on hosts with a
# read-only home directory (e.g. container-based Streamlit deployments).
os.environ["HF_HOME"] = "/tmp/hf_cache"
os.makedirs("/tmp/hf_cache", exist_ok=True)

REPO_ID = "Zishaan7/AI_financial_fraud_dataset"
FILENAME = "xgb_fraud_model2.joblib"

# Raw numeric input columns the pipeline expects; absent ones are filled with 0.
NUMERIC_COLS = ['step', 'amount', 'oldbalanceOrg', 'newbalanceOrig',
                'oldbalanceDest', 'newbalanceDest']

st.title("🚨 Financial Fraud Detection App (XGBoost)")


@st.cache_resource(show_spinner=False)
def load_model():
    """Download and deserialize the XGBoost model (cached across reruns).

    Previously the model was re-downloaded and re-loaded on every upload;
    ``st.cache_resource`` keeps the lazy first-use load but reuses the
    object afterwards.
    """
    model_path = hf_hub_download(repo_id=REPO_ID, filename=FILENAME, repo_type="dataset")
    return joblib.load(model_path)


# --- Feature Engineering ---
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive the model feature set from raw transaction columns.

    Adds balance-delta, log-amount, balance-ratio, zero-balance and
    rule-based consistency features, plus one-hot columns for the
    transaction ``type``. Missing numeric columns become 0.0 and a missing
    ``type`` column is treated as "UNKNOWN". The input frame is not mutated.
    """
    d = df.copy()
    # Account identifiers are dropped; they are not model inputs.
    d.drop(columns=["nameOrig", "nameDest"], inplace=True, errors="ignore")

    for c in NUMERIC_COLS:
        if c in d.columns:
            d[c] = pd.to_numeric(d[c], errors='coerce').fillna(0.0)
        else:
            # BUGFIX: the old `pd.to_numeric(d.get(c, 0.0)).fillna(0.0)` path
            # raised AttributeError (float has no .fillna) when `c` was absent.
            d[c] = 0.0

    # One-hot encode the transaction type (single path; the original
    # duplicated the get_dummies call in an else-branch).
    if 'type' not in d.columns:
        d['type'] = "UNKNOWN"
    d = pd.concat([d, pd.get_dummies(d['type'].astype(str), prefix="type")], axis=1)

    # Balance-conservation deltas: 0 means the books balance for that side.
    d['orig_delta'] = d['oldbalanceOrg'] - d['newbalanceOrig'] - d['amount']
    d['dest_delta'] = d['newbalanceDest'] - d['oldbalanceDest'] - d['amount']
    d['logAmount'] = np.log1p(d['amount'].clip(lower=0))
    # Ratios guarded against division by zero.
    d['origBalanceRatio'] = np.where(d['oldbalanceOrg'] != 0,
                                     d['amount'] / d['oldbalanceOrg'], 0.0)
    d['destBalanceRatio'] = np.where(d['oldbalanceDest'] != 0,
                                     d['amount'] / d['oldbalanceDest'], 0.0)
    d['origZeroBalance'] = (d['oldbalanceOrg'] == 0).astype(int)
    d['destZeroBalance'] = (d['oldbalanceDest'] == 0).astype(int)
    # Rule features: flag bookkeeping inconsistencies and suspicious patterns
    # (drained origin account, first-ever load on a destination account).
    d['rule_orig_inconsistent'] = (d['orig_delta'].abs() > 1e-9).astype(int)
    d['rule_dest_inconsistent'] = (d['dest_delta'].abs() > 1e-9).astype(int)
    d['rule_zero_origin_drain'] = ((d['newbalanceOrig'].abs() <= 1e-9) &
                                   (d['oldbalanceOrg'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    d['rule_zero_dest_firstload'] = ((d['oldbalanceDest'].abs() <= 1e-9) &
                                     (d['newbalanceDest'].sub(d['amount']).abs() <= 1e-9)).astype(int)
    return d


# --- File uploader / main flow ---
uploaded_file = st.file_uploader("📂 Upload your transaction CSV file",
                                 type=["csv"])

if uploaded_file is not None:
    with st.spinner("⏳ Downloading model and processing file..."):
        # Load model lazily here (cached after the first call).
        try:
            model = load_model()
        except Exception as e:
            st.error(f"❌ Error loading model: {e}")
            st.stop()

        try:
            df = pd.read_csv(uploaded_file)
            st.subheader("📋 Uploaded Data (First 5 rows)")
            st.write(df.head())

            df_processed = feature_engineering(df)

            # Fixed feature order, then whatever type_* dummies the upload produced.
            model_features = NUMERIC_COLS + [
                'orig_delta', 'dest_delta', 'logAmount',
                'origBalanceRatio', 'destBalanceRatio',
                'origZeroBalance', 'destZeroBalance',
                'rule_orig_inconsistent', 'rule_dest_inconsistent',
                'rule_zero_origin_drain', 'rule_zero_dest_firstload',
            ]
            model_features.extend(c for c in df_processed.columns
                                  if c.startswith("type_"))

            # Pad any expected feature the upload did not yield.
            for f in model_features:
                if f not in df_processed.columns:
                    df_processed[f] = 0.0

            preds = model.predict(df_processed[model_features])
            # NOTE: overwrites any pre-existing `isFraud` ground-truth column.
            df["isFraud"] = np.where(preds == 1, "YES", "NO")

            st.subheader("✅ Predictions")
            # ROBUSTNESS: only show columns actually present in the upload, so
            # a file without e.g. `type` no longer raises KeyError here.
            display_cols = [c for c in ["step", "type", "amount", "oldbalanceOrg",
                                        "newbalanceOrig", "oldbalanceDest",
                                        "newbalanceDest", "isFraud"]
                            if c in df.columns]
            st.dataframe(df[display_cols])

            csv_download = df.to_csv(index=False).encode("utf-8")
            st.download_button("⬇️ Download Predictions CSV", csv_download,
                               "fraud_predictions.csv", "text/csv")
            st.success("🎉 File processed successfully!")
        except Exception as e:
            st.error(f"❌ Error processing file: {e}")
else:
    st.info("👆 Please upload a CSV file to start.")