import streamlit as st
import pandas as pd
import numpy as np
import joblib

from huggingface_hub import hf_hub_download
from datasets import load_dataset

# -----------------------------
# CONFIG (edit if needed)
# -----------------------------
# Hugging Face Hub model repository and the serialized model file fetched from it.
HF_MODEL_REPO = "nansri/wellness-purchase-predictor"
MODEL_FILENAME = "best_model.joblib"

# Hugging Face Hub dataset repository and the training CSV inside it.
# The training data is not used for fitting here; it is read to derive the
# feature list, the dropdown options and the default values shown in the form.
HF_DATASET_REPO = "nansri/visit-with-us-wellness"
TRAIN_FILE = "processed/train.csv"

# Page configuration is applied before any other Streamlit call in this script.
st.set_page_config(page_title="Wellness Package Predictor", layout="centered")

# -----------------------------
# Load model from HF hub
# -----------------------------
@st.cache_resource
def load_model():
    """Fetch the trained model artifact from the Hugging Face Hub and load it.

    The file ``MODEL_FILENAME`` is downloaded from the model repository
    ``HF_MODEL_REPO`` and deserialized with ``joblib``.

    Returns:
        The object stored in the joblib file (the fitted estimator used for
        prediction further down in this script).
    """
    # NOTE(security): joblib deserialization is pickle-based, so the model
    # repository must be one you trust.
    return joblib.load(
        hf_hub_download(
            repo_id=HF_MODEL_REPO,
            filename=MODEL_FILENAME,
            repo_type="model",
        )
    )

# -----------------------------
# Load training metadata (drives the form fields, dropdown options and defaults)
# -----------------------------
@st.cache_data
def load_train_metadata():
    """Derive the input-form metadata from the training split.

    Loads ``TRAIN_FILE`` from the dataset repository ``HF_DATASET_REPO``,
    treats every column except the target ``ProdTaken`` as a feature, and
    splits the features into numeric (NumPy number dtypes) and categorical
    (everything else).

    Returns:
        A 6-tuple ``(feature_cols, num_cols, cat_cols, medians, modes,
        categories)`` where:

        * ``feature_cols`` -- feature column names in training-file order.
        * ``num_cols`` -- the numeric subset of ``feature_cols``.
        * ``cat_cols`` -- the remaining (non-numeric) feature columns.
        * ``medians`` -- per numeric column, its median as ``float``; ``0.0``
          when the column has no non-null values.
        * ``modes`` -- per categorical column, its most frequent value as
          ``str``; ``""`` when the column has no non-null values.
        * ``categories`` -- per categorical column, the sorted list of its
          distinct non-null values rendered as ``str``.
    """
    ds = load_dataset(HF_DATASET_REPO, data_files={"train": TRAIN_FILE})
    train_df = ds["train"].to_pandas()

    # Feature columns are everything except the prediction target.
    feature_cols = [c for c in train_df.columns if c != "ProdTaken"]

    # Numeric vs. categorical split; the set keeps the membership test cheap.
    num_cols = train_df[feature_cols].select_dtypes(include=np.number).columns.tolist()
    numeric = set(num_cols)
    cat_cols = [c for c in feature_cols if c not in numeric]

    # Numeric defaults. An all-null (or empty) column has a NaN median, which
    # is not a usable widget default, so fall back to 0.0 in that case.
    medians = {}
    for c in num_cols:
        median = train_df[c].median()
        medians[c] = 0.0 if pd.isna(median) else float(median)

    # Categorical defaults: most frequent value, or "" when nothing is observed.
    modes = {
        c: str(train_df[c].mode(dropna=True).iloc[0]) if train_df[c].notna().any() else ""
        for c in cat_cols
    }

    # Dropdown options: distinct non-null values as sorted strings.
    categories = {
        c: sorted(str(x) for x in train_df[c].dropna().unique().tolist())
        for c in cat_cols
    }

    return feature_cols, num_cols, cat_cols, medians, modes, categories

# Resolve the cached model and the form metadata derived from the training data.
model = load_model()
(
    feature_cols,
    num_cols,
    cat_cols,
    medians,
    modes,
    categories,
) = load_train_metadata()

# -----------------------------
# UI
# -----------------------------
st.title("Wellness Tourism Package Purchase Predictor")
st.write("Enter customer details to predict likelihood of purchasing the Wellness Tourism Package.")

# Collected widget values, keyed by feature column name.
inputs = {}

with st.form("input_form"):
    st.subheader("Customer & Interaction Inputs")

    # One float number box per numeric feature, pre-filled with its median
    # (integer-like columns are entered as floats too).
    st.markdown("**Numeric Features**")
    for name in num_cols:
        inputs[name] = st.number_input(name, value=float(medians.get(name, 0.0)))

    # One dropdown per categorical feature, pre-selected on its mode; a column
    # with no known categories falls back to a free-text box.
    st.markdown("**Categorical Features**")
    for name in cat_cols:
        choices = categories.get(name, [])
        if not choices:
            inputs[name] = st.text_input(name, value=modes.get(name, ""))
            continue
        preferred = modes.get(name, choices[0])
        # If the stored mode is not among the options, select the first option.
        start = choices.index(preferred) if preferred in choices else 0
        inputs[name] = st.selectbox(name, options=choices, index=start)

    submitted = st.form_submit_button("Predict")

if submitted:
    # Build a single-row DataFrame from the form values, with columns in the
    # same order as the training features (rubric requirement).
    input_df = pd.DataFrame([inputs], columns=feature_cols)
    st.write("### Input DataFrame")
    st.dataframe(input_df)

    # Prefer a probability when the model exposes one. The capability check
    # avoids using an exception for the ordinary "no predict_proba" case.
    proba = None
    if hasattr(model, "predict_proba"):
        try:
            # Column 1 is taken as the probability of the positive class.
            proba = float(model.predict_proba(input_df)[:, 1][0])
        except Exception as exc:
            # Keep the best-effort fallback to a plain class prediction, but
            # tell the user why the probability is missing instead of hiding it.
            st.warning(
                f"Purchase probability unavailable ({exc}); "
                "showing the class prediction only."
            )

    if proba is not None:
        pred = int(proba >= 0.5)
        st.success(f"Prediction (ProdTaken): {pred}  |  Purchase Probability: {proba:.3f}")
    else:
        pred = int(model.predict(input_df)[0])
        st.success(f"Prediction (ProdTaken): {pred}")