# Importing packages
import streamlit as st
import pandas as pd
import numpy as np
from huggingface_hub import hf_hub_download
import joblib
import io

# App title and description
st.set_page_config(
    page_title="Wellness Tourism Prediction",
    page_icon="🏖️",
    layout="wide"
)

st.title("Wellness Tourism Prediction App")
st.markdown("""
This application predicts whether a customer is likely to purchase a wellness tourism package
based on their demographic, behavioral, and engagement data.
""")

# Sidebar 
with st.sidebar:
    st.header("About This Model")
    st.markdown("""
    **Model Details:**
    - Algorithm: XGBoost Classifier (pipeline with preprocessing)
    - Trained on: Wellness Tourism Dataset
    - Target: Product Taken (1 = Purchased, 0 = Not Purchased)
    **Key Features:**
    - Handles class imbalance with scale_pos_weight
    - Uses preprocessing pipeline (scaling + encoding)
    - Optimized for ROC-AUC score
    """)

    st.subheader("Model Performance")
    st.metric("ROC AUC", "0.9683")
    st.metric("Precision (Class 1)", "0.867")
    st.metric("Recall (Class 1)", "0.818")

# Load Model 
MODEL_REPO_ID = "simnid/wellness-tourism-model"
MODEL_FILENAME = "best_wellness_tourism_model.joblib"

@st.cache_resource
def load_model():
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            repo_type="model"
        )
        return joblib.load(model_path)
    except Exception as e:
        st.error(f"Error loading model: {e}")
        return None

model = load_model()
if model is None:
    st.warning("Model could not be loaded.")
    st.stop()

# --- Customer Input ---
st.header("Customer Information")
col1, col2, col3 = st.columns(3)

with col1:
    st.subheader("Demographics")
    Age = st.number_input("Age", 18, 80, 35, 1)
    Gender = st.selectbox("Gender", ["Male", "Female"])
    MaritalStatus = st.selectbox("Marital Status", ["Single", "Married", "Divorced", "Unmarried"])
    NumberOfChildrenVisiting = st.number_input("Number of Children Visiting", 0, 5, 0)
    Designation = st.selectbox("Designation", ["Executive", "Manager", "Senior Manager", "AVP", "VP"])

with col2:
    st.subheader("Travel Preferences")
    CityTier = st.selectbox("City Tier", [1, 2, 3])
    PreferredPropertyStar = st.selectbox("Preferred Property Star Rating", [3, 4, 5])
    Passport = st.selectbox("Has Passport", [0, 1], format_func=lambda x: "No" if x == 0 else "Yes")
    OwnCar = st.selectbox("Owns Car", [0, 1], format_func=lambda x: "No" if x == 0 else "Yes")
    NumberOfTrips = st.number_input("Number of Previous Trips", 0, 20, 2)

with col3:
    st.subheader("Engagement Details")
    TypeofContact = st.selectbox("Type of Contact", ["Self Enquiry", "Company Invited"])
    DurationOfPitch = st.number_input("Duration of Pitch (minutes)", 0.0, 60.0, 15.0, 0.5)
    NumberOfPersonVisiting = st.number_input("Number of People Visiting", 1, 10, 2)
    NumberOfFollowups = st.number_input("Number of Follow-ups", 0, 10, 3)
    ProductPitched = st.selectbox("Product Pitched", ["Basic", "Deluxe", "Standard", "Super Deluxe", "King"])
    PitchSatisfactionScore = st.slider("Pitch Satisfaction Score", 0.0, 5.0, 3.0, 0.1)

# Financial Information 
st.subheader("Financial Information")
col4, col5 = st.columns(2)
with col4:
    Occupation = st.selectbox("Occupation", ["Salaried", "Small Business", "Large Business", "Free Lancer"])
    MonthlyIncome = st.number_input("Monthly Income ($)", 1000, 1000000, 15000, 500)

with col5:
    PitchEfficiency = DurationOfPitch * PitchSatisfactionScore
    st.metric("Calculated Pitch Efficiency", f"{PitchEfficiency:.2f}")

# Assemble Input 
input_data = pd.DataFrame([{
    'Age': Age,
    'TypeofContact': TypeofContact,
    'CityTier': CityTier,
    'DurationOfPitch': DurationOfPitch,
    'Occupation': Occupation,
    'Gender': Gender,
    'NumberOfPersonVisiting': NumberOfPersonVisiting,
    'NumberOfFollowups': NumberOfFollowups,
    'ProductPitched': ProductPitched,
    'PreferredPropertyStar': PreferredPropertyStar,
    'MaritalStatus': MaritalStatus,
    'NumberOfTrips': NumberOfTrips,
    'Passport': Passport,
    'PitchSatisfactionScore': PitchSatisfactionScore,
    'OwnCar': OwnCar,
    'NumberOfChildrenVisiting': NumberOfChildrenVisiting,
    'Designation': Designation,
    'MonthlyIncome': MonthlyIncome,
    'PitchEfficiency': PitchEfficiency
}])

with st.expander("View Input Data"):
    st.dataframe(input_data)
    csv = input_data.to_csv(index=False).encode('utf-8')
    st.download_button("Download Input Data", csv, "input_data.csv", "text/csv")

# Prediction 
st.header("Prediction")
if st.button("Predict Purchase Probability", type="primary", use_container_width=True):
    with st.spinner("Making prediction..."):
        try:
            prediction_proba = model.predict_proba(input_data)[0]
            prediction_class = model.predict(input_data)[0]

            col_result1, col_result2 = st.columns(2)
            with col_result1:
                st.subheader("Prediction Result")
                if prediction_class == 1:
                    st.success("Customer is LIKELY to purchase")
                    st.balloons()
                else:
                    st.info("Customer is UNLIKELY to purchase")
            with col_result2:
                st.subheader("Probability Scores")
                st.metric("Probability of Purchase", f"{prediction_proba[1]*100:.1f}%")
                st.metric("Probability of No Purchase", f"{prediction_proba[0]*100:.1f}%")
                st.progress(int(prediction_proba[1]*100))

        except Exception as e:
            st.error(f"Error making prediction: {e}")

# Bulk CSV Prediction 
st.header("Bulk CSV Prediction")
BULK_TEST_FILENAME = "bulk_test_sample.csv"

@st.cache_resource
def load_bulk_sample():
    try:
        path = hf_hub_download(
            repo_id="simnid/wellness-tourism-dataset",
            filename=BULK_TEST_FILENAME,
            repo_type="dataset"
        )
        return pd.read_csv(path)
    except Exception as e:
        st.warning(f"Could not load bulk CSV: {e}")
        return None

bulk_sample = load_bulk_sample()
uploaded_file = st.file_uploader("Upload your CSV for bulk prediction", type=["csv"])
if uploaded_file:
    bulk_sample = pd.read_csv(uploaded_file)

if bulk_sample is not None:
    st.write("Bulk data preview:")
    st.dataframe(bulk_sample.head())
    if st.button("Predict Bulk Probabilities"):
        with st.spinner("Predicting..."):
            try:
                preds_proba = model.predict_proba(bulk_sample)
                preds_class = model.predict(bulk_sample)
                bulk_sample['Probability_Purchase'] = preds_proba[:,1]
                bulk_sample['Prediction'] = preds_class
                st.dataframe(bulk_sample)
                csv_bulk = bulk_sample.to_csv(index=False).encode('utf-8')
                st.download_button("Download Bulk Predictions", csv_bulk, "bulk_predictions.csv", "text/csv")
            except Exception as e:
                st.error(f"Error predicting bulk data: {e}")

# Footer 
st.markdown("---")
st.caption("Wellness Tourism Prediction Model | Built with XGBoost & Streamlit")