Spaces:
Sleeping
Sleeping
| # Importing packages | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from huggingface_hub import hf_hub_download | |
| import joblib | |
| import io | |
# App title and description
# Page-level settings are applied first, before any other Streamlit call in
# this section.
_page_settings = {
    "page_title": "Wellness Tourism Prediction",
    "page_icon": "🏖️",
    "layout": "wide",
}
st.set_page_config(**_page_settings)

# Introductory blurb shown under the title (same text as a single string).
_intro_markdown = (
    "\n"
    "This application predicts whether a customer is likely to purchase a wellness tourism package\n"
    "based on their demographic, behavioral, and engagement data.\n"
)
st.title("Wellness Tourism Prediction App")
st.markdown(_intro_markdown)
# Sidebar
# Static description of the model; kept as a module-level string so the
# markdown source is not affected by the indentation of the `with` block.
_sidebar_about_markdown = """
**Model Details:**
- Algorithm: XGBoost Classifier (pipeline with preprocessing)
- Trained on: Wellness Tourism Dataset
- Target: Product Taken (1 = Purchased, 0 = Not Purchased)
**Key Features:**
- Handles class imbalance with scale_pos_weight
- Uses preprocessing pipeline (scaling + encoding)
- Optimized for ROC-AUC score
"""

# (label, value) pairs displayed under "Model Performance", in display order.
# The values are hard-coded strings, not computed from the loaded model.
_sidebar_metrics = (
    ("ROC AUC", "0.9683"),
    ("Precision (Class 1)", "0.867"),
    ("Recall (Class 1)", "0.818"),
)

# NOTE(review): the performance metrics are assumed to live inside the
# sidebar together with the "About" text — confirm against the deployed layout.
with st.sidebar:
    st.header("About This Model")
    st.markdown(_sidebar_about_markdown)
    st.subheader("Model Performance")
    for _metric_label, _metric_value in _sidebar_metrics:
        st.metric(_metric_label, _metric_value)
# Load Model
# Hugging Face Hub coordinates of the serialized model artifact.
MODEL_REPO_ID = "simnid/wellness-tourism-model"
MODEL_FILENAME = "best_wellness_tourism_model.joblib"


@st.cache_resource
def _load_model_cached():
    """Download the model artifact from the Hub and deserialize it.

    Decorated with ``st.cache_resource`` so the download and deserialization
    happen once per server process instead of on every script rerun (Streamlit
    re-executes the whole script on each widget interaction).

    Exceptions are deliberately NOT handled here: a raised exception is not
    stored in the cache, so a transient failure is retried on the next rerun
    instead of a cached ``None`` being served forever.

    Returns:
        The object stored in the joblib file (used below via
        ``predict`` / ``predict_proba``).
    """
    model_path = hf_hub_download(
        repo_id=MODEL_REPO_ID,
        filename=MODEL_FILENAME,
        repo_type="model",
    )
    # NOTE(security): joblib.load unpickles the file and can execute arbitrary
    # code; MODEL_REPO_ID must only ever point at a trusted repository.
    return joblib.load(model_path)


def load_model():
    """Return the trained model, or ``None`` if it could not be loaded.

    Any failure (network, missing file, deserialization) is reported in the UI
    with ``st.error`` rather than raised, so the caller only has to check for
    ``None``.
    """
    try:
        return _load_model_cached()
    except Exception as e:  # UI boundary: surface every failure to the user
        st.error(f"Error loading model: {e}")
        return None


model = load_model()
if model is None:
    # Nothing below can work without a model; halt this script run.
    st.warning("Model could not be loaded.")
    st.stop()
# --- Customer Input ---
def _yes_no(flag):
    """Display label for a 0/1 select-box option: 0 -> "No", anything else -> "Yes"."""
    return "No" if flag == 0 else "Yes"


st.header("Customer Information")
demographics_col, travel_col, engagement_col = st.columns(3)

with demographics_col:
    st.subheader("Demographics")
    Age = st.number_input("Age", min_value=18, max_value=80, value=35, step=1)
    Gender = st.selectbox("Gender", options=["Male", "Female"])
    MaritalStatus = st.selectbox(
        "Marital Status",
        options=["Single", "Married", "Divorced", "Unmarried"],
    )
    NumberOfChildrenVisiting = st.number_input(
        "Number of Children Visiting", min_value=0, max_value=5, value=0
    )
    Designation = st.selectbox(
        "Designation",
        options=["Executive", "Manager", "Senior Manager", "AVP", "VP"],
    )

with travel_col:
    st.subheader("Travel Preferences")
    CityTier = st.selectbox("City Tier", options=[1, 2, 3])
    PreferredPropertyStar = st.selectbox(
        "Preferred Property Star Rating", options=[3, 4, 5]
    )
    # Binary flags are stored as 0/1 and only rendered as No/Yes.
    Passport = st.selectbox("Has Passport", options=[0, 1], format_func=_yes_no)
    OwnCar = st.selectbox("Owns Car", options=[0, 1], format_func=_yes_no)
    NumberOfTrips = st.number_input(
        "Number of Previous Trips", min_value=0, max_value=20, value=2
    )

with engagement_col:
    st.subheader("Engagement Details")
    TypeofContact = st.selectbox(
        "Type of Contact", options=["Self Enquiry", "Company Invited"]
    )
    DurationOfPitch = st.number_input(
        "Duration of Pitch (minutes)",
        min_value=0.0,
        max_value=60.0,
        value=15.0,
        step=0.5,
    )
    NumberOfPersonVisiting = st.number_input(
        "Number of People Visiting", min_value=1, max_value=10, value=2
    )
    NumberOfFollowups = st.number_input(
        "Number of Follow-ups", min_value=0, max_value=10, value=3
    )
    ProductPitched = st.selectbox(
        "Product Pitched",
        options=["Basic", "Deluxe", "Standard", "Super Deluxe", "King"],
    )
    PitchSatisfactionScore = st.slider(
        "Pitch Satisfaction Score",
        min_value=0.0,
        max_value=5.0,
        value=3.0,
        step=0.1,
    )

# Financial Information
st.subheader("Financial Information")
occupation_col, efficiency_col = st.columns(2)

with occupation_col:
    Occupation = st.selectbox(
        "Occupation",
        options=["Salaried", "Small Business", "Large Business", "Free Lancer"],
    )
    MonthlyIncome = st.number_input(
        "Monthly Income ($)",
        min_value=1000,
        max_value=1000000,
        value=15000,
        step=500,
    )

with efficiency_col:
    # Derived feature: pitch duration multiplied by the satisfaction score.
    PitchEfficiency = DurationOfPitch * PitchSatisfactionScore
    st.metric("Calculated Pitch Efficiency", f"{PitchEfficiency:.2f}")
# Assemble Input
# One-row frame holding every widget value plus the derived PitchEfficiency.
# The dict's insertion order defines the column order of the frame.
_feature_values = {
    "Age": Age,
    "TypeofContact": TypeofContact,
    "CityTier": CityTier,
    "DurationOfPitch": DurationOfPitch,
    "Occupation": Occupation,
    "Gender": Gender,
    "NumberOfPersonVisiting": NumberOfPersonVisiting,
    "NumberOfFollowups": NumberOfFollowups,
    "ProductPitched": ProductPitched,
    "PreferredPropertyStar": PreferredPropertyStar,
    "MaritalStatus": MaritalStatus,
    "NumberOfTrips": NumberOfTrips,
    "Passport": Passport,
    "PitchSatisfactionScore": PitchSatisfactionScore,
    "OwnCar": OwnCar,
    "NumberOfChildrenVisiting": NumberOfChildrenVisiting,
    "Designation": Designation,
    "MonthlyIncome": MonthlyIncome,
    "PitchEfficiency": PitchEfficiency,
}
input_data = pd.DataFrame([_feature_values])

# Let the user inspect and download exactly what will be sent to the model.
# NOTE(review): the download button is assumed to sit inside the expander —
# confirm against the deployed layout.
with st.expander("View Input Data"):
    st.dataframe(input_data)
    input_csv_bytes = input_data.to_csv(index=False).encode("utf-8")
    st.download_button(
        label="Download Input Data",
        data=input_csv_bytes,
        file_name="input_data.csv",
        mime="text/csv",
    )
# Prediction
st.header("Prediction")
predict_clicked = st.button(
    "Predict Purchase Probability",
    type="primary",
    use_container_width=True,
)
if predict_clicked:
    with st.spinner("Making prediction..."):
        try:
            # Single-row input, so take the first (only) row of each result.
            class_probabilities = model.predict_proba(input_data)[0]
            predicted_label = model.predict(input_data)[0]

            verdict_col, scores_col = st.columns(2)

            with verdict_col:
                st.subheader("Prediction Result")
                if predicted_label != 1:
                    st.info("Customer is UNLIKELY to purchase")
                else:
                    st.success("Customer is LIKELY to purchase")
                    st.balloons()

            with scores_col:
                st.subheader("Probability Scores")
                # Index 1 is read as the "purchase" class, index 0 as "no purchase".
                st.metric(
                    "Probability of Purchase",
                    f"{class_probabilities[1]*100:.1f}%",
                )
                st.metric(
                    "Probability of No Purchase",
                    f"{class_probabilities[0]*100:.1f}%",
                )
                # The progress bar receives a whole-number percentage.
                st.progress(int(class_probabilities[1]*100))
        except Exception as e:
            # Any model or rendering failure is shown in the UI, not raised.
            st.error(f"Error making prediction: {e}")
# Bulk CSV Prediction
st.header("Bulk CSV Prediction")
BULK_TEST_FILENAME = "bulk_test_sample.csv"


@st.cache_data
def _load_bulk_sample_cached():
    """Download the sample CSV from the dataset repo and parse it.

    Decorated with ``st.cache_data`` so the file is fetched and parsed once
    instead of on every script rerun. Exceptions propagate and are not cached,
    so a transient download failure is retried on the next rerun.

    Returns:
        pandas.DataFrame parsed from ``BULK_TEST_FILENAME``.
    """
    path = hf_hub_download(
        repo_id="simnid/wellness-tourism-dataset",
        filename=BULK_TEST_FILENAME,
        repo_type="dataset",
    )
    return pd.read_csv(path)


def load_bulk_sample():
    """Return the bundled sample DataFrame, or ``None`` if it is unavailable.

    Failures are reported with ``st.warning`` rather than raised, because the
    sample is optional: the user can still upload their own CSV.
    """
    try:
        return _load_bulk_sample_cached()
    except Exception as e:  # UI boundary: the sample is best-effort
        st.warning(f"Could not load bulk CSV: {e}")
        return None


bulk_sample = load_bulk_sample()
uploaded_file = st.file_uploader("Upload your CSV for bulk prediction", type=["csv"])
if uploaded_file is not None:
    # An upload replaces the bundled sample. A file that cannot be parsed is
    # reported and the section is hidden, instead of crashing the app or
    # silently predicting on the sample.
    try:
        bulk_sample = pd.read_csv(uploaded_file)
    except (pd.errors.EmptyDataError, pd.errors.ParserError, UnicodeDecodeError) as e:
        st.error(f"Could not read uploaded CSV: {e}")
        bulk_sample = None

if bulk_sample is not None:
    st.write("Bulk data preview:")
    st.dataframe(bulk_sample.head())
    # NOTE(review): the results and the download button are rendered inside
    # the button branch, so they disappear on the next rerun (including the
    # rerun triggered by clicking the download button). Persisting them in
    # st.session_state would keep them visible — left as a follow-up.
    if st.button("Predict Bulk Probabilities"):
        with st.spinner("Predicting..."):
            try:
                preds_proba = model.predict_proba(bulk_sample)
                preds_class = model.predict(bulk_sample)
                # Write predictions to a copy so the input frame is not
                # modified in place.
                bulk_results = bulk_sample.copy()
                bulk_results['Probability_Purchase'] = preds_proba[:, 1]
                bulk_results['Prediction'] = preds_class
                st.dataframe(bulk_results)
                csv_bulk = bulk_results.to_csv(index=False).encode('utf-8')
                st.download_button(
                    "Download Bulk Predictions",
                    csv_bulk,
                    "bulk_predictions.csv",
                    "text/csv",
                )
            except Exception as e:
                # Typically a schema mismatch between the CSV and the model's
                # expected columns; shown to the user instead of raised.
                st.error(f"Error predicting bulk data: {e}")
# Footer
# Horizontal rule followed by a one-line attribution caption.
_footer_caption = "Wellness Tourism Prediction Model | Built with XGBoost & Streamlit"
st.markdown("---")
st.caption(_footer_caption)