""" Wellness Tourism Package Prediction App Production-grade Streamlit application for predicting customer purchase likelihood """ import streamlit as st import pandas as pd import numpy as np import joblib from huggingface_hub import hf_hub_download import plotly.graph_objects as go import plotly.express as px from datetime import datetime import os # Page configuration st.set_page_config( page_title="Wellness Tourism Predictor", page_icon="✈️", layout="wide", initial_sidebar_state="expanded" ) # Custom CSS for better UI st.markdown(""" """, unsafe_allow_html=True) @st.cache_resource def load_model(): """ Load the trained model from Hugging Face Hub Uses caching to avoid reloading on every interaction """ try: model_path = hf_hub_download( repo_id="TheHumanAgent/tour_pkg_pred_model", filename="final_tour_pkg_pred_model_v1.joblib", repo_type="model" ) model = joblib.load(model_path) return model except Exception as e: st.error(f"Error loading model: {str(e)}") st.error("Please ensure the model is uploaded to Hugging Face Hub") st.stop() def create_input_features(): """ Create input form for all features required by the model Returns a dictionary with user inputs based on actual data ranges """ st.sidebar.header("📋 Customer Information") # Initialize session state for form if 'prediction_made' not in st.session_state: st.session_state.prediction_made = False with st.sidebar: st.subheader("👤 Personal Details") # Age: Range from 18-61 based on data age = st.slider("Age", min_value=18, max_value=61, value=36, # median help="Customer's age (18-61 years)") # Gender: Male, Female, Fe Male (as seen in data) gender = st.selectbox("Gender", ["Female", "Male", "Fe Male"], help="Customer's gender") # MaritalStatus: Single, Married, Divorced, Unmarried marital_status = st.selectbox("Marital Status", ["Single", "Divorced", "Married", "Unmarried"], help="Customer's marital status") # CityTier: 1, 2, 3 city_tier = st.selectbox("City Tier", [1, 2, 3], index=0, # median is 1 help="City development level (1=Most developed, 3=Least developed)") st.markdown("---") st.subheader("💼 Professional Details") # Occupation: Salaried, Small Business, Large Business, Free Lancer occupation = st.selectbox("Occupation", ["Salaried", "Free Lancer", "Small Business", "Large Business"], help="Customer's occupation type") # Designation: Executive, Manager, Senior Manager, AVP, VP designation = st.selectbox("Designation", ["Manager", "Executive", "Senior Manager", "AVP", "VP"], help="Customer's job designation") # MonthlyIncome: Range from 1000 to 98678 monthly_income = st.number_input("Monthly Income (₹)", min_value=1000, max_value=100000, value=22418, # median step=1000, help="Gross monthly income in Rupees (₹1,000 - ₹98,678)") st.markdown("---") st.subheader("✈️ Travel Preferences") # NumberOfTrips: Range from 1-22 num_trips = st.slider("Number of Trips (Annually)", min_value=1, max_value=22, value=3, # median help="Average annual trips taken (1-22)") # Passport: 0 or 1 passport = st.selectbox("Valid Passport", [0, 1], format_func=lambda x: "Yes" if x == 1 else "No", index=0, # median is 0 help="Does customer have a valid passport?") # OwnCar: 0 or 1 own_car = st.selectbox("Own Car", [0, 1], format_func=lambda x: "Yes" if x == 1 else "No", index=1, # median is 1 help="Does customer own a car?") # PreferredPropertyStar: 3, 4, 5 preferred_property_star = st.selectbox("Preferred Hotel Rating", [3, 4, 5], index=0, # median is 3 help="Preferred hotel star rating (3-5 stars)") st.markdown("---") st.subheader("👨‍👩‍👧‍👦 Trip Details") # NumberOfPersonVisiting: Range from 1-5 num_persons = st.slider("Number of Persons Visiting", min_value=1, max_value=5, value=3, # median help="Total people in the group (1-5)") # NumberOfChildrenVisiting: Range from 0-3 num_children = st.slider("Number of Children (<5 years)", min_value=0, max_value=3, value=1, # median help="Number of children under 5 years (0-3)") st.markdown("---") st.subheader("📞 Interaction Details") # TypeofContact: Company Invited, Self Enquiry type_of_contact = st.selectbox("Type of Contact", ["Self Enquiry", "Company Invited"], help="How was the customer contacted?") # ProductPitched: Basic, Standard, Deluxe, Super Deluxe, King product_pitched = st.selectbox("Product Pitched", ["Deluxe", "Basic", "Standard", "Super Deluxe", "King"], help="Type of package pitched to the customer") # DurationOfPitch: Range from 5-127 minutes duration_of_pitch = st.slider("Duration of Pitch (minutes)", min_value=5, max_value=127, value=14, # median help="Sales pitch duration in minutes (5-127)") # NumberOfFollowups: Range from 1-6 num_followups = st.slider("Number of Follow-ups", min_value=1, max_value=6, value=4, # median help="Total follow-ups after initial pitch (1-6)") # PitchSatisfactionScore: Range from 1-5 pitch_satisfaction = st.slider("Pitch Satisfaction Score", min_value=1, max_value=5, value=3, # median help="Customer satisfaction with the pitch (1=Very Low, 5=Very High)") # Create feature dictionary matching exact column names from training data features = { 'Age': age, 'CityTier': city_tier, 'DurationOfPitch': duration_of_pitch, 'NumberOfPersonVisiting': num_persons, 'NumberOfFollowups': num_followups, 'PreferredPropertyStar': preferred_property_star, 'NumberOfTrips': num_trips, 'Passport': passport, 'PitchSatisfactionScore': pitch_satisfaction, 'NumberOfChildrenVisiting': num_children, 'MonthlyIncome': monthly_income, 'TypeofContact': type_of_contact, 'Occupation': occupation, 'Gender': gender, 'OwnCar': own_car, 'ProductPitched': product_pitched, 'MaritalStatus': marital_status, 'Designation': designation } return features def create_gauge_chart(probability): """ Create a gauge chart to visualize purchase probability """ fig = go.Figure(go.Indicator( mode = "gauge+number+delta", value = probability * 100, domain = {'x': [0, 1], 'y': [0, 1]}, title = {'text': "Purchase Probability (%)", 'font': {'size': 24}}, delta = {'reference': 45, 'increasing': {'color': "green"}}, gauge = { 'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"}, 'bar': {'color': "darkblue"}, 'bgcolor': "white", 'borderwidth': 2, 'bordercolor': "gray", 'steps': [ {'range': [0, 30], 'color': '#ffcccc'}, {'range': [30, 70], 'color': '#ffffcc'}, {'range': [70, 100], 'color': '#ccffcc'} ], 'threshold': { 'line': {'color': "red", 'width': 4}, 'thickness': 0.75, 'value': 45 } } )) fig.update_layout( height=300, margin=dict(l=20, r=20, t=50, b=20) ) return fig def create_feature_importance_chart(features_df): """ Create a bar chart showing key customer metrics """ # Select key features for visualization key_features = { 'Monthly Income (₹K)': features_df['MonthlyIncome'].values[0] / 1000, 'Age': features_df['Age'].values[0], 'Annual Trips': features_df['NumberOfTrips'].values[0], 'Pitch Duration (min)': features_df['DurationOfPitch'].values[0], 'Follow-ups': features_df['NumberOfFollowups'].values[0], 'Satisfaction': features_df['PitchSatisfactionScore'].values[0], 'Hotel Rating': features_df['PreferredPropertyStar'].values[0], 'Group Size': features_df['NumberOfPersonVisiting'].values[0] } fig = px.bar( x=list(key_features.values()), y=list(key_features.keys()), orientation='h', title='Key Customer Metrics Overview', labels={'x': 'Value', 'y': 'Feature'}, color=list(key_features.values()), color_continuous_scale='Blues' ) fig.update_layout( height=400, showlegend=False, margin=dict(l=20, r=20, t=50, b=20) ) return fig def get_recommendation(probability, features): """ Generate actionable recommendations based on prediction and customer profile """ recommendations = [] # Priority level based on probability if probability >= 0.7: recommendations.append("✅ **HIGH PRIORITY LEAD** - Strong purchase likelihood") recommendations.append("🎯 **Action**: Schedule immediate follow-up call within 24 hours") recommendations.append("💎 **Strategy**: Offer premium package options and exclusive benefits") elif probability >= 0.45: recommendations.append("⚠️ **MEDIUM PRIORITY LEAD** - Moderate purchase likelihood") recommendations.append("📧 **Action**: Send personalized email highlighting package benefits") recommendations.append("🎁 **Strategy**: Consider offering limited-time discount (5-10%)") else: recommendations.append("❌ **LOW PRIORITY LEAD** - Lower purchase likelihood") recommendations.append("📬 **Action**: Add to nurture email campaign") recommendations.append("🔄 **Strategy**: Re-engage after 2-3 months with seasonal offers") recommendations.append("") # Spacing # Additional contextual recommendations based on specific features if features['NumberOfFollowups'] <= 2: recommendations.append("📌 **Insight**: Low follow-up count - Increase engagement frequency") if features['PitchSatisfactionScore'] <= 2: recommendations.append("⚠️ **Alert**: Low satisfaction score - Review and improve pitch approach") elif features['PitchSatisfactionScore'] >= 4: recommendations.append("⭐ **Positive**: High satisfaction - Customer is engaged, act quickly!") if features['MonthlyIncome'] >= 30000: recommendations.append("💰 **Insight**: High-income customer - Emphasize luxury and premium features") if features['NumberOfTrips'] >= 5: recommendations.append("✈️ **Insight**: Frequent traveler - Highlight loyalty benefits and travel perks") if features['Passport'] == 0: recommendations.append("🛂 **Note**: No passport - Consider domestic package options") if features['NumberOfChildrenVisiting'] >= 2: recommendations.append("👨‍👩‍👧‍👦 **Insight**: Family with children - Emphasize family-friendly amenities") if features['DurationOfPitch'] < 10: recommendations.append("⏱️ **Note**: Short pitch duration - May need more detailed product information") return recommendations def display_customer_summary(features): """ Display a formatted summary of customer information """ col1, col2, col3, col4 = st.columns(4) with col1: st.metric("👤 Age", f"{features['Age']} years") st.metric("🏙️ City Tier", f"Tier {features['CityTier']}") with col2: st.metric("💰 Income", f"₹{features['MonthlyIncome']:,}") st.metric("✈️ Annual Trips", features['NumberOfTrips']) with col3: st.metric("📞 Follow-ups", features['NumberOfFollowups']) st.metric("⭐ Satisfaction", f"{features['PitchSatisfactionScore']}/5") with col4: st.metric("👥 Group Size", features['NumberOfPersonVisiting']) st.metric("🏨 Hotel Pref", f"{features['PreferredPropertyStar']} Star") def main(): """ Main application function """ # Header st.markdown('

✈️ Wellness Tourism Package Predictor

', unsafe_allow_html=True) st.markdown('

AI-Powered Customer Purchase Prediction System

', unsafe_allow_html=True) # Load model with st.spinner("🔄 Loading ML model..."): model = load_model() st.success("✅ Model loaded successfully!") # Create input form features = create_input_features() # Main content area st.markdown("---") st.subheader("📊 Customer Profile Summary") display_customer_summary(features) # Show detailed information in expandable section with st.expander("📋 View Complete Customer Details"): df_display = pd.DataFrame([features]).T df_display.columns = ['Value'] st.dataframe(df_display, use_container_width=True, height=600) st.markdown("---") # Prediction section col_left, col_right = st.columns([2, 1]) with col_right: st.subheader("🎯 Make Prediction") predict_button = st.button("🔮 Predict Purchase Likelihood", type="primary", use_container_width=True) if st.button("🔄 Reset", use_container_width=True): st.session_state.prediction_made = False st.rerun() with col_left: if predict_button: with st.spinner("🤖 Analyzing customer data..."): # Create DataFrame with exact feature order input_df = pd.DataFrame([features]) # Make prediction try: prediction_proba = model.predict_proba(input_df)[0, 1] prediction = 1 if prediction_proba >= 0.45 else 0 # Store in session state st.session_state.prediction_made = True st.session_state.prediction = prediction st.session_state.probability = prediction_proba st.session_state.features = features st.session_state.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S") except Exception as e: st.error(f"❌ Prediction Error: {str(e)}") st.error("Please check that all input values are valid.") st.stop() # Display prediction results if st.session_state.prediction_made: st.markdown("---") st.subheader("📈 Prediction Results") prediction = st.session_state.prediction probability = st.session_state.probability # Prediction box with color coding if prediction == 1: st.markdown(f"""

✅ LIKELY TO PURCHASE
{probability*100:.1f}%
Confidence Level

""", unsafe_allow_html=True) else: st.markdown(f"""

❌ UNLIKELY TO PURCHASE
{(1-probability)*100:.1f}%
Confidence Level (Not Buying)

""", unsafe_allow_html=True) # Visualization section st.markdown("---") st.subheader("📊 Visual Analysis") viz_col1, viz_col2 = st.columns([1, 1]) with viz_col1: st.plotly_chart(create_gauge_chart(probability), use_container_width=True) with viz_col2: input_df = pd.DataFrame([st.session_state.features]) st.plotly_chart(create_feature_importance_chart(input_df), use_container_width=True) # Recommendations section st.markdown("---") st.subheader("💡 Actionable Recommendations") recommendations = get_recommendation(probability, st.session_state.features) for rec in recommendations: if rec: # Skip empty strings st.markdown(f"{rec}") # Model explanation with st.expander("🤔 How does the model work?"): st.markdown(""" **Model Details:** - **Algorithm**: XGBoost (Extreme Gradient Boosting) - **Classification Threshold**: 45% - **Training Data**: 4,128 customer records - **Features**: 18 input variables including demographics, travel preferences, and interaction history **Prediction Logic:** - Probability ≥ 45% → Customer likely to purchase - Probability < 45% → Customer unlikely to purchase **Key Factors Considered:** - Customer demographics (age, income, occupation) - Travel behavior (past trips, preferences) - Sales interaction (pitch satisfaction, follow-ups) - Family situation (marital status, children) The model has been trained to identify patterns that indicate purchase likelihood based on historical customer data. """) # Export functionality st.markdown("---") st.subheader("📥 Export Prediction Report") report_col1, report_col2 = st.columns([2, 1]) with report_col1: st.info("💾 Download a detailed report with all customer information and prediction results") with report_col2: # Create comprehensive report report_data = { 'Timestamp': [st.session_state.timestamp], 'Prediction': ['Will Purchase' if prediction == 1 else 'Will Not Purchase'], 'Purchase_Probability': [f"{probability*100:.2f}%"], 'Confidence_Level': ['High' if abs(probability - 0.5) > 0.2 else 'Medium'], **st.session_state.features } report_df = pd.DataFrame(report_data) csv = report_df.to_csv(index=False) st.download_button( label="📄 Download CSV Report", data=csv, file_name=f"customer_prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv", mime="text/csv", use_container_width=True ) # Footer st.markdown("---") st.markdown("""

🏢 Visit with Us - Wellness Tourism Package Prediction System

Model Version: v1.0 | Last Updated: December 2024

""", unsafe_allow_html=True) # Sidebar footer with statistics with st.sidebar: st.markdown("---") st.info(""" **ℹ️ About This Application** This ML-powered system predicts whether a customer will purchase the Wellness Tourism Package based on their profile and interaction history. **📊 Model Statistics:** - **Training Data**: 4,128 customers - **Purchase Rate**: 19.3% - **Algorithm**: XGBoost Classifier - **Threshold**: 45% - **Features**: 18 variables **🎯 How to Use:** 1. Enter customer details in the form 2. Click 'Predict Purchase Likelihood' 3. Review prediction and recommendations 4. Download detailed report (optional) **📈 Prediction Accuracy:** The model considers demographics, travel preferences, and sales interaction history to make accurate predictions. """) st.warning(""" **⚠️ Important Notes:** - Ensure all fields are filled accurately - Income should be in Indian Rupees (₹) - Follow-ups range from 1-6 - Pitch duration in minutes (5-127) """) if __name__ == "__main__": main()