TheHumanAgent's picture
Upload folder using huggingface_hub
52509c8 verified
"""
Wellness Tourism Package Prediction App
Production-grade Streamlit application for predicting customer purchase likelihood
"""
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from huggingface_hub import hf_hub_download
import plotly.graph_objects as go
import plotly.express as px
from datetime import datetime
import os
# Page configuration
st.set_page_config(
page_title="Wellness Tourism Predictor",
page_icon="✈️",
layout="wide",
initial_sidebar_state="expanded"
)
# Custom CSS for better UI
st.markdown("""
<style>
.main-header {
font-size: 2.5rem;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.sub-header {
font-size: 1.2rem;
color: #555;
text-align: center;
margin-bottom: 2rem;
}
.prediction-box {
padding: 2rem;
border-radius: 10px;
text-align: center;
font-size: 1.5rem;
font-weight: bold;
margin: 2rem 0;
}
.prediction-positive {
background-color: #d4edda;
color: #155724;
border: 2px solid #c3e6cb;
}
.prediction-negative {
background-color: #f8d7da;
color: #721c24;
border: 2px solid #f5c6cb;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 5px;
margin: 0.5rem 0;
}
.stDownloadButton button {
width: 100%;
}
</style>
""", unsafe_allow_html=True)
@st.cache_resource
def load_model():
"""
Load the trained model from Hugging Face Hub
Uses caching to avoid reloading on every interaction
"""
try:
model_path = hf_hub_download(
repo_id="TheHumanAgent/tour_pkg_pred_model",
filename="final_tour_pkg_pred_model_v1.joblib",
repo_type="model"
)
model = joblib.load(model_path)
return model
except Exception as e:
st.error(f"Error loading model: {str(e)}")
st.error("Please ensure the model is uploaded to Hugging Face Hub")
st.stop()
def create_input_features():
"""
Create input form for all features required by the model
Returns a dictionary with user inputs based on actual data ranges
"""
st.sidebar.header("📋 Customer Information")
# Initialize session state for form
if 'prediction_made' not in st.session_state:
st.session_state.prediction_made = False
with st.sidebar:
st.subheader("👤 Personal Details")
# Age: Range from 18-61 based on data
age = st.slider("Age",
min_value=18,
max_value=61,
value=36, # median
help="Customer's age (18-61 years)")
# Gender: Male, Female, Fe Male (as seen in data)
gender = st.selectbox("Gender",
["Female", "Male", "Fe Male"],
help="Customer's gender")
# MaritalStatus: Single, Married, Divorced, Unmarried
marital_status = st.selectbox("Marital Status",
["Single", "Divorced", "Married", "Unmarried"],
help="Customer's marital status")
# CityTier: 1, 2, 3
city_tier = st.selectbox("City Tier",
[1, 2, 3],
index=0, # median is 1
help="City development level (1=Most developed, 3=Least developed)")
st.markdown("---")
st.subheader("💼 Professional Details")
# Occupation: Salaried, Small Business, Large Business, Free Lancer
occupation = st.selectbox("Occupation",
["Salaried", "Free Lancer", "Small Business", "Large Business"],
help="Customer's occupation type")
# Designation: Executive, Manager, Senior Manager, AVP, VP
designation = st.selectbox("Designation",
["Manager", "Executive", "Senior Manager", "AVP", "VP"],
help="Customer's job designation")
# MonthlyIncome: Range from 1000 to 98678
monthly_income = st.number_input("Monthly Income (₹)",
min_value=1000,
max_value=100000,
value=22418, # median
step=1000,
help="Gross monthly income in Rupees (₹1,000 - ₹98,678)")
st.markdown("---")
st.subheader("✈️ Travel Preferences")
# NumberOfTrips: Range from 1-22
num_trips = st.slider("Number of Trips (Annually)",
min_value=1,
max_value=22,
value=3, # median
help="Average annual trips taken (1-22)")
# Passport: 0 or 1
passport = st.selectbox("Valid Passport",
[0, 1],
format_func=lambda x: "Yes" if x == 1 else "No",
index=0, # median is 0
help="Does customer have a valid passport?")
# OwnCar: 0 or 1
own_car = st.selectbox("Own Car",
[0, 1],
format_func=lambda x: "Yes" if x == 1 else "No",
index=1, # median is 1
help="Does customer own a car?")
# PreferredPropertyStar: 3, 4, 5
preferred_property_star = st.selectbox("Preferred Hotel Rating",
[3, 4, 5],
index=0, # median is 3
help="Preferred hotel star rating (3-5 stars)")
st.markdown("---")
st.subheader("👨‍👩‍👧‍👦 Trip Details")
# NumberOfPersonVisiting: Range from 1-5
num_persons = st.slider("Number of Persons Visiting",
min_value=1,
max_value=5,
value=3, # median
help="Total people in the group (1-5)")
# NumberOfChildrenVisiting: Range from 0-3
num_children = st.slider("Number of Children (<5 years)",
min_value=0,
max_value=3,
value=1, # median
help="Number of children under 5 years (0-3)")
st.markdown("---")
st.subheader("📞 Interaction Details")
# TypeofContact: Company Invited, Self Enquiry
type_of_contact = st.selectbox("Type of Contact",
["Self Enquiry", "Company Invited"],
help="How was the customer contacted?")
# ProductPitched: Basic, Standard, Deluxe, Super Deluxe, King
product_pitched = st.selectbox("Product Pitched",
["Deluxe", "Basic", "Standard", "Super Deluxe", "King"],
help="Type of package pitched to the customer")
# DurationOfPitch: Range from 5-127 minutes
duration_of_pitch = st.slider("Duration of Pitch (minutes)",
min_value=5,
max_value=127,
value=14, # median
help="Sales pitch duration in minutes (5-127)")
# NumberOfFollowups: Range from 1-6
num_followups = st.slider("Number of Follow-ups",
min_value=1,
max_value=6,
value=4, # median
help="Total follow-ups after initial pitch (1-6)")
# PitchSatisfactionScore: Range from 1-5
pitch_satisfaction = st.slider("Pitch Satisfaction Score",
min_value=1,
max_value=5,
value=3, # median
help="Customer satisfaction with the pitch (1=Very Low, 5=Very High)")
# Create feature dictionary matching exact column names from training data
features = {
'Age': age,
'CityTier': city_tier,
'DurationOfPitch': duration_of_pitch,
'NumberOfPersonVisiting': num_persons,
'NumberOfFollowups': num_followups,
'PreferredPropertyStar': preferred_property_star,
'NumberOfTrips': num_trips,
'Passport': passport,
'PitchSatisfactionScore': pitch_satisfaction,
'NumberOfChildrenVisiting': num_children,
'MonthlyIncome': monthly_income,
'TypeofContact': type_of_contact,
'Occupation': occupation,
'Gender': gender,
'OwnCar': own_car,
'ProductPitched': product_pitched,
'MaritalStatus': marital_status,
'Designation': designation
}
return features
def create_gauge_chart(probability):
"""
Create a gauge chart to visualize purchase probability
"""
fig = go.Figure(go.Indicator(
mode = "gauge+number+delta",
value = probability * 100,
domain = {'x': [0, 1], 'y': [0, 1]},
title = {'text': "Purchase Probability (%)", 'font': {'size': 24}},
delta = {'reference': 45, 'increasing': {'color': "green"}},
gauge = {
'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"},
'bar': {'color': "darkblue"},
'bgcolor': "white",
'borderwidth': 2,
'bordercolor': "gray",
'steps': [
{'range': [0, 30], 'color': '#ffcccc'},
{'range': [30, 70], 'color': '#ffffcc'},
{'range': [70, 100], 'color': '#ccffcc'}
],
'threshold': {
'line': {'color': "red", 'width': 4},
'thickness': 0.75,
'value': 45
}
}
))
fig.update_layout(
height=300,
margin=dict(l=20, r=20, t=50, b=20)
)
return fig
def create_feature_importance_chart(features_df):
"""
Create a bar chart showing key customer metrics
"""
# Select key features for visualization
key_features = {
'Monthly Income (₹K)': features_df['MonthlyIncome'].values[0] / 1000,
'Age': features_df['Age'].values[0],
'Annual Trips': features_df['NumberOfTrips'].values[0],
'Pitch Duration (min)': features_df['DurationOfPitch'].values[0],
'Follow-ups': features_df['NumberOfFollowups'].values[0],
'Satisfaction': features_df['PitchSatisfactionScore'].values[0],
'Hotel Rating': features_df['PreferredPropertyStar'].values[0],
'Group Size': features_df['NumberOfPersonVisiting'].values[0]
}
fig = px.bar(
x=list(key_features.values()),
y=list(key_features.keys()),
orientation='h',
title='Key Customer Metrics Overview',
labels={'x': 'Value', 'y': 'Feature'},
color=list(key_features.values()),
color_continuous_scale='Blues'
)
fig.update_layout(
height=400,
showlegend=False,
margin=dict(l=20, r=20, t=50, b=20)
)
return fig
def get_recommendation(probability, features):
"""
Generate actionable recommendations based on prediction and customer profile
"""
recommendations = []
# Priority level based on probability
if probability >= 0.7:
recommendations.append("✅ **HIGH PRIORITY LEAD** - Strong purchase likelihood")
recommendations.append("🎯 **Action**: Schedule immediate follow-up call within 24 hours")
recommendations.append("💎 **Strategy**: Offer premium package options and exclusive benefits")
elif probability >= 0.45:
recommendations.append("⚠️ **MEDIUM PRIORITY LEAD** - Moderate purchase likelihood")
recommendations.append("📧 **Action**: Send personalized email highlighting package benefits")
recommendations.append("🎁 **Strategy**: Consider offering limited-time discount (5-10%)")
else:
recommendations.append("❌ **LOW PRIORITY LEAD** - Lower purchase likelihood")
recommendations.append("📬 **Action**: Add to nurture email campaign")
recommendations.append("🔄 **Strategy**: Re-engage after 2-3 months with seasonal offers")
recommendations.append("") # Spacing
# Additional contextual recommendations based on specific features
if features['NumberOfFollowups'] <= 2:
recommendations.append("📌 **Insight**: Low follow-up count - Increase engagement frequency")
if features['PitchSatisfactionScore'] <= 2:
recommendations.append("⚠️ **Alert**: Low satisfaction score - Review and improve pitch approach")
elif features['PitchSatisfactionScore'] >= 4:
recommendations.append("⭐ **Positive**: High satisfaction - Customer is engaged, act quickly!")
if features['MonthlyIncome'] >= 30000:
recommendations.append("💰 **Insight**: High-income customer - Emphasize luxury and premium features")
if features['NumberOfTrips'] >= 5:
recommendations.append("✈️ **Insight**: Frequent traveler - Highlight loyalty benefits and travel perks")
if features['Passport'] == 0:
recommendations.append("🛂 **Note**: No passport - Consider domestic package options")
if features['NumberOfChildrenVisiting'] >= 2:
recommendations.append("👨‍👩‍👧‍👦 **Insight**: Family with children - Emphasize family-friendly amenities")
if features['DurationOfPitch'] < 10:
recommendations.append("⏱️ **Note**: Short pitch duration - May need more detailed product information")
return recommendations
def display_customer_summary(features):
"""
Display a formatted summary of customer information
"""
col1, col2, col3, col4 = st.columns(4)
with col1:
st.metric("👤 Age", f"{features['Age']} years")
st.metric("🏙️ City Tier", f"Tier {features['CityTier']}")
with col2:
st.metric("💰 Income", f"₹{features['MonthlyIncome']:,}")
st.metric("✈️ Annual Trips", features['NumberOfTrips'])
with col3:
st.metric("📞 Follow-ups", features['NumberOfFollowups'])
st.metric("⭐ Satisfaction", f"{features['PitchSatisfactionScore']}/5")
with col4:
st.metric("👥 Group Size", features['NumberOfPersonVisiting'])
st.metric("🏨 Hotel Pref", f"{features['PreferredPropertyStar']} Star")
def main():
"""
Main application function
"""
# Header
st.markdown('<p class="main-header">✈️ Wellness Tourism Package Predictor</p>',
unsafe_allow_html=True)
st.markdown('<p class="sub-header">AI-Powered Customer Purchase Prediction System</p>',
unsafe_allow_html=True)
# Load model
with st.spinner("🔄 Loading ML model..."):
model = load_model()
st.success("✅ Model loaded successfully!")
# Create input form
features = create_input_features()
# Main content area
st.markdown("---")
st.subheader("📊 Customer Profile Summary")
display_customer_summary(features)
# Show detailed information in expandable section
with st.expander("📋 View Complete Customer Details"):
df_display = pd.DataFrame([features]).T
df_display.columns = ['Value']
st.dataframe(df_display, use_container_width=True, height=600)
st.markdown("---")
# Prediction section
col_left, col_right = st.columns([2, 1])
with col_right:
st.subheader("🎯 Make Prediction")
predict_button = st.button("🔮 Predict Purchase Likelihood",
type="primary",
use_container_width=True)
if st.button("🔄 Reset", use_container_width=True):
st.session_state.prediction_made = False
st.rerun()
with col_left:
if predict_button:
with st.spinner("🤖 Analyzing customer data..."):
# Create DataFrame with exact feature order
input_df = pd.DataFrame([features])
# Make prediction
try:
prediction_proba = model.predict_proba(input_df)[0, 1]
prediction = 1 if prediction_proba >= 0.45 else 0
# Store in session state
st.session_state.prediction_made = True
st.session_state.prediction = prediction
st.session_state.probability = prediction_proba
st.session_state.features = features
st.session_state.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
except Exception as e:
st.error(f"❌ Prediction Error: {str(e)}")
st.error("Please check that all input values are valid.")
st.stop()
# Display prediction results
if st.session_state.prediction_made:
st.markdown("---")
st.subheader("📈 Prediction Results")
prediction = st.session_state.prediction
probability = st.session_state.probability
# Prediction box with color coding
if prediction == 1:
st.markdown(f"""
<div class="prediction-box prediction-positive">
✅ LIKELY TO PURCHASE<br>
<span style="font-size: 2rem;">{probability*100:.1f}%</span><br>
Confidence Level
</div>
""", unsafe_allow_html=True)
else:
st.markdown(f"""
<div class="prediction-box prediction-negative">
❌ UNLIKELY TO PURCHASE<br>
<span style="font-size: 2rem;">{(1-probability)*100:.1f}%</span><br>
Confidence Level (Not Buying)
</div>
""", unsafe_allow_html=True)
# Visualization section
st.markdown("---")
st.subheader("📊 Visual Analysis")
viz_col1, viz_col2 = st.columns([1, 1])
with viz_col1:
st.plotly_chart(create_gauge_chart(probability),
use_container_width=True)
with viz_col2:
input_df = pd.DataFrame([st.session_state.features])
st.plotly_chart(create_feature_importance_chart(input_df),
use_container_width=True)
# Recommendations section
st.markdown("---")
st.subheader("💡 Actionable Recommendations")
recommendations = get_recommendation(probability, st.session_state.features)
for rec in recommendations:
if rec: # Skip empty strings
st.markdown(f"{rec}")
# Model explanation
with st.expander("🤔 How does the model work?"):
st.markdown("""
**Model Details:**
- **Algorithm**: XGBoost (Extreme Gradient Boosting)
- **Classification Threshold**: 45%
- **Training Data**: 4,128 customer records
- **Features**: 18 input variables including demographics, travel preferences, and interaction history
**Prediction Logic:**
- Probability ≥ 45% → Customer likely to purchase
- Probability < 45% → Customer unlikely to purchase
**Key Factors Considered:**
- Customer demographics (age, income, occupation)
- Travel behavior (past trips, preferences)
- Sales interaction (pitch satisfaction, follow-ups)
- Family situation (marital status, children)
The model has been trained to identify patterns that indicate purchase likelihood based on historical customer data.
""")
# Export functionality
st.markdown("---")
st.subheader("📥 Export Prediction Report")
report_col1, report_col2 = st.columns([2, 1])
with report_col1:
st.info("💾 Download a detailed report with all customer information and prediction results")
with report_col2:
# Create comprehensive report
report_data = {
'Timestamp': [st.session_state.timestamp],
'Prediction': ['Will Purchase' if prediction == 1 else 'Will Not Purchase'],
'Purchase_Probability': [f"{probability*100:.2f}%"],
'Confidence_Level': ['High' if abs(probability - 0.5) > 0.2 else 'Medium'],
**st.session_state.features
}
report_df = pd.DataFrame(report_data)
csv = report_df.to_csv(index=False)
st.download_button(
label="📄 Download CSV Report",
data=csv,
file_name=f"customer_prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
mime="text/csv",
use_container_width=True
)
# Footer
st.markdown("---")
st.markdown("""
<div style='text-align: center; color: #888; padding: 1rem;'>
<p><b>🏢 Visit with Us</b> - Wellness Tourism Package Prediction System</p>
<p>Powered by XGBoost ML Model | Classification Threshold: 45% | Trained on 4,128 customers</p>
<p style='font-size: 0.85rem;'>Model Version: v1.0 | Last Updated: December 2024</p>
</div>
""", unsafe_allow_html=True)
# Sidebar footer with statistics
with st.sidebar:
st.markdown("---")
st.info("""
**ℹ️ About This Application**
This ML-powered system predicts whether a customer will purchase
the Wellness Tourism Package based on their profile and interaction history.
**📊 Model Statistics:**
- **Training Data**: 4,128 customers
- **Purchase Rate**: 19.3%
- **Algorithm**: XGBoost Classifier
- **Threshold**: 45%
- **Features**: 18 variables
**🎯 How to Use:**
1. Enter customer details in the form
2. Click 'Predict Purchase Likelihood'
3. Review prediction and recommendations
4. Download detailed report (optional)
**📈 Prediction Accuracy:**
The model considers demographics, travel preferences,
and sales interaction history to make accurate predictions.
""")
st.warning("""
**⚠️ Important Notes:**
- Ensure all fields are filled accurately
- Income should be in Indian Rupees (₹)
- Follow-ups range from 1-6
- Pitch duration in minutes (5-127)
""")
if __name__ == "__main__":
main()