Spaces:

TheHumanAgent
/

tour-pkg-predictor-app

Sleeping

App Files Files Community

TheHumanAgent committed on Dec 25, 2025

Commit

564fa86

verified ·

1 Parent(s): 19e46f5

Upload folder using huggingface_hub

Browse files

Files changed (3) hide show

Dockerfile +23 -0
app.py +609 -0
requirements.txt +22 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,23 @@

+# Use the official Python 3.9 image as the base
+FROM python:3.9
+# Set the working directory used while installing dependencies
+WORKDIR /app
+# Copy only requirements.txt first so the dependency layer stays cached when application code changes
+COPY requirements.txt .
+# Install Python dependencies listed in requirements.txt without keeping pip's download cache in the image
+RUN pip3 install --no-cache-dir -r requirements.txt
+# Create a non-root user (UID 1000) and run everything from here on as that user
+RUN useradd -m -u 1000 user
+USER user
+ENV HOME=/home/user \
+	PATH=/home/user/.local/bin:$PATH
+# The application is copied to, and started from, the user's home directory
+WORKDIR $HOME/app
+COPY --chown=user . $HOME/app
+# Define the command to run the Streamlit app on port "8501" and make it accessible externally
+CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]

app.py ADDED Viewed

	@@ -0,0 +1,609 @@

+"""
+Wellness Tourism Package Prediction App
+Production-grade Streamlit application for predicting customer purchase likelihood
+"""
+import streamlit as st
+import pandas as pd
+import numpy as np
+import joblib
+from huggingface_hub import hf_hub_download
+import plotly.graph_objects as go
+import plotly.express as px
+from datetime import datetime
+import os
+# Page configuration
+st.set_page_config(
+    page_title="Wellness Tourism Predictor",
+    page_icon="✈️",
+    layout="wide",
+    initial_sidebar_state="expanded"
+)
+# Custom CSS for better UI
+st.markdown("""
+    <style>
+    .main-header {
+        font-size: 2.5rem;
+        color: #1f77b4;
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .sub-header {
+        font-size: 1.2rem;
+        color: #555;
+        text-align: center;
+        margin-bottom: 2rem;
+    }
+    .prediction-box {
+        padding: 2rem;
+        border-radius: 10px;
+        text-align: center;
+        font-size: 1.5rem;
+        font-weight: bold;
+        margin: 2rem 0;
+    }
+    .prediction-positive {
+        background-color: #d4edda;
+        color: #155724;
+        border: 2px solid #c3e6cb;
+    }
+    .prediction-negative {
+        background-color: #f8d7da;
+        color: #721c24;
+        border: 2px solid #f5c6cb;
+    }
+    .metric-card {
+        background-color: #f0f2f6;
+        padding: 1rem;
+        border-radius: 5px;
+        margin: 0.5rem 0;
+    }
+    .stDownloadButton button {
+        width: 100%;
+    }
+    </style>
+""", unsafe_allow_html=True)
+@st.cache_resource
+def load_model():
+    """
+    Load the trained model from Hugging Face Hub
+    Uses caching to avoid reloading on every interaction
+    """
+    try:
+        model_path = hf_hub_download(
+            repo_id="TheHumanAgent/tour_pkg_pred_model",
+            filename="final_tour_pkg_pred_model_v1.joblib",
+            repo_type="model"
+        )
+        model = joblib.load(model_path)
+        return model
+    except Exception as e:
+        st.error(f"Error loading model: {str(e)}")
+        st.error("Please ensure the model is uploaded to Hugging Face Hub")
+        st.stop()
+def create_input_features():
+    """
+    Create input form for all features required by the model
+    Returns a dictionary with user inputs based on actual data ranges
+    """
+    st.sidebar.header("📋 Customer Information")
+    # Initialize session state for form
+    if 'prediction_made' not in st.session_state:
+        st.session_state.prediction_made = False
+    with st.sidebar:
+        st.subheader("👤 Personal Details")
+        # Age: Range from 18-61 based on data
+        age = st.slider("Age",
+                       min_value=18,
+                       max_value=61,
+                       value=36,  # median
+                       help="Customer's age (18-61 years)")
+        # Gender: Male, Female, Fe Male (as seen in data)
+        gender = st.selectbox("Gender",
+                             ["Female" "Male" "Fe Male"],
+                             help="Customer's gender")
+        # MaritalStatus: Single, Married, Divorced, Unmarried
+        marital_status = st.selectbox("Marital Status",
+                                     ["Single" "Divorced" "Married" "Unmarried"],
+                                     help="Customer's marital status")
+        # CityTier: 1, 2, 3
+        city_tier = st.selectbox("City Tier",
+                                [1, 2, 3],
+                                index=0,  # median is 1
+                                help="City development level (1=Most developed, 3=Least developed)")
+        st.markdown("---")
+        st.subheader("💼 Professional Details")
+        # Occupation: Salaried, Small Business, Large Business, Free Lancer
+        occupation = st.selectbox("Occupation",
+                                 ["Salaried" "Free Lancer" "Small Business" "Large Business"],
+                                 help="Customer's occupation type")
+        # Designation: Executive, Manager, Senior Manager, AVP, VP
+        designation = st.selectbox("Designation",
+                                  ["Manager" "Executive" "Senior Manager" "AVP" "VP"],
+                                  help="Customer's job designation")
+        # MonthlyIncome: Range from 1000 to 98678
+        monthly_income = st.number_input("Monthly Income (₹)",
+                                        min_value=1000,
+                                        max_value=100000,
+                                        value=22418,  # median
+                                        step=1000,
+                                        help="Gross monthly income in Rupees (₹1,000 - ₹98,678)")
+        st.markdown("---")
+        st.subheader("✈️ Travel Preferences")
+        # NumberOfTrips: Range from 1-22
+        num_trips = st.slider("Number of Trips (Annually)",
+                             min_value=1,
+                             max_value=22,
+                             value=3,  # median
+                             help="Average annual trips taken (1-22)")
+        # Passport: 0 or 1
+        passport = st.selectbox("Valid Passport",
+                               [0, 1],
+                               format_func=lambda x: "Yes" if x == 1 else "No",
+                               index=0,  # median is 0
+                               help="Does customer have a valid passport?")
+        # OwnCar: 0 or 1
+        own_car = st.selectbox("Own Car",
+                              [0, 1],
+                              format_func=lambda x: "Yes" if x == 1 else "No",
+                              index=1,  # median is 1
+                              help="Does customer own a car?")
+        # PreferredPropertyStar: 3, 4, 5
+        preferred_property_star = st.selectbox("Preferred Hotel Rating",
+                                              [3, 4, 5],
+                                              index=0,  # median is 3
+                                              help="Preferred hotel star rating (3-5 stars)")
+        st.markdown("---")
+        st.subheader("👨‍👩‍👧‍👦 Trip Details")
+        # NumberOfPersonVisiting: Range from 1-5
+        num_persons = st.slider("Number of Persons Visiting",
+                               min_value=1,
+                               max_value=5,
+                               value=3,  # median
+                               help="Total people in the group (1-5)")
+        # NumberOfChildrenVisiting: Range from 0-3
+        num_children = st.slider("Number of Children (<5 years)",
+                                min_value=0,
+                                max_value=3,
+                                value=1,  # median
+                                help="Number of children under 5 years (0-3)")
+        st.markdown("---")
+        st.subheader("📞 Interaction Details")
+        # TypeofContact: Company Invited, Self Enquiry
+        type_of_contact = st.selectbox("Type of Contact",
+                                      ["Self Enquiry" "Company Invited"],
+                                      help="How was the customer contacted?")
+        # ProductPitched: Basic, Standard, Deluxe, Super Deluxe, King
+        product_pitched = st.selectbox("Product Pitched",
+                                      ["Deluxe" "Basic" "Standard" "Super Deluxe" "King"],
+                                      help="Type of package pitched to the customer")
+        # DurationOfPitch: Range from 5-127 minutes
+        duration_of_pitch = st.slider("Duration of Pitch (minutes)",
+                                     min_value=5,
+                                     max_value=127,
+                                     value=14,  # median
+                                     help="Sales pitch duration in minutes (5-127)")
+        # NumberOfFollowups: Range from 1-6
+        num_followups = st.slider("Number of Follow-ups",
+                                 min_value=1,
+                                 max_value=6,
+                                 value=4,  # median
+                                 help="Total follow-ups after initial pitch (1-6)")
+        # PitchSatisfactionScore: Range from 1-5
+        pitch_satisfaction = st.slider("Pitch Satisfaction Score",
+                                      min_value=1,
+                                      max_value=5,
+                                      value=3,  # median
+                                      help="Customer satisfaction with the pitch (1=Very Low, 5=Very High)")
+    # Create feature dictionary matching exact column names from training data
+    features = {
+        'Age': age,
+        'CityTier': city_tier,
+        'DurationOfPitch': duration_of_pitch,
+        'NumberOfPersonVisiting': num_persons,
+        'NumberOfFollowups': num_followups,
+        'PreferredPropertyStar': preferred_property_star,
+        'NumberOfTrips': num_trips,
+        'Passport': passport,
+        'PitchSatisfactionScore': pitch_satisfaction,
+        'NumberOfChildrenVisiting': num_children,
+        'MonthlyIncome': monthly_income,
+        'TypeofContact': type_of_contact,
+        'Occupation': occupation,
+        'Gender': gender,
+        'OwnCar': own_car,
+        'ProductPitched': product_pitched,
+        'MaritalStatus': marital_status,
+        'Designation': designation
+    }
+    return features
+def create_gauge_chart(probability):
+    """
+    Create a gauge chart to visualize purchase probability
+    """
+    fig = go.Figure(go.Indicator(
+        mode = "gauge+number+delta",
+        value = probability * 100,
+        domain = {'x': [0, 1], 'y': [0, 1]},
+        title = {'text': "Purchase Probability (%)", 'font': {'size': 24}},
+        delta = {'reference': 45, 'increasing': {'color': "green"}},
+        gauge = {
+            'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"},
+            'bar': {'color': "darkblue"},
+            'bgcolor': "white",
+            'borderwidth': 2,
+            'bordercolor': "gray",
+            'steps': [
+                {'range': [0, 30], 'color': '#ffcccc'},
+                {'range': [30, 70], 'color': '#ffffcc'},
+                {'range': [70, 100], 'color': '#ccffcc'}
+            ],
+            'threshold': {
+                'line': {'color': "red", 'width': 4},
+                'thickness': 0.75,
+                'value': 45
+            }
+        }
+    ))
+    fig.update_layout(
+        height=300,
+        margin=dict(l=20, r=20, t=50, b=20)
+    )
+    return fig
+def create_feature_importance_chart(features_df):
+    """
+    Create a bar chart showing key customer metrics
+    """
+    # Select key features for visualization
+    key_features = {
+        'Monthly Income (₹K)': features_df['MonthlyIncome'].values[0] / 1000,
+        'Age': features_df['Age'].values[0],
+        'Annual Trips': features_df['NumberOfTrips'].values[0],
+        'Pitch Duration (min)': features_df['DurationOfPitch'].values[0],
+        'Follow-ups': features_df['NumberOfFollowups'].values[0],
+        'Satisfaction': features_df['PitchSatisfactionScore'].values[0],
+        'Hotel Rating': features_df['PreferredPropertyStar'].values[0],
+        'Group Size': features_df['NumberOfPersonVisiting'].values[0]
+    }
+    fig = px.bar(
+        x=list(key_features.values()),
+        y=list(key_features.keys()),
+        orientation='h',
+        title='Key Customer Metrics Overview',
+        labels={'x': 'Value', 'y': 'Feature'},
+        color=list(key_features.values()),
+        color_continuous_scale='Blues'
+    )
+    fig.update_layout(
+        height=400,
+        showlegend=False,
+        margin=dict(l=20, r=20, t=50, b=20)
+    )
+    return fig
+def get_recommendation(probability, features):
+    """
+    Generate actionable recommendations based on prediction and customer profile
+    """
+    recommendations = []
+    # Priority level based on probability
+    if probability >= 0.7:
+        recommendations.append("✅ **HIGH PRIORITY LEAD** - Strong purchase likelihood")
+        recommendations.append("🎯 **Action**: Schedule immediate follow-up call within 24 hours")
+        recommendations.append("💎 **Strategy**: Offer premium package options and exclusive benefits")
+    elif probability >= 0.45:
+        recommendations.append("⚠️ **MEDIUM PRIORITY LEAD** - Moderate purchase likelihood")
+        recommendations.append("📧 **Action**: Send personalized email highlighting package benefits")
+        recommendations.append("🎁 **Strategy**: Consider offering limited-time discount (5-10%)")
+    else:
+        recommendations.append("❌ **LOW PRIORITY LEAD** - Lower purchase likelihood")
+        recommendations.append("📬 **Action**: Add to nurture email campaign")
+        recommendations.append("🔄 **Strategy**: Re-engage after 2-3 months with seasonal offers")
+    recommendations.append("")  # Spacing
+    # Additional contextual recommendations based on specific features
+    if features['NumberOfFollowups'] <= 2:
+        recommendations.append("📌 **Insight**: Low follow-up count - Increase engagement frequency")
+    if features['PitchSatisfactionScore'] <= 2:
+        recommendations.append("⚠️ **Alert**: Low satisfaction score - Review and improve pitch approach")
+    elif features['PitchSatisfactionScore'] >= 4:
+        recommendations.append("⭐ **Positive**: High satisfaction - Customer is engaged, act quickly!")
+    if features['MonthlyIncome'] >= 30000:
+        recommendations.append("💰 **Insight**: High-income customer - Emphasize luxury and premium features")
+    if features['NumberOfTrips'] >= 5:
+        recommendations.append("✈️ **Insight**: Frequent traveler - Highlight loyalty benefits and travel perks")
+    if features['Passport'] == 0:
+        recommendations.append("🛂 **Note**: No passport - Consider domestic package options")
+    if features['NumberOfChildrenVisiting'] >= 2:
+        recommendations.append("👨‍👩‍👧‍👦 **Insight**: Family with children - Emphasize family-friendly amenities")
+    if features['DurationOfPitch'] < 10:
+        recommendations.append("⏱️ **Note**: Short pitch duration - May need more detailed product information")
+    return recommendations
+def display_customer_summary(features):
+    """
+    Display a formatted summary of customer information
+    """
+    col1, col2, col3, col4 = st.columns(4)
+    with col1:
+        st.metric("👤 Age", f"{features['Age']} years")
+        st.metric("🏙️ City Tier", f"Tier {features['CityTier']}")
+    with col2:
+        st.metric("💰 Income", f"₹{features['MonthlyIncome']:,}")
+        st.metric("✈️ Annual Trips", features['NumberOfTrips'])
+    with col3:
+        st.metric("📞 Follow-ups", features['NumberOfFollowups'])
+        st.metric("⭐ Satisfaction", f"{features['PitchSatisfactionScore']}/5")
+    with col4:
+        st.metric("👥 Group Size", features['NumberOfPersonVisiting'])
+        st.metric("🏨 Hotel Pref", f"{features['PreferredPropertyStar']} Star")
+def main():
+    """
+    Main application function
+    Renders the header, loads the model, collects the sidebar inputs, shows the
+    customer summary, runs a prediction when the button is clicked, and then
+    displays the stored result with charts, recommendations, a CSV export,
+    the page footer and the sidebar help text.
+    """
+    # Header
+    st.markdown('<p class="main-header">✈️ Wellness Tourism Package Predictor</p>',
+                unsafe_allow_html=True)
+    st.markdown('<p class="sub-header">AI-Powered Customer Purchase Prediction System</p>',
+                unsafe_allow_html=True)
+    # Load model
+    with st.spinner("🔄 Loading ML model..."):
+        model = load_model()
+    st.success("✅ Model loaded successfully!")
+    # Create input form
+    # (create_input_features also initializes st.session_state.prediction_made)
+    features = create_input_features()
+    # Main content area
+    st.markdown("---")
+    st.subheader("📊 Customer Profile Summary")
+    display_customer_summary(features)
+    # Show detailed information in expandable section
+    with st.expander("📋 View Complete Customer Details"):
+        # Transpose so each feature becomes a row with a single 'Value' column
+        df_display = pd.DataFrame([features]).T
+        df_display.columns = ['Value']
+        st.dataframe(df_display, use_container_width=True, height=600)
+    st.markdown("---")
+    # Prediction section
+    col_left, col_right = st.columns([2, 1])
+    with col_right:
+        st.subheader("🎯 Make Prediction")
+        predict_button = st.button("🔮 Predict Purchase Likelihood",
+                                   type="primary",
+                                   use_container_width=True)
+        # Reset only clears the prediction flag and reruns; the stored result values are left in place
+        if st.button("🔄 Reset", use_container_width=True):
+            st.session_state.prediction_made = False
+            st.rerun()
+    with col_left:
+        if predict_button:
+            with st.spinner("🤖 Analyzing customer data..."):
+                # Create DataFrame with exact feature order
+                input_df = pd.DataFrame([features])
+                # Make prediction
+                try:
+                    # Column 1 of predict_proba is taken as the purchase probability
+                    prediction_proba = model.predict_proba(input_df)[0, 1]
+                    # Classify with the 0.45 threshold rather than the model's default
+                    prediction = 1 if prediction_proba >= 0.45 else 0
+                    # Store in session state so the results section below can read them
+                    st.session_state.prediction_made = True
+                    st.session_state.prediction = prediction
+                    st.session_state.probability = prediction_proba
+                    st.session_state.features = features
+                    st.session_state.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+                except Exception as e:
+                    st.error(f"❌ Prediction Error: {str(e)}")
+                    st.error("Please check that all input values are valid.")
+                    st.stop()
+    # Display prediction results
+    if st.session_state.prediction_made:
+        st.markdown("---")
+        st.subheader("📈 Prediction Results")
+        prediction = st.session_state.prediction
+        probability = st.session_state.probability
+        # Prediction box with color coding
+        if prediction == 1:
+            st.markdown(f"""
+                <div class="prediction-box prediction-positive">
+                    ✅ LIKELY TO PURCHASE<br>
+                    <span style="font-size: 2rem;">{probability*100:.1f}%</span><br>
+                    Confidence Level
+                </div>
+            """, unsafe_allow_html=True)
+        else:
+            # For a negative prediction the displayed figure is the complement (1 - probability)
+            st.markdown(f"""
+                <div class="prediction-box prediction-negative">
+                    ❌ UNLIKELY TO PURCHASE<br>
+                    <span style="font-size: 2rem;">{(1-probability)*100:.1f}%</span><br>
+                    Confidence Level (Not Buying)
+                </div>
+            """, unsafe_allow_html=True)
+        # Visualization section
+        st.markdown("---")
+        st.subheader("📊 Visual Analysis")
+        viz_col1, viz_col2 = st.columns([1, 1])
+        with viz_col1:
+            st.plotly_chart(create_gauge_chart(probability),
+                          use_container_width=True)
+        with viz_col2:
+            # Charts use the features stored at prediction time, not the current sidebar values
+            input_df = pd.DataFrame([st.session_state.features])
+            st.plotly_chart(create_feature_importance_chart(input_df),
+                          use_container_width=True)
+        # Recommendations section
+        st.markdown("---")
+        st.subheader("💡 Actionable Recommendations")
+        recommendations = get_recommendation(probability, st.session_state.features)
+        for rec in recommendations:
+            if rec:  # Skip empty strings
+                st.markdown(f"{rec}")
+        # Model explanation
+        with st.expander("🤔 How does the model work?"):
+            st.markdown("""
+            **Model Details:**
+            - **Algorithm**: XGBoost (Extreme Gradient Boosting)
+            - **Classification Threshold**: 45%
+            - **Training Data**: 4,128 customer records
+            - **Features**: 18 input variables including demographics, travel preferences, and interaction history
+            **Prediction Logic:**
+            - Probability ≥ 45% → Customer likely to purchase
+            - Probability < 45% → Customer unlikely to purchase
+            **Key Factors Considered:**
+            - Customer demographics (age, income, occupation)
+            - Travel behavior (past trips, preferences)
+            - Sales interaction (pitch satisfaction, follow-ups)
+            - Family situation (marital status, children)
+            The model has been trained to identify patterns that indicate purchase likelihood based on historical customer data.
+            """)
+        # Export functionality
+        st.markdown("---")
+        st.subheader("📥 Export Prediction Report")
+        report_col1, report_col2 = st.columns([2, 1])
+        with report_col1:
+            st.info("💾 Download a detailed report with all customer information and prediction results")
+        with report_col2:
+            # Create comprehensive report
+            # The list-valued entries fix the frame at one row; the scalar feature values are broadcast to it
+            report_data = {
+                'Timestamp': [st.session_state.timestamp],
+                'Prediction': ['Will Purchase' if prediction == 1 else 'Will Not Purchase'],
+                'Purchase_Probability': [f"{probability*100:.2f}%"],
+                'Confidence_Level': ['High' if abs(probability - 0.5) > 0.2 else 'Medium'],
+                **st.session_state.features
+            }
+            report_df = pd.DataFrame(report_data)
+            csv = report_df.to_csv(index=False)
+            # The file name carries the download-time timestamp; the CSV's Timestamp column is the prediction time
+            st.download_button(
+                label="📄 Download CSV Report",
+                data=csv,
+                file_name=f"customer_prediction_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv",
+                mime="text/csv",
+                use_container_width=True
+            )
+    # Footer
+    st.markdown("---")
+    st.markdown("""
+        <div style='text-align: center; color: #888; padding: 1rem;'>
+            <p><b>🏢 Visit with Us</b> - Wellness Tourism Package Prediction System</p>
+            <p>Powered by XGBoost ML Model | Classification Threshold: 45% | Trained on 4,128 customers</p>
+            <p style='font-size: 0.85rem;'>Model Version: v1.0 | Last Updated: December 2024</p>
+        </div>
+    """, unsafe_allow_html=True)
+    # Sidebar footer with statistics
+    with st.sidebar:
+        st.markdown("---")
+        st.info("""
+        **ℹ️ About This Application**
+        This ML-powered system predicts whether a customer will purchase
+        the Wellness Tourism Package based on their profile and interaction history.
+        **📊 Model Statistics:**
+        - **Training Data**: 4,128 customers
+        - **Purchase Rate**: 19.3%
+        - **Algorithm**: XGBoost Classifier
+        - **Threshold**: 45%
+        - **Features**: 18 variables
+        **🎯 How to Use:**
+        1. Enter customer details in the form
+        2. Click 'Predict Purchase Likelihood'
+        3. Review prediction and recommendations
+        4. Download detailed report (optional)
+        **📈 Prediction Accuracy:**
+        The model considers demographics, travel preferences,
+        and sales interaction history to make accurate predictions.
+        """)
+        st.warning("""
+        **⚠️ Important Notes:**
+        - Ensure all fields are filled accurately
+        - Income should be in Indian Rupees (₹)
+        - Follow-ups range from 1-6
+        - Pitch duration in minutes (5-127)
+        """)
+# Entry point: call main() when this module is executed as the main script
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,22 @@

+# Core dependencies
+streamlit==1.31.0
+pandas==2.1.4
+numpy==1.26.3
+# ML dependencies
+scikit-learn==1.4.0
+xgboost==2.0.3
+joblib==1.3.2
+# Hugging Face integration
+huggingface-hub==0.20.2
+# Visualization
+plotly==5.18.0
+# Excel file support (if needed for future enhancements)
+openpyxl==3.1.2
+# Additional utilities
+python-dateutil==2.8.2
+pytz==2023.3