File size: 5,577 Bytes
8d8e847
 
 
 
 
 
 
 
 
098cb98
8d8e847
 
 
 
 
a901e39
8d8e847
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
098cb98
8d8e847
 
 
 
 
098cb98
8d8e847
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import streamlit as st
import pandas as pd
from huggingface_hub import hf_hub_download
import joblib
import os

# --- Label-encoding lookup tables ---
# Every table maps a category label to an integer code assigned in
# alphabetical order of the labels (the ordering the original note says was
# derived from the unique values of the original data).
def _alphabetical_codes(*labels):
    """Return {label: code}, with codes numbered 0..n-1 in sorted label order."""
    return {label: code for code, label in enumerate(sorted(labels))}


TYPEOFCONTACT_MAP = _alphabetical_codes('Company Invited', 'Self Inquiry')
OCCUPATION_MAP = _alphabetical_codes(
    'Free Lancer', 'Large Business', 'Salaried', 'Small Business'
)
# Per the original note, this reflects the unique values in the cleaned data.
GENDER_MAP = _alphabetical_codes('Female', 'Male')
PRODUCTPITCHED_MAP = _alphabetical_codes(
    'Basic', 'Deluxe', 'King', 'Standard', 'Super Deluxe'
)
MARITALSTATUS_MAP = _alphabetical_codes('Divorced', 'Married', 'Single')
DESIGNATION_MAP = _alphabetical_codes(
    'AVP', 'Executive', 'Manager', 'Senior Manager', 'VP'
)

# Download and load the model
MODEL_REPO_ID = "deepakpathania/tourism-xgboost-model"
MODEL_FILENAME = "xgboost_model/best_tourism_model_v1.joblib"


@st.cache_resource
def load_model(repo_id, filename):
    """Fetch the serialized model from the Hugging Face Hub and deserialize it.

    Streamlit re-executes this script on every widget interaction; caching the
    loaded object as a resource means the download lookup and deserialization
    happen once per server process instead of once per rerun.

    Args:
        repo_id: Hugging Face Hub repository that hosts the model file.
        filename: Path of the model file inside that repository.

    Returns:
        The object stored in the joblib file.

    Raises:
        Exception: Whatever `hf_hub_download` or `joblib.load` raises; the
            caller below reports it in the UI. Failed calls are not cached.
    """
    model_path = hf_hub_download(repo_id=repo_id, filename=filename)
    # SECURITY NOTE: joblib.load is pickle-based and can execute arbitrary code
    # while deserializing. Only point MODEL_REPO_ID at a repository you trust.
    return joblib.load(model_path)


try:
    model = load_model(MODEL_REPO_ID, MODEL_FILENAME)
except Exception as e:
    # Top-level boundary: surface the failure to the user and halt this run,
    # since nothing below can work without a model.
    st.error(f"Error loading model from Hugging Face Hub: {e}")
    st.stop()

# ---------------------------------------------------------------------------
# Page header and customer-detail widgets for the Wellness Tourism Package
# purchase prediction. Widgets are declared in the order they appear on the
# page; each value is bound to a module-level name read by the prediction step.
# ---------------------------------------------------------------------------
st.title("Wellness Tourism Package Purchase Prediction")
st.write("""
This application predicts whether a customer will purchase the newly introduced Wellness Tourism Package.
Fill in the customer details below to get a prediction.
""")

st.header("Customer Details")


def _yes_no(flag):
    """Display label for a 0/1 select-box option."""
    return "Yes" if flag == 1 else "No"


age = st.slider("Age", min_value=18, max_value=80, value=35)
type_of_contact = st.selectbox("Type of Contact", list(TYPEOFCONTACT_MAP))
city_tier = st.selectbox("City Tier", [1, 2, 3])
duration_of_pitch = st.slider(
    "Duration of Pitch (minutes)",
    min_value=5,
    max_value=100,
    value=15,
)
occupation = st.selectbox("Occupation", list(OCCUPATION_MAP))
gender = st.selectbox("Gender", list(GENDER_MAP))
number_of_person_visiting = st.slider(
    "Number of Persons Visiting",
    min_value=1,
    max_value=5,
    value=3,
)
number_of_followups = st.slider(
    "Number of Follow-ups",
    min_value=1,
    max_value=6,
    value=3,
)
product_pitched = st.selectbox("Product Pitched", list(PRODUCTPITCHED_MAP))
# Star ratings are offered as floats, so the model receives a float column.
preferred_property_star = st.selectbox("Preferred Property Star", [3.0, 4.0, 5.0])
marital_status = st.selectbox("Marital Status", list(MARITALSTATUS_MAP))
number_of_trips = st.slider("NumberOfTrips", min_value=1, max_value=25, value=3)
# Binary flags are stored as 0/1 and only displayed as No/Yes.
passport = st.selectbox("Passport Holder?", [0, 1], format_func=_yes_no)
pitch_satisfaction_score = st.slider(
    "Pitch Satisfaction Score",
    min_value=1,
    max_value=5,
    value=3,
)
own_car = st.selectbox("Owns Car?", [0, 1], format_func=_yes_no)
number_of_children_visiting = st.slider(
    "Number of Children Visiting (under 5)",
    min_value=0,
    max_value=3,
    value=1,
)
designation = st.selectbox("Designation", list(DESIGNATION_MAP))
monthly_income = st.number_input(
    "Monthly Income",
    min_value=1000.0,
    max_value=100000.0,
    value=25000.0,
    step=100.0,
)

# Prepare input data for the model (matching Xtrain structure after LabelEncoding)

# Decision threshold applied to the predicted purchase probability. The label
# shown to the user is derived from this value so that it always agrees with
# the threshold stated in the note at the bottom of the result.
CLASSIFICATION_THRESHOLD = 0.45

# Column order of the model input row. Per the original comments this list is
# meant to match the column order of the training frame (Xtrain) exactly.
FEATURE_COLUMNS = [
    'Age', 'TypeofContact', 'CityTier', 'DurationOfPitch', 'Occupation', 'Gender',
    'NumberOfPersonVisiting', 'NumberOfFollowups', 'ProductPitched',
    'PreferredPropertyStar', 'MaritalStatus', 'NumberOfTrips', 'Passport',
    'PitchSatisfactionScore', 'OwnCar', 'NumberOfChildrenVisiting', 'Designation',
    'MonthlyIncome',
]

if st.button("Predict Purchase"):
    # Categorical selections are translated to their integer codes through the
    # module-level label maps; numeric widgets are passed through unchanged.
    feature_row = {
        'Age': age,
        'TypeofContact': TYPEOFCONTACT_MAP[type_of_contact],
        'CityTier': city_tier,
        'DurationOfPitch': duration_of_pitch,
        'Occupation': OCCUPATION_MAP[occupation],
        'Gender': GENDER_MAP[gender],
        'NumberOfPersonVisiting': number_of_person_visiting,
        'NumberOfFollowups': number_of_followups,
        'ProductPitched': PRODUCTPITCHED_MAP[product_pitched],
        'PreferredPropertyStar': preferred_property_star,
        'MaritalStatus': MARITALSTATUS_MAP[marital_status],
        'NumberOfTrips': number_of_trips,
        'Passport': passport,
        'PitchSatisfactionScore': pitch_satisfaction_score,
        'OwnCar': own_car,
        'NumberOfChildrenVisiting': number_of_children_visiting,
        'Designation': DESIGNATION_MAP[designation],
        'MonthlyIncome': monthly_income,
    }

    # Selecting by FEATURE_COLUMNS fixes the column order explicitly and raises
    # KeyError if the row and the expected column list ever drift apart.
    input_data = pd.DataFrame([feature_row])[FEATURE_COLUMNS]

    # Probability of the positive class (column 1) for the single input row.
    prediction_proba = model.predict_proba(input_data)[:, 1][0]

    # Apply the documented threshold ourselves: model.predict() would use the
    # estimator's built-in cut-off (0.5 for standard classifiers) and could
    # contradict the threshold reported below.
    prediction = int(prediction_proba >= CLASSIFICATION_THRESHOLD)

    st.subheader("Prediction Result:")
    if prediction == 1:
        st.success(f"The model predicts: **Customer WILL purchase the Wellness Tourism Package!** (Probability: {prediction_proba:.2f})")
    else:
        st.info(f"The model predicts: **Customer will NOT purchase the Wellness Tourism Package.** (Probability: {prediction_proba:.2f})")

    st.write(f"Note: The model's classification threshold is {CLASSIFICATION_THRESHOLD}.")