File size: 6,478 Bytes
f75d09c
 
 
 
 
 
61ec17c
 
 
f75d09c
903663e
f75d09c
 
 
 
61ec17c
 
de700c2
 
 
 
 
d0a180b
de700c2
 
d0a180b
 
 
63f398d
 
d0a180b
 
 
 
 
 
 
 
 
 
 
 
63f398d
 
 
 
 
 
 
 
d0a180b
 
 
 
de700c2
 
 
 
 
f75d09c
 
 
 
61ec17c
 
 
f75d09c
 
 
 
61ec17c
 
f75d09c
 
 
 
 
 
 
 
 
 
 
61ec17c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f75d09c
 
61ec17c
 
 
 
 
 
 
d0a180b
f75d09c
61ec17c
 
 
f75d09c
61ec17c
 
 
f75d09c
61ec17c
 
 
 
f75d09c
 
 
61ec17c
 
 
f75d09c
61ec17c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f75d09c
 
 
 
 
61ec17c
 
 
 
f75d09c
61ec17c
 
 
 
f75d09c
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import streamlit as st
import pandas as pd
import joblib

from huggingface_hub import hf_hub_download

# -------------------------------------------------------------------
# CONFIG
# -------------------------------------------------------------------
# Hugging Face Hub repository that hosts the model artifact; passed as
# `repo_id` to hf_hub_download() in load_model().
MODEL_REPO_ID: str = "bhumitps/tourism_model"
# File name of the artifact inside that repository; passed as `filename`
# to hf_hub_download(), and the downloaded path is read with joblib.load().
MODEL_FILENAME: str = "best_tourism_model_v3.joblib"


def _patch_xgb_compat(model):
    """Backfill attributes on an unpickled XGBClassifier, in place.

    Works around an xgboost version mismatch: a model pickled with one
    xgboost release may lack ``use_label_encoder`` or ``gpu_id``, which the
    installed release may look up. Attributes that already exist are never
    overwritten.

    Args:
        model: The object returned by ``joblib.load``. Either a bare
            estimator or a pipeline-like object exposing ``named_steps``.
    """
    try:
        from xgboost import XGBClassifier
    except ImportError:
        # Without xgboost installed there is no XGBClassifier to patch.
        return

    xgb_clf = model
    if hasattr(model, "named_steps"):
        # Our pipeline step is named "model"; if a pipeline uses a different
        # name, fall back to its final step (the estimator by convention).
        xgb_clf = model.named_steps.get("model")
        if xgb_clf is None and getattr(model, "steps", None):
            xgb_clf = model.steps[-1][1]

    if not isinstance(xgb_clf, XGBClassifier):
        return

    if not hasattr(xgb_clf, "use_label_encoder"):
        xgb_clf.use_label_encoder = False
    if not hasattr(xgb_clf, "gpu_id"):
        # -1 or 0 are both fine for CPU usage
        xgb_clf.gpu_id = -1


@st.cache_resource
def load_model():
    """Download the model from HF Hub and load it with joblib.

    The result is cached by ``st.cache_resource``, so the download and
    deserialization run once per cache lifetime rather than on every rerun.

    Returns:
        The deserialized model object, after the best-effort xgboost
        compatibility patch has been attempted.

    Raises:
        Exception: Any error raised while downloading or deserializing the
            model is reported with ``st.error`` and then re-raised.
    """
    st.write("Loading model from Hugging Face Hub...")
    try:
        model_path = hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            repo_type="model",
            force_download=True,  # always fetch latest v3 model
        )
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; only point MODEL_REPO_ID at a trusted repository.
        model = joblib.load(model_path)

        try:
            _patch_xgb_compat(model)
        except Exception as patch_err:
            # The patch is best-effort and must not break the app, but a
            # failure is surfaced instead of being silently discarded.
            st.warning(
                f"Could not apply xgboost compatibility patch: {patch_err}"
            )

        st.write("Model loaded successfully.")
        return model
    except Exception as e:
        st.error(f"Error loading model: {e}")
        raise


# Fetch the prediction model (served from Streamlit's resource cache after
# the first run).
model = load_model()

# -------------------------------------------------------------------
# UI
# -------------------------------------------------------------------
st.title("Wellness Tourism Package Purchase Prediction")

# Introductory text shown under the title.
_INTRO_TEXT = """
    Predict whether a customer is likely to purchase the
    **Wellness Tourism Package**.

    Fill in the customer details below and click **Predict**.
    """
st.write(_INTRO_TEXT)

# Option lists shared by more than one widget.
_RATING_SCALE = [1, 2, 3, 4, 5]
_YES_NO = ["No", "Yes"]

# --- Input fields ---
# The form is laid out in two side-by-side columns; the widget variables
# below are read later when the Predict button is pressed.
left_col, right_col = st.columns(2)

with left_col:
    Age = st.number_input("Age", min_value=0, max_value=100, value=35)
    CityTier = st.selectbox("CityTier", [1, 2, 3], index=0)
    DurationOfPitch = st.number_input(
        "DurationOfPitch (minutes)",
        min_value=0,
        max_value=300,
        value=15,
    )
    NumberOfPersonVisiting = st.number_input(
        "NumberOfPersonVisiting",
        min_value=1,
        max_value=20,
        value=2,
    )
    NumberOfFollowups = st.number_input(
        "NumberOfFollowups",
        min_value=0,
        max_value=20,
        value=2,
    )
    PreferredPropertyStar = st.selectbox(
        "PreferredPropertyStar", _RATING_SCALE, index=2
    )
    NumberOfTrips = st.number_input(
        "NumberOfTrips",
        min_value=0,
        max_value=50,
        value=1,
    )
    NumberOfChildrenVisiting = st.number_input(
        "NumberOfChildrenVisiting",
        min_value=0,
        max_value=10,
        value=0,
    )
    MonthlyIncome = st.number_input(
        "MonthlyIncome",
        min_value=0,
        max_value=1_000_000,
        value=50_000,
        step=1000,
    )

with right_col:
    TypeofContact = st.selectbox(
        "TypeofContact", ["Self Enquiry", "Company Invited", "Other"]
    )
    Occupation = st.selectbox(
        "Occupation", ["Salaried", "Self Employed", "Free Lancer", "Other"]
    )
    Gender = st.selectbox("Gender", ["Male", "Female", "Other"])
    # Free-text field: the value is forwarded to the model exactly as typed.
    ProductPitched = st.text_input("ProductPitched (raw value)", "Basic")
    MaritalStatus = st.selectbox(
        "MaritalStatus", ["Married", "Single", "Divorced", "Other"]
    )
    Passport = st.selectbox("Passport", _YES_NO)
    PitchSatisfactionScore = st.selectbox(
        "PitchSatisfactionScore", _RATING_SCALE, index=2
    )
    OwnCar = st.selectbox("OwnCar", _YES_NO)
    Designation = st.selectbox(
        "Designation",
        ["Executive", "Manager", "Senior Manager", "AVP", "VP", "Other"],
    )

st.markdown("---")

# -------------------------------------------------------------------
# Prediction
# -------------------------------------------------------------------
# Runs only on the script rerun triggered by pressing the button.
if st.button("Predict"):
    # Map Yes/No to the numeric format used during training (0/1)
    passport_num = 1 if Passport == "Yes" else 0
    owncar_num = 1 if OwnCar == "Yes" else 0

    # Single-row frame keyed by feature name.
    # NOTE(review): column names are presumably those the model was trained
    # on; confirm against the training pipeline.
    input_data = pd.DataFrame(
        [
            {
                "Age": Age,
                "TypeofContact": TypeofContact,
                "CityTier": CityTier,
                "DurationOfPitch": DurationOfPitch,
                "Occupation": Occupation,
                "Gender": Gender,
                "NumberOfPersonVisiting": NumberOfPersonVisiting,
                "NumberOfFollowups": NumberOfFollowups,
                "ProductPitched": ProductPitched,
                "PreferredPropertyStar": PreferredPropertyStar,
                "MaritalStatus": MaritalStatus,
                "NumberOfTrips": NumberOfTrips,
                "Passport": passport_num,  # numeric
                "PitchSatisfactionScore": PitchSatisfactionScore,
                "OwnCar": owncar_num,       # numeric
                "NumberOfChildrenVisiting": NumberOfChildrenVisiting,
                "Designation": Designation,
                "MonthlyIncome": MonthlyIncome,
            }
        ]
    )

    # Inference runs on user-entered values (including free text), so a
    # model error is reported in the page instead of as a raw traceback.
    try:
        # Column 1 of predict_proba is used as the purchase probability.
        pred_proba = model.predict_proba(input_data)[0][1]
        pred_label = model.predict(input_data)[0]
    except Exception as exc:
        st.error(f"Prediction failed: {exc}")
    else:
        st.subheader("Prediction Result")
        if pred_label == 1:
            st.success(
                f"Customer is **LIKELY** to purchase the Wellness Tourism Package. "
                f"(Probability: {pred_proba:.2%})"
            )
        else:
            st.info(
                f"Customer is **UNLIKELY** to purchase the Wellness Tourism Package. "
                f"(Probability: {pred_proba:.2%})"
            )

        st.caption(
            "Note: probabilities are model-based estimates and not guarantees."
        )