File size: 4,968 Bytes
e38bc03
 
 
 
 
 
 
 
 
 
d55626a
 
e38bc03
 
 
 
d55626a
 
e38bc03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7a8a95
e38bc03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
d55626a
 
e38bc03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import pandas as pd
import numpy as np
import joblib
from huggingface_hub import hf_hub_download


# 1. Load Model from Hugging Face Model Hub
# ============================================================

# Hugging Face repo the model file is downloaded from (repo_type="model").
MODEL_REPO_ID = "Quantum9999/Tourism-Package-Prediction-Model"
# Hugging Face repo the preprocessing pipeline is downloaded from
# (repo_type="dataset").
DATASET_REPO_ID = "Quantum9999/Tourism-Package-Prediction"
# File name of the serialized model inside MODEL_REPO_ID.
MODEL_FILENAME = "xgb_model.pkl"

@st.cache_resource
def load_model():
    """Fetch the serialized model from the Hub and deserialize it.

    The file ``MODEL_FILENAME`` is downloaded from ``MODEL_REPO_ID`` (a
    ``repo_type="model"`` repository) and loaded with ``joblib``.  The
    function is wrapped in ``st.cache_resource``.

    Returns:
        The object stored in the downloaded file.
    """
    return joblib.load(
        hf_hub_download(
            repo_id=MODEL_REPO_ID,
            filename=MODEL_FILENAME,
            repo_type="model",
        )
    )


# Module-level handle used by the prediction section further down.
model = load_model()


# 2. Streamlit UI
# ============================================================

# Page heading followed by a one-sentence instruction for the user.
st.title(" Wellness Tourism Package Purchase Prediction")
st.write(
    "Fill in the customer details below to predict whether they will "
    "purchase the new Wellness Tourism Package."
)

# Horizontal rule between the introduction and the input form.
st.markdown("---")

# 3. User Inputs
# ============================================================

def user_input_form():
    """Draw the customer-detail widgets and gather their current values.

    Returns:
        A one-row ``pandas.DataFrame`` with one column per widget; columns
        appear in the same order as the widgets are created.
    """
    # A dict literal evaluates its values top to bottom, so the widgets are
    # created (and therefore rendered) in exactly the order listed here.
    answers = {
        # --- numeric / ordinal inputs ---
        "Age": st.number_input("Age", min_value=1, max_value=100, value=30),
        "CityTier": st.selectbox("City Tier", [1, 2, 3]),
        "NumberOfPersonVisiting": st.number_input(
            "Number of People Visiting", min_value=1, max_value=10, value=1
        ),
        "PreferredPropertyStar": st.selectbox("Preferred Property Star", [3, 4, 5]),
        "NumberOfTrips": st.number_input(
            "Number of Trips per Year", min_value=0, max_value=20, value=2
        ),
        "NumberOfChildrenVisiting": st.number_input(
            "Children Visiting (Under 5 Years)", min_value=0, max_value=5, value=0
        ),
        "MonthlyIncome": st.number_input(
            "Monthly Income", min_value=1000, max_value=1000000, value=30000
        ),
        "PitchSatisfactionScore": st.selectbox(
            "Pitch Satisfaction Score", [1, 2, 3, 4, 5]
        ),
        "NumberOfFollowups": st.number_input(
            "Number of Follow-ups", min_value=1, max_value=10, value=2
        ),
        "DurationOfPitch": st.number_input(
            "Duration of Pitch (Minutes)", min_value=1, max_value=60, value=15
        ),
        # --- categorical inputs ---
        "TypeofContact": st.selectbox(
            "Type of Contact", ["Company Invited", "Self Enquiry"]
        ),
        "Occupation": st.selectbox(
            "Occupation",
            ["Salaried", "Self Employed", "Small Business", "Large Business", "Free Lancer"],
        ),
        "Gender": st.selectbox("Gender", ["Male", "Female"]),
        "MaritalStatus": st.selectbox(
            "Marital Status", ["Married", "Single", "Divorced"]
        ),
        "Passport": st.selectbox("Passport", [0, 1]),
        "OwnCar": st.selectbox("Owns Car?", [0, 1]),
        "Designation": st.selectbox(
            "Designation", ["Junior", "Senior", "Manager", "Executive", "Other"]
        ),
        "ProductPitched": st.selectbox(
            "Product Pitched", ["Basic", "Standard", "Deluxe", "Super Deluxe", "King"]
        ),
    }

    # Wrap every value in a one-element list so the frame has exactly one row.
    return pd.DataFrame({column: [value] for column, value in answers.items()})

# Render the form and keep the resulting one-row DataFrame; it is passed to
# the preprocessor's transform() in the preprocessing section below.
user_data = user_input_form()

# Horizontal rule between the form and what follows it.
st.markdown("---")


# 4. Preprocess User Input → MATCH Training Preprocessing
# ============================================================

# Numerical / categorical column split (noted in the original as mirroring
# prep.py).
# NOTE(review): neither list is referenced elsewhere in the visible code; the
# downloaded pipeline presumably carries its own column configuration. They
# are kept as module-level names in case other code relies on them.
numerical_features = [
    'Age', 'CityTier', 'NumberOfPersonVisiting', 'PreferredPropertyStar',
    'NumberOfTrips', 'NumberOfChildrenVisiting', 'MonthlyIncome',
    'PitchSatisfactionScore', 'NumberOfFollowups', 'DurationOfPitch'
]

categorical_features = [
    'TypeofContact', 'Occupation', 'Gender', 'MaritalStatus',
    'Passport', 'OwnCar', 'Designation', 'ProductPitched'
]


@st.cache_resource
def load_preprocessor():
    """Download and deserialize the fitted preprocessing pipeline once.

    Wrapped in ``st.cache_resource`` (the same way ``load_model`` is) so the
    Hub lookup and the ``joblib`` deserialization are not repeated every time
    this script is executed.

    Returns:
        The object stored in ``preprocessing_pipeline.pkl`` in
        ``DATASET_REPO_ID``; the code below calls ``.transform`` on it.
    """
    pipeline_path = hf_hub_download(
        repo_id=DATASET_REPO_ID,
        filename="preprocessing_pipeline.pkl",
        repo_type="dataset",
    )
    # NOTE(security): joblib.load unpickles the file, which can execute
    # arbitrary code. Only point DATASET_REPO_ID at a repository you trust.
    return joblib.load(pipeline_path)


# Load preprocessor (generated in prep.py) from the DATASET_REPO_ID
preprocessor = load_preprocessor()

# Apply the pipeline to the single row of form values collected above.
processed_user_data = preprocessor.transform(user_data)


# 5. Make Prediction
# ============================================================

if st.button("Predict"):
    # Predicted class label for the single input row.
    prediction = model.predict(processed_user_data)[0]
    # Probability in column 1 of predict_proba for that row.
    # NOTE(review): assumes a binary classifier whose classes are ordered
    # [0, 1], so column 1 is P(purchase) - confirm against the trained model.
    proba = model.predict_proba(processed_user_data)[0][1]

    # Report confidence in the class that was actually predicted. Showing
    # P(purchase) next to a "NOT likely" verdict would make a confident
    # rejection (e.g. P(purchase) = 0.10) read as "Confidence: 0.10".
    confidence = proba if prediction == 1 else 1 - proba

    st.subheader(" Prediction Result")

    if prediction == 1:
        st.success(f" Customer is LIKELY to purchase the Wellness Tourism Package! (Confidence: {confidence:.2f})")
    else:
        st.error(f" Customer is NOT likely to purchase the package. (Confidence: {confidence:.2f})")