tourism_project / app.py
Divyap90's picture
Update app.py
eac29a5 verified
import streamlit as st
import pandas as pd
import numpy as np
import mlflow
import os
import joblib
from huggingface_hub import hf_hub_download
# Page configuration has to be issued before any other Streamlit command,
# which is why it sits directly below the imports.
st.set_page_config(
    layout="wide",
    page_title="Wellness Tourism Package Predictor",
)

# Location of the serialized model on the Hugging Face Hub:
# the model repository ID and the artifact file stored inside it.
HF_MODEL_REPO = "Divyap90/tourism_project_model"
filename = "model.joblib"
@st.cache_resource
def load_model():
    """Download the serialized model from the Hugging Face Hub and load it.

    The file named by the module-level ``filename`` is fetched from the
    ``HF_MODEL_REPO`` model repository with ``hf_hub_download`` and then
    deserialized with ``joblib.load``. The function is wrapped in
    ``st.cache_resource``, so the download/deserialization is not meant to be
    repeated on every script rerun.

    Returns:
        The object stored in the joblib file (used by the rest of the app
        through its ``predict`` / ``predict_proba`` methods).

    Any exception raised while downloading or deserializing is reported to the
    user with ``st.error`` and the current script run is halted with
    ``st.stop()``; the exception itself is not propagated to the caller.
    """
    try:
        # Use the module-level constants so the repository and file name are
        # configured in exactly one place.
        model_path = hf_hub_download(
            repo_id=HF_MODEL_REPO,
            filename=filename,
            repo_type="model",
        )
        # NOTE(security): joblib.load unpickles the downloaded file, and
        # unpickling can execute arbitrary code. Only point HF_MODEL_REPO at
        # a repository you trust.
        return joblib.load(model_path)
    except Exception as e:
        # Top-level boundary for the app: surface the problem in the UI and
        # stop this run instead of continuing without a model.
        st.error(f"Error loading model: {e}")
        st.stop()
# Obtain the model up front; load_model() stops the run itself on failure.
model = load_model()

st.title("Wellness Tourism Package Purchase Predictor")
st.markdown("Predict whether a customer will purchase the newly introduced Wellness Tourism Package based on their details.")


def _yes_no(flag):
    """Label a 0/1 select-box option as "No"/"Yes"."""
    return "Yes" if flag == 1 else "No"


# Customer details are collected in a three-column form. Each widget's value
# is kept in a module-level variable that the prediction step reads later.
st.header("Customer Information")
col1, col2, col3 = st.columns(3)

with col1:
    age = st.slider("Age", min_value=18, max_value=70, value=30)
    monthly_income = st.number_input(
        "Monthly Income",
        min_value=10000,
        max_value=200000,
        value=30000,
        step=1000,
    )
    num_person_visiting = st.slider(
        "Number of People Visiting", min_value=1, max_value=6, value=1
    )
    gender = st.selectbox(
        "Gender",
        options=["Male", "Female", "Prefer not to say"],
    )
    passport = st.selectbox("Passport", options=[0, 1], format_func=_yes_no)

with col2:
    marital_status = st.selectbox(
        "Marital Status",
        options=["Single", "Married", "Divorced"],
    )
    occupation = st.selectbox(
        "Occupation",
        options=["Salaried", "Small Business", "Large Business", "Free Lancer"],
    )
    designation = st.selectbox(
        "Designation",
        options=[
            "Manager",
            "Executive",
            "Senior Manager",
            "AVP",
            "VP",
            "Director",
            "Senior Executive",
            "Junior Executive",
            "Consultant",
        ],
    )
    city_tier = st.selectbox("City Tier", options=[1, 2, 3])
    own_car = st.selectbox("Own Car", options=[0, 1], format_func=_yes_no)

with col3:
    typeof_contact = st.selectbox(
        "Type of Contact",
        options=["Self Enquiry", "Company Invited"],
    )
    preferred_property_star = st.slider(
        "Preferred Property Star (1-5)", min_value=1, max_value=5, value=3
    )
    num_of_trips = st.slider(
        "Number of Trips (Annually)", min_value=0, max_value=20, value=2
    )
    num_of_followups = st.slider(
        "Number of Follow-ups", min_value=0, max_value=10, value=3
    )
    duration_of_pitch = st.slider(
        "Duration of Pitch (minutes)", min_value=5, max_value=60, value=10
    )
    pitch_satisfaction_score = st.slider(
        "Pitch Satisfaction Score (1-5)", min_value=1, max_value=5, value=3
    )
    product_pitched = st.selectbox(
        "Product Pitched",
        options=[
            "Basic",
            "Deluxe",
            "Standard",
            "Super Deluxe",
            "King",
            "Premium",
        ],
    )
    num_of_children_visiting = st.slider(
        "Number of Children Visiting (below 5 years)",
        min_value=0,
        max_value=5,
        value=0,
    )
# Turn the collected form values into the single-row frame passed to the model.
def preprocess_input(data: dict) -> pd.DataFrame:
    """Return a one-row DataFrame holding every feature column in a fixed order.

    The frame always has the same 18 columns, in the order of
    ``default_values`` below (presumably the columns the model pipeline was
    trained on -- confirm against the training code). Each column takes its
    value from ``data`` when that key is present and falls back to the
    default otherwise. Keys in ``data`` that are not known feature names are
    ignored, and ``data`` itself is not modified.

    Args:
        data: Mapping of feature name to the value entered for it.

    Returns:
        A DataFrame with a single row at index ``0``.
    """
    default_values = {
        # Numerical features
        'Age': 30.0,
        'PitchSatisfactionScore': 3.0,
        'NumberOfPersonVisiting': 1.0,
        'PreferredPropertyStar': 3.0,
        'NumberOfTrips': 2.0,
        'NumberOfFollowups': 3.0,
        'DurationOfPitch': 10.0,
        'MonthlyIncome': 30000.0,
        'NumberOfChildrenVisiting': 0.0,
        # Categorical features
        'TypeofContact': 'Self Enquiry',
        'CityTier': 1,
        'Occupation': 'Salaried',
        'Gender': 'Male',
        'MaritalStatus': 'Single',
        'ProductPitched': 'Basic',
        'Designation': 'Executive',
        # Binary (0/1) features
        'Passport': 0,
        'OwnCar': 0,
    }
    # Keep only recognised feature names; merging them over the defaults
    # preserves the defaults' key order, so the column order never depends on
    # the order of the caller's dict.
    overrides = {
        col: value for col, value in data.items() if col in default_values
    }
    return pd.DataFrame({**default_values, **overrides}, index=[0])
# Everything below runs only on the script run triggered by the button press.
if st.button("Predict Purchase Likelihood"):
    # Map each widget value onto the feature name used for the model input.
    input_data = {
        "Age": age,
        "PitchSatisfactionScore": pitch_satisfaction_score,
        "NumberOfPersonVisiting": num_person_visiting,
        "PreferredPropertyStar": preferred_property_star,
        "NumberOfTrips": num_of_trips,
        "NumberOfFollowups": num_of_followups,
        "DurationOfPitch": duration_of_pitch,
        "MonthlyIncome": monthly_income,
        "NumberOfChildrenVisiting": num_of_children_visiting,
        "TypeofContact": typeof_contact,
        "CityTier": city_tier,
        "Occupation": occupation,
        "Gender": gender,
        "MaritalStatus": marital_status,
        "ProductPitched": product_pitched,
        "Designation": designation,
        "Passport": passport,
        "OwnCar": own_car,
    }
    processed_df = preprocess_input(input_data)

    try:
        prediction = model.predict(processed_df)
        # Second column of predict_proba is used as the purchase probability.
        prediction_proba = model.predict_proba(processed_df)[:, 1]

        st.subheader("Prediction Result")
        will_purchase = prediction[0] == 1
        if not will_purchase:
            st.warning(f"The customer is predicted **not to purchase** the Wellness Tourism Package with a probability of **{1 - prediction_proba[0]:.2f}**.")
        else:
            st.success(f"The customer is predicted to **purchase** the Wellness Tourism Package with a probability of **{prediction_proba[0]:.2f}**.")

        # Echo the exact row that was sent to the model.
        st.subheader("Input Data for Prediction")
        st.dataframe(processed_df)
    except Exception as e:
        # Any failure while predicting or rendering is reported in the UI
        # rather than crashing the app.
        st.error(f"An error occurred during prediction: {e}")
        st.info("Please check the input values and ensure the model is loaded correctly.")