# Hugging Face page header captured together with the file (not part of the program):
# cheeka84's picture
# Upload folder using huggingface_hub
# 2d8abcf verified
import joblib
import streamlit as st
import pandas as pd
import numpy as np
from huggingface_hub import hf_hub_download
st.set_page_config(page_title="tourism package purchase", page_icon="🧳", layout="centered")

# ---------- model loader ----------
model_repo = "cheeka84/tourism-package-pred"
# Tried in order; the first file that downloads and unpickles successfully wins.
candidate_filenames = [
    "rf_pipeline.joblib",
    "xgb_pipeline.joblib",
    "best_pipeline.joblib",
]


@st.cache_resource
def _load_first_available(repo_id, filenames):
    """Download and unpickle the first usable model file from a Hub repo.

    Streamlit re-executes the whole script on every widget interaction;
    caching the result keeps the download and unpickling from being repeated
    on each rerun.

    Args:
        repo_id: Hugging Face model repository id.
        filenames: Tuple of candidate file names, tried in order.

    Returns:
        A ``(model, filename)`` pair for the first candidate that loads.

    Raises:
        RuntimeError: If no candidate could be loaded. The last underlying
            error is attached as ``__cause__``. Raising (instead of returning
            a failure value) keeps a failed attempt out of the cache, so the
            next rerun retries.
    """
    failure = None
    for fname in filenames:
        try:
            path = hf_hub_download(repo_id=repo_id, repo_type="model", filename=fname)
            # NOTE(security): joblib.load unpickles the file and can therefore
            # execute arbitrary code; only point model_repo at a trusted repo.
            return joblib.load(path), fname
        except Exception as exc:
            # Best effort: any download/unpickle failure just moves on to the
            # next candidate; the last failure is reported if all of them fail.
            failure = exc
    raise RuntimeError(f"no candidate file could be loaded from {repo_id}") from failure


model = None
loaded_filename = None
last_err = None
try:
    # A tuple is passed so the cache key is built from an immutable argument.
    model, loaded_filename = _load_first_available(model_repo, tuple(candidate_filenames))
except RuntimeError as e:
    last_err = e.__cause__

if model is None:
    st.error(f"could not load a model from {model_repo}. last error: {last_err}")
    st.stop()
# Best-effort discovery of the input columns the pipeline was fitted on.
# Sources are probed in order: the "columntransformer" step first, then the
# model object itself. The first one that yields a column list is kept; if
# every probe fails, expected_cols stays None.
_column_probes = (
    lambda: model.named_steps["columntransformer"].feature_names_in_,
    lambda: model.feature_names_in_,
)
expected_cols = None
for _probe in _column_probes:
    try:
        expected_cols = list(_probe())
    except Exception:
        continue
    break
# Page header: title plus a caption naming the model file that was loaded.
st.title("Tourism Package Purchase Predictor")
_caption_text = f"loaded model: {loaded_filename} from {model_repo}"
st.caption(_caption_text)

# Probability cut-off used later to turn the predicted probability into a label.
_threshold_slider_args = {
    "min_value": 0.05,
    "max_value": 0.95,
    "value": 0.50,
    "step": 0.01,
}
threshold = st.slider(
    "decision threshold (probability ≥ threshold ⇒ purchase)",
    **_threshold_slider_args,
)
# Customer-details form. Widgets are laid out in two columns and their values
# are only submitted together when the "predict" button is pressed.
# NOTE(review): the original indentation was lost; every widget listed after
# the second column header is assumed to belong to that column - confirm.
with st.form("input_form"):
    st.subheader("enter customer details")
    col1, col2 = st.columns(2)

    # ----- left column -----
    age = col1.number_input("Age", min_value=10, max_value=100, value=30, step=1)
    citytier = col1.selectbox("CityTier", [1, 2, 3], index=0)
    durationofpitch = col1.number_input(
        "DurationOfPitch (minutes)", min_value=0, max_value=500, value=30, step=1
    )
    occupation = col1.selectbox(
        "Occupation",
        ["Salaried", "Self Employed", "Small Business", "Large Business", "Student", "Unemployed"],
    )
    gender = col1.selectbox("Gender", ["Male", "Female"])
    typeofcontact = col1.selectbox("TypeofContact", ["Self Enquiry", "Company Invited"])
    designation = col1.selectbox(
        "Designation", ["Executive", "Senior Manager", "Manager", "AVP", "VP"]
    )
    monthlyincome = col1.number_input("MonthlyIncome", min_value=0.0, value=50000.0, step=100.0)

    # ----- right column -----
    numberofpersonvisiting = col2.number_input(
        "NumberOfPersonVisiting", min_value=1, max_value=20, value=2, step=1
    )
    numberoffollowups = col2.number_input(
        "NumberOfFollowups", min_value=0, max_value=20, value=2, step=1
    )
    productpitched = col2.selectbox(
        "ProductPitched", ["Basic", "Standard", "Deluxe", "Super Deluxe", "King"]
    )
    preferredpropertystar = col2.selectbox("PreferredPropertyStar", [1, 2, 3, 4, 5], index=2)
    maritalstatus = col2.selectbox(
        "MaritalStatus", ["Married", "Single", "Divorced", "Unmarried"]
    )
    numberoftrips = col2.number_input("NumberOfTrips", min_value=0, max_value=50, value=2, step=1)
    passport = col2.checkbox("Passport (has passport)")
    pitchsatisfactionscore = col2.selectbox("PitchSatisfactionScore", [1, 2, 3, 4, 5], index=3)
    owncar = col2.checkbox("OwnCar (has car)")
    numberofchildrenvisiting = col2.number_input(
        "NumberOfChildrenVisiting", min_value=0, max_value=10, value=0, step=1
    )

    # True only on the rerun triggered by pressing the button.
    submitted = st.form_submit_button("predict")
# Debug aid: list the columns discovered on the fitted pipeline, if any were found.
if expected_cols:
    _debug_panel = st.expander("debug: expected input columns (from the fitted pipeline)")
    _debug_panel.write(expected_cols)
if submitted:
    # Form values keyed by the *exact* training column names.
    user_values = {
        "Age": age,
        "CityTier": citytier,
        "DurationOfPitch": durationofpitch,
        "Occupation": occupation,
        "Gender": gender,
        "TypeofContact": typeofcontact,
        "Designation": designation,
        "MonthlyIncome": monthlyincome,
        "NumberOfPersonVisiting": numberofpersonvisiting,
        "NumberOfFollowups": numberoffollowups,
        "ProductPitched": productpitched,
        "PreferredPropertyStar": preferredpropertystar,
        "MaritalStatus": maritalstatus,
        "NumberOfTrips": numberoftrips,
        # Checkboxes are sent as 0/1 integers rather than booleans.
        "Passport": 1 if passport else 0,
        "PitchSatisfactionScore": pitchsatisfactionscore,
        "OwnCar": 1 if owncar else 0,
        "NumberOfChildrenVisiting": numberofchildrenvisiting,
    }

    # Single-row frame holding the submitted values.
    input_df = pd.DataFrame([user_values])

    if expected_cols:
        # A form field whose name does not match a fitted column is dropped
        # below and the fitted column is sent as NaN instead. Report such
        # mismatches so they do not silently distort the prediction.
        known_cols = set(expected_cols)
        unused_inputs = [name for name in user_values if name not in known_cols]
        unfilled_cols = [str(col) for col in expected_cols if col not in user_values]
        if unused_inputs:
            st.warning(
                "inputs ignored (no matching model column): " + ", ".join(unused_inputs)
            )
        if unfilled_cols:
            st.info(
                "model columns without a form input (sent as missing): "
                + ", ".join(unfilled_cols)
            )

        # reindex drops unexpected columns, adds missing ones as NaN (so the
        # pipeline's imputers can work) and restores the training column
        # order in a single step.
        input_df = input_df.reindex(columns=expected_cols)

    # Only the model call is guarded; rendering happens on the success path.
    try:
        # Column 1 of predict_proba is taken as the "purchase" probability.
        # NOTE(review): assumes the positive class sits at index 1 of the
        # model's class ordering - confirm against the training labels.
        proba = float(model.predict_proba(input_df)[0, 1])
    except Exception as e:
        st.error(f"prediction failed: {e}")
    else:
        pred = int(proba >= threshold)
        label = "Will Purchase" if pred == 1 else "Won't Purchase"
        st.subheader("prediction")
        st.metric("probability of purchase", f"{proba:.3f}")
        st.write(f"threshold = **{threshold:.2f}** → predicted: **{label}**")