Spaces:

cheeka84
/

tourism-package-pred

Sleeping

App Files Files Community

tourism-package-pred / app.py

cheeka84

Upload folder using huggingface_hub

2d8abcf verified 8 months ago

raw

history blame contribute delete

5.24 kB


	import joblib
	import streamlit as st
	import pandas as pd
	import numpy as np
	from huggingface_hub import hf_hub_download

	st.set_page_config(page_title="tourism package purchase", page_icon="🧳", layout="centered")

	# ---------- model loader ----------
	model_repo = "cheeka84/tourism-package-pred"
	candidate_filenames = [
	"rf_pipeline.joblib",
	"xgb_pipeline.joblib",
	"best_pipeline.joblib",
	]

	model = None
	loaded_filename = None
	last_err = None
	for fname in candidate_filenames:
	try:
	path = hf_hub_download(repo_id=model_repo, repo_type="model", filename=fname)
	model = joblib.load(path)
	loaded_filename = fname
	break
	except Exception as e:
	last_err = e

	if model is None:
	st.error(f"could not load a model from {model_repo}. last error: {last_err}")
	st.stop()

	# try to discover columns the pipeline was fitted on
	expected_cols = None
	try:
	expected_cols = list(model.named_steps["columntransformer"].feature_names_in_)
	except Exception:
	try:
	expected_cols = list(model.feature_names_in_)
	except Exception:
	expected_cols = None

	st.title("Tourism Package Purchase Predictor")
	st.caption(f"loaded model: {loaded_filename} from {model_repo}")

	threshold = st.slider(
	"decision threshold (probability ≥ threshold ⇒ purchase)",
	min_value=0.05, max_value=0.95, value=0.50, step=0.01
	)

	with st.form("input_form"):
	st.subheader("enter customer details")

	col1, col2 = st.columns(2)

	with col1:
	age = st.number_input("Age", min_value=10, max_value=100, value=30, step=1)
	citytier = st.selectbox("CityTier", [1, 2, 3], index=0)
	durationofpitch = st.number_input("DurationOfPitch (minutes)", min_value=0, max_value=500, value=30, step=1)
	occupation = st.selectbox("Occupation", ["Salaried", "Self Employed", "Small Business", "Large Business", "Student", "Unemployed"])
	gender = st.selectbox("Gender", ["Male", "Female"])
	typeofcontact = st.selectbox("TypeofContact", ["Self Enquiry", "Company Invited"])
	designation = st.selectbox("Designation", ["Executive", "Senior Manager", "Manager", "AVP", "VP"])
	monthlyincome = st.number_input("MonthlyIncome", min_value=0.0, value=50000.0, step=100.0)

	with col2:
	numberofpersonvisiting = st.number_input("NumberOfPersonVisiting", min_value=1, max_value=20, value=2, step=1)
	numberoffollowups = st.number_input("NumberOfFollowups", min_value=0, max_value=20, value=2, step=1)
	productpitched = st.selectbox("ProductPitched", ["Basic", "Standard", "Deluxe", "Super Deluxe", "King"])
	preferredpropertystar = st.selectbox("PreferredPropertyStar", [1, 2, 3, 4, 5], index=2)
	maritalstatus = st.selectbox("MaritalStatus", ["Married", "Single", "Divorced", "Unmarried"])
	numberoftrips = st.number_input("NumberOfTrips", min_value=0, max_value=50, value=2, step=1)
	passport = st.checkbox("Passport (has passport)")
	pitchsatisfactionscore = st.selectbox("PitchSatisfactionScore", [1, 2, 3, 4, 5], index=3)
	owncar = st.checkbox("OwnCar (has car)")
	numberofchildrenvisiting = st.number_input("NumberOfChildrenVisiting", min_value=0, max_value=10, value=0, step=1)

	submitted = st.form_submit_button("predict")

	if expected_cols:
	with st.expander("debug: expected input columns (from the fitted pipeline)"):
	st.write(expected_cols)

	if submitted:
	# base row with all expected cols as NaN so imputers can work
	if expected_cols:
	row = {c: np.nan for c in expected_cols}
	else:
	row = {}

	# update with user inputs using the exact training column names
	row.update({
	"Age": age,
	"CityTier": citytier,
	"DurationOfPitch": durationofpitch,
	"Occupation": occupation,
	"Gender": gender,
	"TypeofContact": typeofcontact,
	"Designation": designation,
	"MonthlyIncome": monthlyincome,
	"NumberOfPersonVisiting": numberofpersonvisiting,
	"NumberOfFollowups": numberoffollowups,
	"ProductPitched": productpitched,
	"PreferredPropertyStar": preferredpropertystar,
	"MaritalStatus": maritalstatus,
	"NumberOfTrips": numberoftrips,
	"Passport": 1 if passport else 0,
	"PitchSatisfactionScore": pitchsatisfactionscore,
	"OwnCar": 1 if owncar else 0,
	"NumberOfChildrenVisiting": numberofchildrenvisiting,
	})

	# build input frame; if expected_cols is known, reindex to match training order
	input_df = pd.DataFrame([row])
	if expected_cols:
	# drop unexpected, add missing
	for c in expected_cols:
	if c not in input_df.columns:
	input_df[c] = np.nan
	input_df = input_df[expected_cols]

	# predict
	try:
	proba = float(model.predict_proba(input_df)[:, 1][0])
	pred = int(proba >= threshold)
	label = "Will Purchase" if pred == 1 else "Won't Purchase"

	st.subheader("prediction")
	st.metric("probability of purchase", f"{proba:.3f}")
	st.write(f"threshold = {threshold:.2f} → predicted: {label}")
	except Exception as e:
	st.error(f"prediction failed: {e}")