Spaces:

DD009
/

Tourism

Sleeping

App Files Files Community

Tourism / app.py

DD009

Upload folder using huggingface_hub

723570a verified 3 months ago

raw

history blame contribute delete

10.2 kB

	import streamlit as st
	import pandas as pd
	import numpy as np
	from huggingface_hub import hf_hub_download
	import joblib
	import pickle

	# Page-level settings: browser tab title, tab icon and wide layout.
	st.set_page_config(
	    page_title="Tourism Package Predictor",
	    page_icon="🏝️",
	    layout="wide",
	)

	# ============================================================================
	# LOAD MODEL AND PREPROCESSING OBJECTS
	# ============================================================================

	@st.cache_resource
	def _fetch_model_artifacts():
	    """Download and deserialize the model, scaler and label encoders.

	    Kept separate from the public loader so that only successful loads are
	    cached: this function lets every exception propagate, and a call that
	    raises leaves nothing in the resource cache.

	    Returns:
	        Tuple ``(model, scaler, encoders)`` deserialized from the three
	        artifact files of the ``DD009/tourism-package-model`` repository.
	    """
	    repo_id = "DD009/tourism-package-model"

	    model_path = hf_hub_download(repo_id=repo_id, filename="best_tourism_model.joblib")
	    model = joblib.load(model_path)

	    scaler_path = hf_hub_download(repo_id=repo_id, filename="scaler.pkl")
	    scaler = joblib.load(scaler_path)

	    encoders_path = hf_hub_download(repo_id=repo_id, filename="label_encoders.pkl")
	    # SECURITY: pickle (and joblib) can execute arbitrary code while loading.
	    # Only point this at a repository whose contents you control and trust.
	    with open(encoders_path, 'rb') as f:
	        encoders = pickle.load(f)

	    return model, scaler, encoders


	def load_model_and_preprocessors():
	    """Load model, scaler, and encoders from HuggingFace.

	    Successful loads are served from the Streamlit resource cache. A failed
	    load is reported in the UI and is NOT cached, so the next script run
	    retries the download instead of replaying a stale failure.

	    Returns:
	        Tuple ``(model, scaler, encoders)`` on success, or
	        ``(None, None, None)`` if any download or deserialization step fails.
	    """
	    try:
	        return _fetch_model_artifacts()
	    except Exception as e:
	        # Top-level UI boundary: show the problem instead of crashing the page.
	        st.error(f"Error loading model: {e}")
	        st.info("Please ensure the model is uploaded to HuggingFace")
	        return None, None, None

	# Fetch the prediction artifacts once per script run.
	model, scaler, encoders = load_model_and_preprocessors()

	# ============================================================================
	# STREAMLIT UI
	# ============================================================================

	# Introductory markdown shown directly under the page title.
	intro_text = """
	### Predict Customer Purchase Likelihood
	This application predicts whether a customer will purchase the Wellness Tourism Package
	based on their demographics, travel preferences, and sales interaction data.
	"""

	st.title("🏝️ Wellness Tourism Package Purchase Predictor")
	st.markdown(intro_text)
	st.markdown("---")

	# Without a model nothing below can work: report it and halt this run.
	model_missing = model is None
	if model_missing:
	    st.error("❌ Model not loaded. Please check HuggingFace repository.")
	    st.stop()

	# ============================================================================
	# INPUT FORM
	# ============================================================================

	# NOTE(review): nesting indentation was lost in the source this was recovered
	# from; the extent of each `with` block is reconstructed from the section
	# headings - confirm the widget placement against the deployed app.

	# Choices offered by the select boxes.
	GENDER_CHOICES = ["Male", "Female"]
	MARITAL_STATUS_CHOICES = ["Single", "Married", "Divorced", "Unmarried"]
	OCCUPATION_CHOICES = ["Salaried", "Small Business", "Large Business", "Free Lancer"]
	DESIGNATION_CHOICES = ["Executive", "Manager", "Senior Manager", "AVP", "VP"]
	CITY_TIER_CHOICES = [1, 2, 3]
	CONTACT_TYPE_CHOICES = ["Self Enquiry", "Company Invited"]
	PROPERTY_STAR_CHOICES = [3.0, 4.0, 5.0]
	YES_NO_CHOICES = ["Yes", "No"]
	PRODUCT_CHOICES = ["Basic", "Standard", "Deluxe", "Super Deluxe", "King"]

	col1, col2 = st.columns(2)

	# Left column: who the customer is.
	with col1:
	    st.subheader("👤 Customer Demographics")

	    age = st.number_input("Age", min_value=18, max_value=100, value=35, step=1)
	    gender = st.selectbox("Gender", GENDER_CHOICES)
	    marital_status = st.selectbox("Marital Status", MARITAL_STATUS_CHOICES)
	    occupation = st.selectbox("Occupation", OCCUPATION_CHOICES)
	    designation = st.selectbox("Designation", DESIGNATION_CHOICES)
	    monthly_income = st.number_input("Monthly Income (₹)", min_value=0, max_value=200000, value=50000, step=1000)

	# Right column: how the customer travels.
	with col2:
	    st.subheader("✈️ Travel Preferences")

	    city_tier = st.selectbox("City Tier", CITY_TIER_CHOICES)
	    type_of_contact = st.selectbox("Type of Contact", CONTACT_TYPE_CHOICES)
	    num_persons = st.number_input("Number of Persons Visiting", min_value=1, max_value=10, value=2, step=1)
	    num_children = st.number_input("Number of Children (below 5)", min_value=0, max_value=5, value=0, step=1)
	    property_star = st.selectbox("Preferred Property Star Rating", PROPERTY_STAR_CHOICES)
	    num_trips = st.number_input("Number of Trips Per Year", min_value=0, max_value=20, value=2, step=1)
	    passport = st.selectbox("Has Valid Passport", YES_NO_CHOICES)
	    own_car = st.selectbox("Owns Car", YES_NO_CHOICES)

	# Turn the Yes/No answers into 1/0 flags.
	passport_val = int(passport == "Yes")
	own_car_val = int(own_car == "Yes")

	st.markdown("---")

	st.subheader("💼 Sales Interaction")

	col3, col4 = st.columns(2)

	with col3:
	    product_pitched = st.selectbox("Product Pitched", PRODUCT_CHOICES)
	    pitch_satisfaction = st.slider("Pitch Satisfaction Score", min_value=1, max_value=5, value=3)

	with col4:
	    num_followups = st.number_input("Number of Follow-ups", min_value=0, max_value=10, value=3, step=1)
	    duration_pitch = st.number_input("Duration of Pitch (minutes)", min_value=0, max_value=60, value=15, step=1)

	st.markdown("---")

	# ============================================================================
	# PREDICTION
	# ============================================================================

	def _encode_categoricals(frame, fitted_encoders):
	    """Encode the object-dtype columns of ``frame`` in place.

	    Only columns that have an entry in ``fitted_encoders`` are touched. When
	    an encoder fails to transform a column (for example on a category it was
	    not fitted on), the column is set to 0 as a best-effort fallback and its
	    name is recorded so the caller can tell the user.

	    Args:
	        frame: DataFrame holding the raw form values; modified in place.
	        fitted_encoders: Mapping from column name to an object exposing
	            ``transform``.

	    Returns:
	        List of column names that fell back to the default code 0.
	    """
	    fallback_columns = []
	    for col in frame.select_dtypes(include=['object']).columns:
	        if col not in fitted_encoders:
	            continue
	        try:
	            frame[col] = fitted_encoders[col].transform(frame[col].astype(str))
	        except Exception:
	            # Keep going with a default code, but remember the column so the
	            # substitution is not silent.
	            frame[col] = 0
	            fallback_columns.append(col)
	    return fallback_columns


	# Everything below runs only on the script run triggered by the button click.
	if st.button("🔮 Predict Purchase Probability", type="primary", use_container_width=True):

	    # Create input dataframe with exact column names from training
	    input_data = pd.DataFrame([{
	        'Age': age,
	        'TypeofContact': type_of_contact,
	        'CityTier': city_tier,
	        'DurationOfPitch': duration_pitch,
	        'Occupation': occupation,
	        'Gender': gender,
	        'NumberOfPersonVisiting': num_persons,
	        'NumberOfFollowups': num_followups,
	        'ProductPitched': product_pitched,
	        'PreferredPropertyStar': property_star,
	        'MaritalStatus': marital_status,
	        'NumberOfTrips': num_trips,
	        'Passport': passport_val,
	        'PitchSatisfactionScore': pitch_satisfaction,
	        'OwnCar': own_car_val,
	        'NumberOfChildrenVisiting': num_children,
	        'Designation': designation,
	        'MonthlyIncome': monthly_income
	    }])

	    try:
	        # Encode categorical variables; warn if any value had to be defaulted,
	        # because the prediction is then based on a made-up category code.
	        defaulted_columns = _encode_categoricals(input_data, encoders)
	        if defaulted_columns:
	            st.warning(
	                "Could not encode: " + ", ".join(defaulted_columns)
	                + ". A default code of 0 was used, so the prediction may be unreliable."
	            )

	        # Scale features
	        input_scaled = scaler.transform(input_data)

	        # Make prediction
	        # NOTE(review): the code below treats probability[1] as the "will
	        # purchase" class - confirm against model.classes_.
	        prediction = model.predict(input_scaled)[0]
	        probability = model.predict_proba(input_scaled)[0]

	        # Display results
	        st.markdown("---")
	        st.subheader("📊 Prediction Results")

	        col_r1, col_r2, col_r3 = st.columns(3)

	        with col_r1:
	            if prediction == 1:
	                st.success("### ✅ Will Purchase")
	                st.markdown("Prediction: Customer is likely to buy the package")
	            else:
	                st.error("### ❌ Will Not Purchase")
	                st.markdown("Prediction: Customer is unlikely to buy the package")

	        with col_r2:
	            purchase_prob = probability[1] * 100
	            st.metric(
	                "Purchase Probability",
	                f"{purchase_prob:.1f}%",
	                delta=None
	            )

	        with col_r3:
	            # Confidence is the probability of whichever class scored highest.
	            confidence = max(probability) * 100
	            st.metric(
	                "Model Confidence",
	                f"{confidence:.1f}%",
	                delta=None
	            )

	        # Recommendations, tiered by purchase probability
	        st.markdown("---")
	        st.subheader("💡 Recommendation")

	        if probability[1] >= 0.7:
	            st.success("""
	🌟 High Priority Lead
	- Probability: Very High (>70%)
	- Action: Contact immediately with personalized offer
	- Follow-up: Within 24 hours
	- Offer: Premium package with special discount
	""")
	        elif probability[1] >= 0.5:
	            st.warning("""
	⚡ Medium Priority Lead
	- Probability: Moderate (50-70%)
	- Action: Schedule follow-up call within 2-3 days
	- Follow-up: Regular contact
	- Offer: Standard package with competitive pricing
	""")
	        elif probability[1] >= 0.3:
	            st.info("""
	📧 Low Priority Lead
	- Probability: Low (30-50%)
	- Action: Add to email nurture campaign
	- Follow-up: Monthly newsletters
	- Offer: Budget-friendly options
	""")
	        else:
	            st.warning("""
	🔍 Re-evaluate Approach
	- Probability: Very Low (<30%)
	- Action: May need different package or timing
	- Follow-up: Quarterly check-in
	- Offer: Explore alternative travel options
	""")

	        # Display probability breakdown
	        st.markdown("---")
	        st.subheader("📈 Probability Breakdown")

	        prob_df = pd.DataFrame({
	            'Outcome': ['Will Not Purchase', 'Will Purchase'],
	            'Probability': [probability[0] * 100, probability[1] * 100]
	        })

	        st.bar_chart(prob_df.set_index('Outcome'))

	    except Exception as e:
	        # UI boundary: report scaling/prediction failures instead of crashing.
	        st.error(f"❌ Prediction error: {e}")
	        st.info("Please ensure all preprocessing files are uploaded correctly")

	# ============================================================================
	# FOOTER
	# ============================================================================

	# Static "About" text rendered at the bottom of the page, below a divider.
	about_text = """
	### 📝 About
	This predictive model uses XGBoost trained on historical customer data to predict
	the likelihood of purchasing the Wellness Tourism Package. The model considers:
	- Customer demographics (age, income, occupation)
	- Travel preferences (city tier, property rating, trips per year)
	- Sales interaction data (pitch satisfaction, follow-ups, duration)

	Model Performance:
	- F1-Score: ~85-90%
	- ROC-AUC: ~90-95%
	- Accuracy: ~85-90%

	Developed as part of MLOps Pipeline Project
	"""

	st.markdown("---")
	st.markdown(about_text)