Spaces:

omm7
/

hbcp

Sleeping

App Files Files Community

hbcp / app.py

omm7

Upload folder using huggingface_hub

b7d40ec verified 6 months ago

raw

history blame

8.36 kB

	import streamlit as st
	import joblib
	import pandas as pd
	import numpy as np
	import os
	import time

	# --- Constants and Configuration ---

	# Serialized model artifact, resolved relative to the app's working directory.
	MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'

	# Feature columns in the exact order the model expects them. The order was
	# corrected to match the X_train columns, so do not re-sort this list.
	EXPECTED_FEATURES = [
	    'lead_time', 'no_of_special_requests', 'avg_price_per_room',
	    'no_of_adults', 'no_of_weekend_nights',
	    # The four entries below are the ones whose position had to be corrected.
	    'required_car_parking_space', 'no_of_week_nights',
	    'arrival_month', 'market_segment_type_Online',
	]


	# Widget bounds and defaults per numeric feature (adjust as needed).
	# The "observed" figures quoted next to each row come from data.describe().T
	# during EDA. required_car_parking_space (observed min=0, max=1) has no entry
	# in this table.
	FEATURE_RANGES = {
	    feature: {'min': lower, 'max': upper, 'default': default}
	    for feature, lower, upper, default in (
	        # observed: min=0, max=443, mean=81.61, 75%=118
	        ('lead_time', 0, 450, 82),
	        # observed: min=0, max=5, mean=0.56, 75%=1
	        ('no_of_special_requests', 0, 5, 1),
	        # observed: min=0, max=540, mean=101.03, 75%=119 (floats on purpose)
	        ('avg_price_per_room', 0.0, 600.0, 101.0),
	        # observed: min=0, max=4, mean=1.83, 75%=2
	        ('no_of_adults', 0, 4, 2),
	        # observed: min=0, max=6, mean=0.78, 75%=1 -- max adjusted slightly
	        ('no_of_weekend_nights', 0, 7, 1),
	        # observed: min=0, max=17, mean=2.17, 75%=3 -- max adjusted slightly
	        ('no_of_week_nights', 0, 20, 2),
	        # observed: min=1, max=12 (based on code); default July as an example
	        ('arrival_month', 1, 12, 7),
	    )
	}


	# --- Model Loading (Cached) ---

	@st.cache_resource
	def _read_model_file(path):
	    """Deserialize the model stored at *path*.

	    Only this successful-load path is cached. Error handling deliberately
	    lives in the uncached caller so that a failed load is not stored as a
	    cached ``None`` and can be retried on the next rerun.
	    """
	    return joblib.load(path)


	def load_cancellation_model():
	    """Return the cancellation model loaded from MODEL_FILE, or None on failure.

	    On any loading error the message is shown in the app via ``st.error``
	    and ``None`` is returned, so the caller can stop the page gracefully.
	    """
	    try:
	        return _read_model_file(MODEL_FILE)
	    except Exception as e:
	        # Top-level boundary: report the problem to the user instead of crashing.
	        st.error(f"Error loading model: {e}")
	        return None

	cancellation_predictor = load_cancellation_model()

	# --- Prediction Function (Critical Data Preprocessing) ---

	def run_prediction(
	    lead_time, market_segment_type, avg_price_per_room, no_of_adults,
	    no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
	    arrival_month, required_car_parking_space, model
	):
	    """Score one booking and return ``(prediction, prob_cancellation, prob_kept)``.

	    The raw form values are turned into a single-row DataFrame whose columns
	    follow EXPECTED_FEATURES. ``market_segment_type`` becomes the
	    ``market_segment_type_Online`` indicator (1.0 only for ``'Online'``) and
	    ``required_car_parking_space`` becomes 1.0 only for ``"Yes"``.

	    ``model`` must provide ``predict`` and ``predict_proba``. Index 1 of the
	    probability row is reported as the cancellation probability and index 0
	    as the probability that the booking is kept.
	    """
	    # Encode the two categorical inputs as float indicator values.
	    online_flag = 1.0 if market_segment_type == 'Online' else 0.0
	    parking_flag = 1.0 if required_car_parking_space == "Yes" else 0.0

	    booking = dict(
	        lead_time=lead_time,
	        no_of_special_requests=no_of_special_requests,
	        avg_price_per_room=avg_price_per_room,
	        no_of_adults=no_of_adults,
	        no_of_weekend_nights=no_of_weekend_nights,
	        no_of_week_nights=no_of_week_nights,
	        arrival_month=arrival_month,
	        market_segment_type_Online=online_flag,
	        required_car_parking_space=parking_flag,
	    )

	    # `columns=` fixes the column order regardless of the dict's own ordering.
	    frame = pd.DataFrame([booking], columns=EXPECTED_FEATURES)
	    # Keep the parking column explicitly float64, as the model expects.
	    parking_column = frame['required_car_parking_space']
	    frame['required_car_parking_space'] = parking_column.astype('float64')

	    predicted_label = model.predict(frame)[0]
	    class_probabilities = model.predict_proba(frame)[0]

	    return predicted_label, class_probabilities[1], class_probabilities[0]


	# --- Streamlit UI ---

	# Page-level settings, heading and intro text.
	# NOTE(review): the model is loaded further up in the file, before this call;
	# some Streamlit versions require set_page_config to be the first Streamlit
	# command -- confirm against the deployed version.
	st.set_page_config(
	    page_title="Hotel Cancellation Predictor", layout="centered", initial_sidebar_state="expanded",
	)

	st.title("🛎️ INN Hotels: Booking Cancellation Predictor")
	st.markdown("Use the controls below to input booking details and predict the cancellation risk.")

	# Nothing below can work without a model, so halt the script run here.
	model_unavailable = cancellation_predictor is None
	if model_unavailable:
	    st.warning("Application stopped due to critical error in model loading.")
	    st.stop()

	# --- Input Fields (arranged for better dashboard look) ---

	def _range_kwargs(feature):
	    """Return min_value/max_value/value widget kwargs taken from FEATURE_RANGES."""
	    spec = FEATURE_RANGES[feature]
	    return {'min_value': spec['min'], 'max_value': spec['max'], 'value': spec['default']}


	# Two-column form; the numbers in the labels give the intended reading order.
	col1, col2 = st.columns(2)

	with col1:
	    lead_time = st.slider("1. Lead Time (Days before arrival)", **_range_kwargs('lead_time'))
	    no_of_adults = st.number_input("4. Number of Adults", step=1, **_range_kwargs('no_of_adults'))
	    no_of_week_nights = st.slider("6. Number of Week Nights", **_range_kwargs('no_of_week_nights'))

	    month_spec = FEATURE_RANGES['arrival_month']
	    month_options = list(range(month_spec['min'], month_spec['max'] + 1))
	    arrival_month = st.selectbox(
	        "8. Arrival Month (1=Jan to 12=Dec)",
	        month_options,
	        # selectbox wants a 0-based position, the default is a 1-based month.
	        index=month_spec['default'] - 1,
	    )

	with col2:
	    market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
	    avg_price_per_room = st.number_input(
	        "3. Average Price per Room ($)",
	        format="%.2f",
	        **_range_kwargs('avg_price_per_room'),
	    )
	    no_of_weekend_nights = st.slider("5. Number of Weekend Nights", **_range_kwargs('no_of_weekend_nights'))
	    no_of_special_requests = st.number_input(
	        "7. Number of Special Requests",
	        step=1,
	        **_range_kwargs('no_of_special_requests'),
	    )
	    # index=1 preselects "No".
	    required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)


	# --- Prediction Button ---

	# Visual separator between the input form and the action button.
	st.markdown("---")

	# Everything below runs only on the rerun triggered by clicking the button.
	if st.button("PREDICT CANCELLATION RISK", type="primary"):

	    # Keep the spinner visible while the model is actually being evaluated;
	    # the short sleep is a deliberate UX pause so the spinner is noticeable.
	    with st.spinner('Analyzing booking data...'):
	        time.sleep(0.5)
	        prediction, prob_cancellation, prob_kept = run_prediction(
	            lead_time, market_segment_type, avg_price_per_room, no_of_adults,
	            no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
	            arrival_month, required_car_parking_space, cancellation_predictor
	        )

	    st.markdown("---")
	    st.subheader("Prediction Result")

	    # A predicted label of 1 is treated as "cancelled"; anything else as "kept".
	    if prediction == 1:
	        st.error("High Risk of Cancellation: The model predicts the booking will be CANCELLED.")
	    else:
	        st.success("Low Risk: The model predicts the booking will be KEPT.")

	    # Probabilities are fractions; scale to percentages for display.
	    # (The previous code referenced the undefined name `prob_cancellation100`
	    # and had unbalanced Markdown bold markers.)
	    st.markdown(f"**Likelihood of Cancellation: {prob_cancellation * 100:.2f}%**")
	    st.markdown(f"Likelihood of Keeping Booking: {prob_kept * 100:.2f}%")

	    # Only suggest intervention when the model is fairly confident (> 70%).
	    if prediction == 1 and prob_cancellation > 0.70:
	        st.info("💡 Actionable Insight: Consider proactively contacting this guest or flagging the room for immediate re-marketing.")