Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import joblib | |
| import pandas as pd | |
| import numpy as np | |
| import os | |
| import time | |
# --- Constants and Configuration ---
# Serialized model artifact that the loader reads from the working directory.
MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'

# Feature columns in the exact order the model expects. Per the original
# author's note, this order was aligned with the X_train columns after
# debugging, so do not reorder it.
EXPECTED_FEATURES = [
    'lead_time',
    'no_of_special_requests',
    'avg_price_per_room',
    'no_of_adults',
    'no_of_weekend_nights',
    'required_car_parking_space',
    'no_of_week_nights',
    'arrival_month',
    'market_segment_type_Online',
]

# Widget bounds and defaults, derived from EDA (data.describe().T):
#   feature                  min   max    mean    75%
#   lead_time                  0   443   81.61    118
#   no_of_special_requests     0     5    0.56      1
#   avg_price_per_room         0   540  101.03    119
#   no_of_adults               0     4    1.83      2
#   no_of_weekend_nights       0     6    0.78      1
#   no_of_week_nights          0    17    2.17      3
#   required_car_parking_space 0     1   (binary, no range entry needed)
#   arrival_month              1    12
# Some maxima below are deliberately a little above the observed maximum.
FEATURE_RANGES = {
    feature: {'min': lowest, 'max': highest, 'default': initial}
    for feature, lowest, highest, initial in (
        ('lead_time', 0, 450, 82),
        ('no_of_special_requests', 0, 5, 1),
        ('avg_price_per_room', 0.0, 600.0, 101.0),  # floats: drives a float number_input
        ('no_of_adults', 0, 4, 2),
        ('no_of_weekend_nights', 0, 7, 1),
        ('no_of_week_nights', 0, 20, 2),
        ('arrival_month', 1, 12, 7),  # default month: July
    )
}
# --- Model Loading (Cached) ---
# Streamlit re-executes this script from the top on every widget interaction,
# so the deserialization is cached to avoid re-reading the model file each time.
# show_spinner=False keeps the cache layer from adding its own UI element.
@st.cache_resource(show_spinner=False)
def _load_model_from_disk(path):
    """Deserialize and return the model stored at *path* (cached across reruns).

    Raises whatever ``joblib.load`` raises; because failures propagate as
    exceptions rather than return values, only successful loads are cached.
    """
    # NOTE: joblib.load unpickles the file; only point it at trusted artifacts.
    return joblib.load(path)


def load_cancellation_model():
    """Return the trained cancellation model, or ``None`` if loading fails.

    Returns:
        The object deserialized from ``MODEL_FILE``. On any loading error the
        error is shown in the app via ``st.error`` and ``None`` is returned so
        the caller can stop the app gracefully.
    """
    try:
        return _load_model_from_disk(MODEL_FILE)
    except Exception as e:  # UI boundary: report the failure instead of crashing
        st.error(f"Error loading model: {e}")
        return None


cancellation_predictor = load_cancellation_model()
# --- Prediction Function (Critical Data Preprocessing) ---
def run_prediction(
    lead_time, market_segment_type, avg_price_per_room, no_of_adults,
    no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
    arrival_month, required_car_parking_space, model
):
    """Encode one booking as a single-row frame and score it with *model*.

    Args:
        lead_time: Days between booking and arrival.
        market_segment_type: Segment label; only ``'Online'`` sets the
            one-hot flag, every other value encodes as 0.0.
        avg_price_per_room: Average room price.
        no_of_adults: Number of adults on the booking.
        no_of_weekend_nights: Weekend nights booked.
        no_of_week_nights: Week nights booked.
        no_of_special_requests: Count of special requests.
        arrival_month: Arrival month number.
        required_car_parking_space: ``"Yes"`` encodes as 1.0, anything else 0.0.
        model: Object exposing ``predict`` and ``predict_proba``.

    Returns:
        Tuple ``(prediction, prob_cancellation, prob_kept)`` where the two
        probabilities are elements 1 and 0 of the first ``predict_proba`` row.
    """
    # Categorical inputs become float flags (True -> 1.0, False -> 0.0).
    online_flag = float(market_segment_type == 'Online')
    parking_flag = float(required_car_parking_space == "Yes")

    booking = {
        'lead_time': lead_time,
        'no_of_special_requests': no_of_special_requests,
        'avg_price_per_room': avg_price_per_room,
        'no_of_adults': no_of_adults,
        'no_of_weekend_nights': no_of_weekend_nights,
        'no_of_week_nights': no_of_week_nights,
        'arrival_month': arrival_month,
        'market_segment_type_Online': online_flag,
        'required_car_parking_space': parking_flag,
    }

    # columns= makes the frame follow the EXPECTED_FEATURES order rather than
    # the dict's insertion order.
    input_df = pd.DataFrame([booking], columns=EXPECTED_FEATURES)
    # The parking flag is pinned to float64 explicitly.
    input_df = input_df.astype({'required_car_parking_space': 'float64'})

    prediction = model.predict(input_df)[0]
    # Assumes class order [0 = not cancelled, 1 = cancelled] -- TODO confirm
    # against model.classes_.
    class_probs = model.predict_proba(input_df)[0]
    return prediction, class_probs[1], class_probs[0]
# --- Streamlit UI ---
# Page-level settings, then the header shown above the input form.
_page_settings = {
    'page_title': "Hotel Cancellation Predictor",
    'layout': "centered",
    'initial_sidebar_state': "expanded",
}
st.set_page_config(**_page_settings)

st.title("🛎️ INN Hotels: Booking Cancellation Predictor")
st.markdown("Use the controls below to input booking details and predict the cancellation risk.")

# Without a model there is nothing to predict with: warn and halt the script
# here so none of the widgets below are rendered.
if cancellation_predictor is None:
    st.warning("Application stopped due to critical error in model loading.")
    st.stop()
# --- Input Fields (arranged for better dashboard look) ---
def _range_kwargs(feature):
    """Return the min_value / max_value / value widget arguments for *feature*."""
    bounds = FEATURE_RANGES[feature]
    return {
        'min_value': bounds['min'],
        'max_value': bounds['max'],
        'value': bounds['default'],
    }


col1, col2 = st.columns(2)

# Left column: inputs 1, 4, 6 and 8.
with col1:
    lead_time = st.slider(
        "1. Lead Time (Days before arrival)", **_range_kwargs('lead_time')
    )
    no_of_adults = st.number_input(
        "4. Number of Adults", step=1, **_range_kwargs('no_of_adults')
    )
    no_of_week_nights = st.slider(
        "6. Number of Week Nights", **_range_kwargs('no_of_week_nights')
    )
    _month_bounds = FEATURE_RANGES['arrival_month']
    _month_choices = list(range(_month_bounds['min'], _month_bounds['max'] + 1))
    arrival_month = st.selectbox(
        "8. Arrival Month (1=Jan to 12=Dec)",
        _month_choices,
        # selectbox takes a 0-based position, hence default month minus one.
        index=_month_bounds['default'] - 1,
    )

# Right column: inputs 2, 3, 5, 7 and 9.
with col2:
    market_segment_type = st.selectbox(
        "2. Market Segment Type", ["Online", "Offline"], index=0
    )
    avg_price_per_room = st.number_input(
        "3. Average Price per Room ($)",
        format="%.2f",
        **_range_kwargs('avg_price_per_room'),
    )
    no_of_weekend_nights = st.slider(
        "5. Number of Weekend Nights", **_range_kwargs('no_of_weekend_nights')
    )
    no_of_special_requests = st.number_input(
        "7. Number of Special Requests",
        step=1,
        **_range_kwargs('no_of_special_requests'),
    )
    # index=1 preselects "No".
    required_car_parking_space = st.selectbox(
        "9. Required Car Parking Space", ["Yes", "No"], index=1
    )
# --- Prediction Button ---
st.markdown("---")  # horizontal rule between the form and the action button

if st.button("PREDICT CANCELLATION RISK", type="primary"):
    # The short sleep keeps the spinner visible long enough to be noticed.
    with st.spinner('Analyzing booking data...'):
        time.sleep(0.5)
        prediction, prob_cancellation, prob_kept = run_prediction(
            lead_time=lead_time,
            market_segment_type=market_segment_type,
            avg_price_per_room=avg_price_per_room,
            no_of_adults=no_of_adults,
            no_of_weekend_nights=no_of_weekend_nights,
            no_of_week_nights=no_of_week_nights,
            no_of_special_requests=no_of_special_requests,
            arrival_month=arrival_month,
            required_car_parking_space=required_car_parking_space,
            model=cancellation_predictor,
        )

    st.markdown("---")
    st.subheader("Prediction Result")

    # Label 1 is treated as "cancelled"; any other label as "kept".
    _flagged_cancel = prediction == 1
    if _flagged_cancel:
        st.error("**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
    else:
        st.success("**Low Risk:** The model predicts the booking will be **KEPT**.")

    _cancel_pct = prob_cancellation * 100
    _kept_pct = prob_kept * 100
    st.markdown(f"**Likelihood of Cancellation: {_cancel_pct:.2f}%**")
    st.markdown(f"Likelihood of Keeping Booking: {_kept_pct:.2f}%")

    # Extra guidance only for confident cancellation calls (above 70%).
    if _flagged_cancel and prob_cancellation > 0.70:
        st.info("💡 **Actionable Insight:** Consider proactively contacting this guest or flagging the room for immediate re-marketing.")