File size: 4,777 Bytes
722f3e2
 
 
 
 
1e792a0
722f3e2
1e792a0
 
5197fc9
 
 
 
 
1e792a0
 
 
 
722f3e2
 
1e792a0
 
ec750ff
1e792a0
ec750ff
1e792a0
ec750ff
1e792a0
ec750ff
1e792a0
 
 
 
d908a7f
 
1e792a0
 
 
 
 
 
 
 
 
d908a7f
1e792a0
 
 
2710d00
d908a7f
1e792a0
 
d908a7f
1e792a0
 
d908a7f
 
 
 
 
 
 
 
 
 
1e792a0
d908a7f
5197fc9
1e792a0
 
ec750ff
1e792a0
 
d908a7f
1e792a0
 
ec750ff
1e792a0
 
 
 
 
 
 
 
 
ec750ff
1e792a0
 
 
ec750ff
1e792a0
 
d908a7f
1e792a0
 
 
 
 
 
 
 
 
 
 
 
ec750ff
d908a7f
 
 
722f3e2
 
d908a7f
 
722f3e2
1e792a0
722f3e2
d908a7f
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
import streamlit as st
import joblib
import pandas as pd
import numpy as np

# --- 1. CONFIGURATION ---
# File name of the serialized model that gets handed to joblib.load.
MODEL_FILE = "hotel_cancellation_prediction_model_v1_0.joblib"

# Feature columns the model was trained on. The order matters: the input
# DataFrame is built with exactly this column sequence.
FEATURE_NAMES = [
    "lead_time",
    "no_of_special_requests",
    "avg_price_per_room",
    "no_of_adults",
    "no_of_weekend_nights",
    "required_car_parking_space",
    "no_of_week_nights",
    "arrival_month",
    "market_segment_type_Online",
]

# --- 2. MODEL LOADING ---
@st.cache_resource
def _load_model_cached():
    """Deserialize the model from MODEL_FILE, cached by Streamlit.

    Any exception from joblib.load propagates on purpose: st.cache_resource
    stores return values only, so a failed load is retried on the next rerun
    instead of a None result being cached until the server restarts.

    NOTE: joblib.load unpickles the file, so MODEL_FILE must come from a
    trusted source.
    """
    return joblib.load(MODEL_FILE)


def load_model():
    """Return the cached model, or None if it cannot be loaded.

    On failure an st.error message is shown and None is returned so the
    caller can stop the script run gracefully.
    """
    try:
        return _load_model_cached()
    except Exception as e:  # UI boundary: report any load failure, don't crash
        st.error(f"Error loading model: {e}. Check if '{MODEL_FILE}' exists.")
        return None


# Keep `load_model.clear()` working for callers that relied on the cache
# decorator having been applied directly to load_model.
load_model.clear = _load_model_cached.clear

model = load_model()

# --- 3. PREDICTION LOGIC ---
def predict_cancellation(inputs, loaded_model, feature_names=None):
    """Build a one-row feature frame from the form values and score it.

    Args:
        inputs: Mapping with the keys 'lead_time', 'no_of_special_requests',
            'avg_price_per_room', 'no_of_adults', 'no_of_weekend_nights',
            'no_of_week_nights', 'arrival_month', 'market_segment_type'
            and 'required_car_parking_space'.
        loaded_model: Object exposing ``predict`` and ``predict_proba``.
            Its ``classes_`` attribute is used when available.
        feature_names: Column names, in order, for the frame passed to the
            model. Defaults to the module-level FEATURE_NAMES.

    Returns:
        A ``(prediction, confidence_score)`` tuple: the label predicted for
        the row (1 is treated as "cancelled" by the caller) and the
        ``predict_proba`` entry belonging to that label.

    Raises:
        KeyError: If ``inputs`` lacks one of the keys listed above.
    """
    columns = FEATURE_NAMES if feature_names is None else feature_names

    # Map user inputs to the format the model expects
    input_data = {
        'lead_time': inputs['lead_time'],
        'no_of_special_requests': inputs['no_of_special_requests'],
        'avg_price_per_room': inputs['avg_price_per_room'],
        'no_of_adults': inputs['no_of_adults'],
        'no_of_weekend_nights': inputs['no_of_weekend_nights'],
        'no_of_week_nights': inputs['no_of_week_nights'],
        'arrival_month': inputs['arrival_month'],

        # Binary encoding for categorical features
        'market_segment_type_Online': 1.0 if inputs['market_segment_type'] == 'Online' else 0.0,
        'required_car_parking_space': 1.0 if inputs['required_car_parking_space'] == "Yes" else 0.0,
    }

    # `columns=` fixes the column order regardless of dict ordering
    input_df = pd.DataFrame([input_data], columns=columns)

    prediction = loaded_model.predict(input_df)[0]
    probabilities = loaded_model.predict_proba(input_df)[0]

    # predict_proba columns are ordered like `classes_`, so look the predicted
    # label up there instead of assuming the layout is [P(0), P(1)].
    try:
        column = list(loaded_model.classes_).index(prediction)
    except (AttributeError, TypeError, ValueError):
        # No usable `classes_`: fall back to the conventional
        # [P(class 0), P(class 1)] layout.
        column = 1 if prediction == 1 else 0

    return prediction, probabilities[column]

# --- 4. STREAMLIT INTERFACE ---
st.title("Hotel Booking Cancellation Predictor")

# Without a model there is nothing to predict with; halt this script run.
if model is None:
    st.stop()

st.markdown("Enter booking details to predict if the reservation will be cancelled.")
st.markdown("---")

# --- Input Fields (Single Column) ---
# Numeric booking details
lead_time = st.number_input(
    "1. Lead Time (Days before arrival)",
    min_value=0,
    value=82,
    step=1,
)
arrival_month = st.selectbox(
    "2. Arrival Month (1=Jan to 12=Dec)",
    list(range(1, 13)),
    index=6,  # options[6] is 7, so July is preselected
)
avg_price_per_room = st.number_input(
    "3. Average Price per Room ($)",
    min_value=0.0,
    value=101.0,
    format="%.2f",
)
no_of_adults = st.number_input(
    "4. Number of Adults",
    min_value=0,
    value=2,
    step=1,
)
no_of_weekend_nights = st.number_input(
    "5. Number of Weekend Nights",
    min_value=0,
    value=1,
    step=1,
)
no_of_week_nights = st.number_input(
    "6. Number of Week Nights",
    min_value=0,
    value=2,
    step=1,
)
no_of_special_requests = st.number_input(
    "7. Number of Special Requests",
    min_value=0,
    value=0,
    step=1,
)

# Categorical booking details
market_segment_type = st.selectbox(
    "8. Market Segment Type",
    ["Online", "Offline"],
)
required_car_parking_space = st.selectbox(
    "9. Required Car Parking Space",
    ["Yes", "No"],
)

# --- 5. PREDICTION BUTTON AND OUTPUT ---
if st.button("Get Prediction Result", type="primary"):

    # Bundle the widget values under the keys predict_cancellation reads
    user_inputs = dict(
        lead_time=lead_time,
        market_segment_type=market_segment_type,
        avg_price_per_room=avg_price_per_room,
        no_of_adults=no_of_adults,
        no_of_weekend_nights=no_of_weekend_nights,
        no_of_week_nights=no_of_week_nights,
        no_of_special_requests=no_of_special_requests,
        arrival_month=arrival_month,
        required_car_parking_space=required_car_parking_space,
    )

    # The model returns the predicted label and the probability behind it
    prediction, confidence_score = predict_cancellation(user_inputs, model)

    st.markdown("---")
    st.subheader("Prediction Result")

    # Label 1 means "cancelled" (red error box); anything else is shown
    # as a green success box.
    will_cancel = prediction == 1
    if will_cancel:
        show, verdict = st.error, "The model predicts the booking will be **CANCELLED**."
    else:
        show, verdict = st.success, "The model predicts the booking will be **Not Cancelled**."
    show(verdict)

    # Confidence is a 0-1 probability; render it as a percentage
    st.info(f"Confidence Score: **{confidence_score * 100:.2f}%**")