omm7 committed on
Commit
1e792a0
·
verified ·
1 Parent(s): ec750ff

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +77 -158
app.py CHANGED
@@ -2,192 +2,111 @@ import streamlit as st
2
  import joblib
3
  import pandas as pd
4
  import numpy as np
5
- import os
6
- import time
7
-
8
- # --- Constants and Configuration ---
9
 
 
10
  MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'
11
 
12
- # The exact list of features (columns) the model expects in this specific order
13
- # Corrected order based on the debugging output and X_train columns
14
- EXPECTED_FEATURES = [
15
  'lead_time',
16
  'no_of_special_requests',
17
  'avg_price_per_room',
18
  'no_of_adults',
19
  'no_of_weekend_nights',
20
- 'required_car_parking_space', # Corrected position
21
- 'no_of_week_nights', # Corrected position
22
- 'arrival_month', # Corrected position
23
- 'market_segment_type_Online' # Corrected position
24
  ]
25
 
26
-
27
- # Define typical ranges based on EDA (adjust as needed)
28
- # From data.describe().T:
29
- # lead_time: min=0, max=443, mean=81.61, 75%=118
30
- # no_of_special_requests: min=0, max=5, mean=0.56, 75%=1
31
- # avg_price_per_room: min=0, max=540, mean=101.03, 75%=119
32
- # no_of_adults: min=0, max=4, mean=1.83, 75%=2
33
- # no_of_weekend_nights: min=0, max=6, mean=0.78, 75%=1
34
- # no_of_week_nights: min=0, max=17, mean=2.17, 75%=3
35
- # required_car_parking_space: min=0, max=1
36
- # arrival_month: min=1, max=12 (based on code)
37
-
38
- FEATURE_RANGES = {
39
- 'lead_time': {'min': 0, 'max': 450, 'default': 82},
40
- 'no_of_special_requests': {'min': 0, 'max': 5, 'default': 1},
41
- 'avg_price_per_room': {'min': 0.0, 'max': 600.0, 'default': 101.0},
42
- 'no_of_adults': {'min': 0, 'max': 4, 'default': 2},
43
- 'no_of_weekend_nights': {'min': 0, 'max': 7, 'default': 1}, # Adjusted max slightly
44
- 'no_of_week_nights': {'min': 0, 'max': 20, 'default': 2}, # Adjusted max slightly
45
- 'arrival_month': {'min': 1, 'max': 12, 'default': 7}, # Default to July based on EDA for example
46
- }
47
-
48
-
49
- # --- Model Loading (Cached) ---
50
-
51
  @st.cache_resource
52
- def load_cancellation_model():
53
  try:
54
- model = joblib.load(MODEL_FILE)
55
- return model
56
  except Exception as e:
57
- st.error(f"Error loading model: {e}")
58
  return None
59
 
60
- cancellation_predictor = load_cancellation_model()
61
-
62
- # --- Prediction Function (Critical Data Preprocessing) ---
63
-
64
- def run_prediction(
65
- lead_time, market_segment_type, avg_price_per_room, no_of_adults,
66
- no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
67
- arrival_month, required_car_parking_space, model
68
- ):
69
- """Processes inputs and runs prediction, mimicking the training feature engineering."""
70
-
71
- # 1. Manually construct the input row, using the required feature engineering
72
- data_row = {
73
- 'lead_time': lead_time,
74
- 'no_of_special_requests': no_of_special_requests,
75
- 'avg_price_per_room': avg_price_per_room,
76
- 'no_of_adults': no_of_adults,
77
- 'no_of_weekend_nights': no_of_weekend_nights,
78
- 'no_of_week_nights': no_of_week_nights,
79
- 'arrival_month': arrival_month,
80
-
81
- # One-Hot Encoding for 'market_segment_type' (assuming 'Offline' is the base category)
82
- 'market_segment_type_Online': 1.0 if market_segment_type == 'Online' else 0.0,
83
-
84
- # Binary Encoding for 'required_car_parking_space'
85
- 'required_car_parking_space': 1.0 if required_car_parking_space == "Yes" else 0.0,
86
  }
 
 
 
 
 
 
 
 
87
 
88
- # 2. Convert dictionary to DataFrame with the correct EXPECTED_FEATURES order and dtypes
89
- # Explicitly set the order of columns to match EXPECTED_FEATURES
90
- input_df = pd.DataFrame([data_row], columns=EXPECTED_FEATURES)
91
-
92
- # Ensure 'required_car_parking_space' is float64 as expected by the model
93
- input_df['required_car_parking_space'] = input_df['required_car_parking_space'].astype('float64')
94
-
95
 
96
- # 3. Make Prediction
97
- prediction = model.predict(input_df)[0]
98
- # Probabilities are [Prob_Not_Canceled (0), Prob_Canceled (1)]
99
- probabilities = model.predict_proba(input_df)[0]
100
- prob_cancellation = probabilities[1]
101
- prob_kept = probabilities[0]
102
 
 
 
103
 
104
- return prediction, prob_cancellation, prob_kept
 
105
 
 
106
 
107
- # --- Streamlit UI ---
 
 
 
 
 
 
 
108
 
109
- st.set_page_config(
110
- page_title="Hotel Cancellation Predictor",
111
- layout="centered",
112
- initial_sidebar_state="expanded"
113
- )
114
 
115
- st.title("🛎️ INN Hotels: Booking Cancellation Predictor")
116
- st.markdown("Use the controls below to input booking details and predict the cancellation risk.")
117
 
118
- if cancellation_predictor is None:
119
- st.warning("Application stopped due to critical error in model loading.")
120
- st.stop()
121
-
122
- # --- Input Fields (arranged for better dashboard look) ---
123
-
124
- col1, col2 = st.columns(2)
125
-
126
- with col1:
127
- lead_time = st.slider("1. Lead Time (Days before arrival)",
128
- min_value=FEATURE_RANGES['lead_time']['min'],
129
- max_value=FEATURE_RANGES['lead_time']['max'],
130
- value=FEATURE_RANGES['lead_time']['default'])
131
- no_of_adults = st.number_input("4. Number of Adults",
132
- min_value=FEATURE_RANGES['no_of_adults']['min'],
133
- max_value=FEATURE_RANGES['no_of_adults']['max'],
134
- value=FEATURE_RANGES['no_of_adults']['default'],
135
- step=1)
136
- no_of_week_nights = st.slider("6. Number of Week Nights",
137
- min_value=FEATURE_RANGES['no_of_week_nights']['min'],
138
- max_value=FEATURE_RANGES['no_of_week_nights']['max'],
139
- value=FEATURE_RANGES['no_of_week_nights']['default'])
140
- arrival_month = st.selectbox("8. Arrival Month (1=Jan to 12=Dec)",
141
- list(range(FEATURE_RANGES['arrival_month']['min'], FEATURE_RANGES['arrival_month']['max'] + 1)),
142
- index=FEATURE_RANGES['arrival_month']['default'] - 1) # Adjust index for 0-based list
143
-
144
-
145
- with col2:
146
- market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
147
- avg_price_per_room = st.number_input("3. Average Price per Room ($)",
148
- min_value=FEATURE_RANGES['avg_price_per_room']['min'],
149
- max_value=FEATURE_RANGES['avg_price_per_room']['max'],
150
- value=FEATURE_RANGES['avg_price_per_room']['default'],
151
- format="%.2f")
152
- no_of_weekend_nights = st.slider("5. Number of Weekend Nights",
153
- min_value=FEATURE_RANGES['no_of_weekend_nights']['min'],
154
- max_value=FEATURE_RANGES['no_of_weekend_nights']['max'],
155
- value=FEATURE_RANGES['no_of_weekend_nights']['default'])
156
- no_of_special_requests = st.number_input("7. Number of Special Requests",
157
- min_value=FEATURE_RANGES['no_of_special_requests']['min'],
158
- max_value=FEATURE_RANGES['no_of_special_requests']['max'],
159
- value=FEATURE_RANGES['no_of_special_requests']['default'],
160
- step=1)
161
- required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)
162
-
163
-
164
- # --- Prediction Button ---
165
-
166
- st.markdown("---") # Add a separator
167
-
168
- if st.button("PREDICT CANCELLATION RISK", type="primary"):
169
-
170
- # Simple progress indicator for UX
171
- with st.spinner('Analyzing booking data...'):
172
- time.sleep(0.5)
173
-
174
- prediction, prob_cancellation, prob_kept = run_prediction(
175
- lead_time, market_segment_type, avg_price_per_room, no_of_adults,
176
- no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
177
- arrival_month, required_car_parking_space, cancellation_predictor
178
- )
179
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
180
 
 
 
181
  st.markdown("---")
182
  st.subheader("Prediction Result")
183
-
184
  if prediction == 1:
185
- st.error(f"**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
186
  else:
187
- st.success(f"**Low Risk:** The model predicts the booking will be **KEPT**.")
188
-
189
- st.markdown(f"**Likelihood of Cancellation: {prob_cancellation*100:.2f}%**")
190
- st.markdown(f"Likelihood of Keeping Booking: {prob_kept*100:.2f}%")
191
-
192
- if prediction == 1 and prob_cancellation > 0.70:
193
- st.info("💡 **Actionable Insight:** Consider proactively contacting this guest or flagging the room for immediate re-marketing.")
 
2
  import joblib
3
  import pandas as pd
4
  import numpy as np
 
 
 
 
5
 
6
+ # --- 1. CONFIGURATION ---
7
  MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'
8
 
9
+ # The list of features (columns) the model was trained on, in order.
10
+ FEATURE_NAMES = [
 
11
  'lead_time',
12
  'no_of_special_requests',
13
  'avg_price_per_room',
14
  'no_of_adults',
15
  'no_of_weekend_nights',
16
+ 'required_car_parking_space',
17
+ 'no_of_week_nights',
18
+ 'arrival_month',
19
+ 'market_segment_type_Online'
20
  ]
21
 
22
+ # --- 2. MODEL LOADING ---
23
+ # Use cache to load the model just once
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  @st.cache_resource
25
+ def load_model():
26
  try:
27
+ return joblib.load(MODEL_FILE)
 
28
  except Exception as e:
29
+ st.error(f"Error loading model: {e}. Check if '{MODEL_FILE}' exists.")
30
  return None
31
 
32
+ model = load_model()
33
+
34
+ # --- 3. PREDICTION LOGIC ---
35
+ def predict_cancellation(inputs, loaded_model):
36
+ """Prepares data and gets the model's prediction."""
37
+
38
+ # Map user inputs to the format the model expects
39
+ input_data = {
40
+ 'lead_time': inputs['lead_time'],
41
+ 'no_of_special_requests': inputs['no_of_special_requests'],
42
+ 'avg_price_per_room': inputs['avg_price_per_room'],
43
+ 'no_of_adults': inputs['no_of_adults'],
44
+ 'no_of_weekend_nights': inputs['no_of_weekend_nights'],
45
+ 'no_of_week_nights': inputs['no_of_week_nights'],
46
+ 'arrival_month': inputs['arrival_month'],
47
+
48
+ # Binary encoding for categorical features
49
+ 'market_segment_type_Online': 1.0 if inputs['market_segment_type'] == 'Online' else 0.0,
50
+ 'required_car_parking_space': 1.0 if inputs['required_car_parking_space'] == "Yes" else 0.0,
 
 
 
 
 
 
 
51
  }
52
+
53
+ # Create a DataFrame with the correct column order
54
+ input_df = pd.DataFrame([input_data], columns=FEATURE_NAMES)
55
+
56
+ # Make prediction (0=Not Cancelled, 1=Cancelled)
57
+ prediction = loaded_model.predict(input_df)[0]
58
+
59
+ return prediction
60
 
61
+ # --- 4. STREAMLIT INTERFACE ---
 
 
 
 
 
 
62
 
63
+ st.title("Hotel Booking Cancellation Predictor")
 
 
 
 
 
64
 
65
+ if model is None:
66
+ st.stop()
67
 
68
+ st.markdown("Enter booking details to predict if the reservation will be cancelled.")
69
+ st.markdown("---")
70
 
71
+ # --- Input Fields (Single Column) ---
72
 
73
+ # Simple number inputs for basic data types
74
+ lead_time = st.number_input("1. Lead Time (Days before arrival)", min_value=0, value=82, step=1)
75
+ arrival_month = st.selectbox("2. Arrival Month (1=Jan to 12=Dec)", list(range(1, 13)), index=6) # Default to July (7)
76
+ avg_price_per_room = st.number_input("3. Average Price per Room ($)", min_value=0.0, value=101.0, format="%.2f")
77
+ no_of_adults = st.number_input("4. Number of Adults", min_value=0, value=2, step=1)
78
+ no_of_weekend_nights = st.number_input("5. Number of Weekend Nights", min_value=0, value=1, step=1)
79
+ no_of_week_nights = st.number_input("6. Number of Week Nights", min_value=0, value=2, step=1)
80
+ no_of_special_requests = st.number_input("7. Number of Special Requests", min_value=0, value=0, step=1)
81
 
82
+ # Simple select boxes for categorical data
83
+ market_segment_type = st.selectbox("8. Market Segment Type", ["Online", "Offline"])
84
+ required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"])
 
 
85
 
 
 
86
 
87
+ # --- 5. PREDICTION BUTTON AND OUTPUT ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
 
89
+ if st.button("Get Prediction Result", type="primary"):
90
+
91
+ # Dictionary to pass inputs easily
92
+ user_inputs = {
93
+ 'lead_time': lead_time,
94
+ 'market_segment_type': market_segment_type,
95
+ 'avg_price_per_room': avg_price_per_room,
96
+ 'no_of_adults': no_of_adults,
97
+ 'no_of_weekend_nights': no_of_weekend_nights,
98
+ 'no_of_week_nights': no_of_week_nights,
99
+ 'no_of_special_requests': no_of_special_requests,
100
+ 'arrival_month': arrival_month,
101
+ 'required_car_parking_space': required_car_parking_space,
102
+ }
103
 
104
+ prediction = predict_cancellation(user_inputs, model)
105
+
106
  st.markdown("---")
107
  st.subheader("Prediction Result")
108
+
109
  if prediction == 1:
110
+ st.error("The model predicts the booking will be **CANCELLED**.")
111
  else:
112
+ st.success("The model predicts the booking will be **Not Cancelled**.")