omm7 committed on
Commit
5197fc9
·
verified ·
1 Parent(s): 09fcf90

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. app.py +88 -32
  2. requirements.txt +1 -1
app.py CHANGED
@@ -11,17 +11,39 @@ MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'
11
 
12
  # The exact list of features (columns) the model expects in this specific order
13
  EXPECTED_FEATURES = [
14
- 'lead_time',
15
- 'no_of_special_requests',
16
- 'avg_price_per_room',
17
- 'no_of_adults',
18
- 'no_of_weekend_nights',
19
- 'no_of_week_nights',
20
- 'arrival_month',
21
  'market_segment_type_Online',
22
  'required_car_parking_space'
23
  ]
24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  # --- Model Loading (Cached) ---
26
 
27
  @st.cache_resource
@@ -38,12 +60,12 @@ cancellation_predictor = load_cancellation_model()
38
  # --- Prediction Function (Critical Data Preprocessing) ---
39
 
40
  def run_prediction(
41
- lead_time, market_segment_type, avg_price_per_room, no_of_adults,
42
- no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
43
  arrival_month, required_car_parking_space, model
44
  ):
45
  """Processes inputs and runs prediction, mimicking the training feature engineering."""
46
-
47
  # 1. Manually construct the input row, using the required feature engineering
48
  data_row = {
49
  'lead_time': lead_time,
@@ -53,16 +75,19 @@ def run_prediction(
53
  'no_of_weekend_nights': no_of_weekend_nights,
54
  'no_of_week_nights': no_of_week_nights,
55
  'arrival_month': arrival_month,
56
-
57
  # One-Hot Encoding for 'market_segment_type' (assuming 'Offline' is the base category)
58
  'market_segment_type_Online': 1.0 if market_segment_type == 'Online' else 0.0,
59
-
60
  # Binary Encoding for 'required_car_parking_space'
61
  'required_car_parking_space': 1.0 if required_car_parking_space == "Yes" else 0.0,
62
  }
63
 
64
- # 2. Convert dictionary to DataFrame with the correct EXPECTED_FEATURES order
65
  input_df = pd.DataFrame([data_row], columns=EXPECTED_FEATURES)
 
 
 
66
 
67
  # 3. Make Prediction
68
  prediction = model.predict(input_df)[0]
@@ -71,6 +96,7 @@ def run_prediction(
71
  prob_cancellation = probabilities[1]
72
  prob_kept = probabilities[0]
73
 
 
74
  return prediction, prob_cancellation, prob_kept
75
 
76
 
@@ -89,38 +115,68 @@ if cancellation_predictor is None:
89
  st.warning("Application stopped due to critical error in model loading.")
90
  st.stop()
91
 
92
- # --- Input Fields ---
93
-
94
- st.sidebar.header("Booking Parameters")
95
-
96
- with st.sidebar:
97
- lead_time = st.number_input("1. Lead Time (Days before arrival)", min_value=0, max_value=730, value=50)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
98
  market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
99
- avg_price_per_room = st.number_input("3. Average Price per Room ($)", min_value=0.0, value=100.0, format="%.2f")
100
- no_of_adults = st.number_input("4. Number of Adults", min_value=0, max_value=10, value=2)
101
- no_of_weekend_nights = st.number_input("5. Number of Weekend Nights", min_value=0, max_value=7, value=1)
102
- no_of_week_nights = st.number_input("6. Number of Week Nights", min_value=0, max_value=30, value=2)
103
- no_of_special_requests = st.number_input("7. Number of Special Requests", min_value=0, max_value=5, value=0)
104
- arrival_month = st.selectbox("8. Arrival Month (1=Jan to 12=Dec)", list(range(1, 13)), index=5)
 
 
 
 
 
 
 
 
105
  required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)
106
 
 
107
  # --- Prediction Button ---
108
 
 
 
109
  if st.button("PREDICT CANCELLATION RISK", type="primary"):
110
-
111
  # Simple progress indicator for UX
112
  with st.spinner('Analyzing booking data...'):
113
- time.sleep(0.5)
114
-
115
  prediction, prob_cancellation, prob_kept = run_prediction(
116
- lead_time, market_segment_type, avg_price_per_room, no_of_adults,
117
- no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
118
  arrival_month, required_car_parking_space, cancellation_predictor
119
  )
120
-
 
121
  st.markdown("---")
122
  st.subheader("Prediction Result")
123
-
124
  if prediction == 1:
125
  st.error(f"**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
126
  else:
 
11
 
12
  # The exact list of features (columns) the model expects in this specific order
13
  EXPECTED_FEATURES = [
14
+ 'lead_time',
15
+ 'no_of_special_requests',
16
+ 'avg_price_per_room',
17
+ 'no_of_adults',
18
+ 'no_of_weekend_nights',
19
+ 'no_of_week_nights',
20
+ 'arrival_month',
21
  'market_segment_type_Online',
22
  'required_car_parking_space'
23
  ]
24
 
25
+ # Define typical ranges based on EDA (adjust as needed)
26
+ # From data.describe().T:
27
+ # lead_time: min=0, max=443, mean=81.61, 75%=118
28
+ # no_of_special_requests: min=0, max=5, mean=0.56, 75%=1
29
+ # avg_price_per_room: min=0, max=540, mean=101.03, 75%=119
30
+ # no_of_adults: min=0, max=4, mean=1.83, 75%=2
31
+ # no_of_weekend_nights: min=0, max=6, mean=0.78, 75%=1
32
+ # no_of_week_nights: min=0, max=17, mean=2.17, 75%=3
33
+ # required_car_parking_space: min=0, max=1
34
+ # arrival_month: min=1, max=12 (based on code)
35
+
36
+ FEATURE_RANGES = {
37
+ 'lead_time': {'min': 0, 'max': 450, 'default': 82},
38
+ 'no_of_special_requests': {'min': 0, 'max': 5, 'default': 1},
39
+ 'avg_price_per_room': {'min': 0.0, 'max': 600.0, 'default': 101.0},
40
+ 'no_of_adults': {'min': 0, 'max': 4, 'default': 2},
41
+ 'no_of_weekend_nights': {'min': 0, 'max': 7, 'default': 1}, # Adjusted max slightly
42
+ 'no_of_week_nights': {'min': 0, 'max': 20, 'default': 2}, # Adjusted max slightly
43
+ 'arrival_month': {'min': 1, 'max': 12, 'default': 7}, # Default to July based on EDA for example
44
+ }
45
+
46
+
47
  # --- Model Loading (Cached) ---
48
 
49
  @st.cache_resource
 
60
  # --- Prediction Function (Critical Data Preprocessing) ---
61
 
62
  def run_prediction(
63
+ lead_time, market_segment_type, avg_price_per_room, no_of_adults,
64
+ no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
65
  arrival_month, required_car_parking_space, model
66
  ):
67
  """Processes inputs and runs prediction, mimicking the training feature engineering."""
68
+
69
  # 1. Manually construct the input row, using the required feature engineering
70
  data_row = {
71
  'lead_time': lead_time,
 
75
  'no_of_weekend_nights': no_of_weekend_nights,
76
  'no_of_week_nights': no_of_week_nights,
77
  'arrival_month': arrival_month,
78
+
79
  # One-Hot Encoding for 'market_segment_type' (assuming 'Offline' is the base category)
80
  'market_segment_type_Online': 1.0 if market_segment_type == 'Online' else 0.0,
81
+
82
  # Binary Encoding for 'required_car_parking_space'
83
  'required_car_parking_space': 1.0 if required_car_parking_space == "Yes" else 0.0,
84
  }
85
 
86
+ # 2. Convert dictionary to DataFrame with the correct EXPECTED_FEATURES order and dtypes
87
  input_df = pd.DataFrame([data_row], columns=EXPECTED_FEATURES)
88
+ # Ensure 'required_car_parking_space' is float64 as expected by the model
89
+ input_df['required_car_parking_space'] = input_df['required_car_parking_space'].astype('float64')
90
+
91
 
92
  # 3. Make Prediction
93
  prediction = model.predict(input_df)[0]
 
96
  prob_cancellation = probabilities[1]
97
  prob_kept = probabilities[0]
98
 
99
+
100
  return prediction, prob_cancellation, prob_kept
101
 
102
 
 
115
  st.warning("Application stopped due to critical error in model loading.")
116
  st.stop()
117
 
118
+ # --- Input Fields (arranged for better dashboard look) ---
119
+
120
+ col1, col2 = st.columns(2)
121
+
122
+ with col1:
123
+ lead_time = st.slider("1. Lead Time (Days before arrival)",
124
+ min_value=FEATURE_RANGES['lead_time']['min'],
125
+ max_value=FEATURE_RANGES['lead_time']['max'],
126
+ value=FEATURE_RANGES['lead_time']['default'])
127
+ no_of_adults = st.number_input("4. Number of Adults",
128
+ min_value=FEATURE_RANGES['no_of_adults']['min'],
129
+ max_value=FEATURE_RANGES['no_of_adults']['max'],
130
+ value=FEATURE_RANGES['no_of_adults']['default'],
131
+ step=1)
132
+ no_of_week_nights = st.slider("6. Number of Week Nights",
133
+ min_value=FEATURE_RANGES['no_of_week_nights']['min'],
134
+ max_value=FEATURE_RANGES['no_of_week_nights']['max'],
135
+ value=FEATURE_RANGES['no_of_week_nights']['default'])
136
+ arrival_month = st.selectbox("8. Arrival Month (1=Jan to 12=Dec)",
137
+ list(range(FEATURE_RANGES['arrival_month']['min'], FEATURE_RANGES['arrival_month']['max'] + 1)),
138
+ index=FEATURE_RANGES['arrival_month']['default'] - 1) # Adjust index for 0-based list
139
+
140
+
141
+ with col2:
142
  market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
143
+ avg_price_per_room = st.number_input("3. Average Price per Room ($)",
144
+ min_value=FEATURE_RANGES['avg_price_per_room']['min'],
145
+ max_value=FEATURE_RANGES['avg_price_per_room']['max'],
146
+ value=FEATURE_RANGES['avg_price_per_room']['default'],
147
+ format="%.2f")
148
+ no_of_weekend_nights = st.slider("5. Number of Weekend Nights",
149
+ min_value=FEATURE_RANGES['no_of_weekend_nights']['min'],
150
+ max_value=FEATURE_RANGES['no_of_weekend_nights']['max'],
151
+ value=FEATURE_RANGES['no_of_weekend_nights']['default'])
152
+ no_of_special_requests = st.number_input("7. Number of Special Requests",
153
+ min_value=FEATURE_RANGES['no_of_special_requests']['min'],
154
+ max_value=FEATURE_RANGES['no_of_special_requests']['max'],
155
+ value=FEATURE_RANGES['no_of_special_requests']['default'],
156
+ step=1)
157
  required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)
158
 
159
+
160
  # --- Prediction Button ---
161
 
162
+ st.markdown("---") # Add a separator
163
+
164
  if st.button("PREDICT CANCELLATION RISK", type="primary"):
165
+
166
  # Simple progress indicator for UX
167
  with st.spinner('Analyzing booking data...'):
168
+ time.sleep(0.5)
169
+
170
  prediction, prob_cancellation, prob_kept = run_prediction(
171
+ lead_time, market_segment_type, avg_price_per_room, no_of_adults, # Corrected variable name here
172
+ no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
173
  arrival_month, required_car_parking_space, cancellation_predictor
174
  )
175
+
176
+
177
  st.markdown("---")
178
  st.subheader("Prediction Result")
179
+
180
  if prediction == 1:
181
  st.error(f"**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
182
  else:
requirements.txt CHANGED
@@ -1,4 +1,4 @@
1
- streamlit==1.49.1
2
  pandas==2.2.2
3
  numpy==1.26.4
4
  scikit-learn==1.6.1
 
1
+ streamlit
2
  pandas==2.2.2
3
  numpy==1.26.4
4
  scikit-learn==1.6.1