Spaces:

omm7
/

hbcp

Sleeping

App Files Files Community

omm7 committed on Oct 10, 2025

Commit

5197fc9

verified ·

1 Parent(s): 09fcf90

Upload folder using huggingface_hub

Browse files

Files changed (2) hide show

app.py +88 -32
requirements.txt +1 -1

app.py CHANGED Viewed

@@ -11,17 +11,39 @@ MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'
 # The exact list of features (columns) the model expects in this specific order
 EXPECTED_FEATURES = [
-    'lead_time',
-    'no_of_special_requests',
-    'avg_price_per_room',
-    'no_of_adults',
-    'no_of_weekend_nights',
-    'no_of_week_nights',
-    'arrival_month',
     'market_segment_type_Online',
     'required_car_parking_space'
 ]
 # --- Model Loading (Cached) ---
 @st.cache_resource
@@ -38,12 +60,12 @@ cancellation_predictor = load_cancellation_model()
 # --- Prediction Function (Critical Data Preprocessing) ---
 def run_prediction(
-    lead_time, market_segment_type, avg_price_per_room, no_of_adults,
-    no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
     arrival_month, required_car_parking_space, model
 ):
     """Processes inputs and runs prediction, mimicking the training feature engineering."""
     # 1. Manually construct the input row, using the required feature engineering
     data_row = {
         'lead_time': lead_time,
@@ -53,16 +75,19 @@ def run_prediction(
         'no_of_weekend_nights': no_of_weekend_nights,
         'no_of_week_nights': no_of_week_nights,
         'arrival_month': arrival_month,
         # One-Hot Encoding for 'market_segment_type' (assuming 'Offline' is the base category)
         'market_segment_type_Online': 1.0 if market_segment_type == 'Online' else 0.0,
         # Binary Encoding for 'required_car_parking_space'
         'required_car_parking_space': 1.0 if required_car_parking_space == "Yes" else 0.0,
     }
-    # 2. Convert dictionary to DataFrame with the correct EXPECTED_FEATURES order
     input_df = pd.DataFrame([data_row], columns=EXPECTED_FEATURES)
     # 3. Make Prediction
     prediction = model.predict(input_df)[0]
@@ -71,6 +96,7 @@ def run_prediction(
     prob_cancellation = probabilities[1]
     prob_kept = probabilities[0]
     return prediction, prob_cancellation, prob_kept
@@ -89,38 +115,68 @@ if cancellation_predictor is None:
     st.warning("Application stopped due to critical error in model loading.")
     st.stop()
-# --- Input Fields ---
-st.sidebar.header("Booking Parameters")
-with st.sidebar:
-    lead_time = st.number_input("1. Lead Time (Days before arrival)", min_value=0, max_value=730, value=50)
     market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
-    avg_price_per_room = st.number_input("3. Average Price per Room ($)", min_value=0.0, value=100.0, format="%.2f")
-    no_of_adults = st.number_input("4. Number of Adults", min_value=0, max_value=10, value=2)
-    no_of_weekend_nights = st.number_input("5. Number of Weekend Nights", min_value=0, max_value=7, value=1)
-    no_of_week_nights = st.number_input("6. Number of Week Nights", min_value=0, max_value=30, value=2)
-    no_of_special_requests = st.number_input("7. Number of Special Requests", min_value=0, max_value=5, value=0)
-    arrival_month = st.selectbox("8. Arrival Month (1=Jan to 12=Dec)", list(range(1, 13)), index=5)
     required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)
 # --- Prediction Button ---
 if st.button("PREDICT CANCELLATION RISK", type="primary"):
     # Simple progress indicator for UX
     with st.spinner('Analyzing booking data...'):
-        time.sleep(0.5)
         prediction, prob_cancellation, prob_kept = run_prediction(
-            lead_time, market_segment_type, avg_price_per_room, no_of_adults,
-            no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
             arrival_month, required_car_parking_space, cancellation_predictor
         )
     st.markdown("---")
     st.subheader("Prediction Result")
     if prediction == 1:
         st.error(f"**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
     else:

 # The exact list of features (columns) the model expects in this specific order
 EXPECTED_FEATURES = [
+    'lead_time',
+    'no_of_special_requests',
+    'avg_price_per_room',
+    'no_of_adults',
+    'no_of_weekend_nights',
+    'no_of_week_nights',
+    'arrival_month',
     'market_segment_type_Online',
     'required_car_parking_space'
 ]
+# Define typical ranges based on EDA (adjust as needed)
+# From data.describe().T:
+# lead_time: min=0, max=443, mean=81.61, 75%=118
+# no_of_special_requests: min=0, max=5, mean=0.56, 75%=1
+# avg_price_per_room: min=0, max=540, mean=101.03, 75%=119
+# no_of_adults: min=0, max=4, mean=1.83, 75%=2
+# no_of_weekend_nights: min=0, max=6, mean=0.78, 75%=1
+# no_of_week_nights: min=0, max=17, mean=2.17, 75%=3
+# required_car_parking_space: min=0, max=1
+# arrival_month: min=1, max=12 (based on code)
+FEATURE_RANGES = {
+    'lead_time': {'min': 0, 'max': 450, 'default': 82},
+    'no_of_special_requests': {'min': 0, 'max': 5, 'default': 1},
+    'avg_price_per_room': {'min': 0.0, 'max': 600.0, 'default': 101.0},
+    'no_of_adults': {'min': 0, 'max': 4, 'default': 2},
+    'no_of_weekend_nights': {'min': 0, 'max': 7, 'default': 1}, # Adjusted max slightly
+    'no_of_week_nights': {'min': 0, 'max': 20, 'default': 2}, # Adjusted max slightly
+    'arrival_month': {'min': 1, 'max': 12, 'default': 7}, # Default to July based on EDA for example
+}
 # --- Model Loading (Cached) ---
 @st.cache_resource
 # --- Prediction Function (Critical Data Preprocessing) ---
 def run_prediction(
+    lead_time, market_segment_type, avg_price_per_room, no_of_adults,
+    no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
     arrival_month, required_car_parking_space, model
 ):
     """Processes inputs and runs prediction, mimicking the training feature engineering."""
     # 1. Manually construct the input row, using the required feature engineering
     data_row = {
         'lead_time': lead_time,
         'no_of_weekend_nights': no_of_weekend_nights,
         'no_of_week_nights': no_of_week_nights,
         'arrival_month': arrival_month,
         # One-Hot Encoding for 'market_segment_type' (assuming 'Offline' is the base category)
         'market_segment_type_Online': 1.0 if market_segment_type == 'Online' else 0.0,
         # Binary Encoding for 'required_car_parking_space'
         'required_car_parking_space': 1.0 if required_car_parking_space == "Yes" else 0.0,
     }
+    # 2. Convert dictionary to DataFrame with the correct EXPECTED_FEATURES order and dtypes
     input_df = pd.DataFrame([data_row], columns=EXPECTED_FEATURES)
+    # Ensure 'required_car_parking_space' is float64 as expected by the model
+    input_df['required_car_parking_space'] = input_df['required_car_parking_space'].astype('float64')
     # 3. Make Prediction
     prediction = model.predict(input_df)[0]
     prob_cancellation = probabilities[1]
     prob_kept = probabilities[0]
     return prediction, prob_cancellation, prob_kept
     st.warning("Application stopped due to critical error in model loading.")
     st.stop()
+# --- Input Fields (arranged for better dashboard look) ---
+col1, col2 = st.columns(2)
+with col1:
+    lead_time = st.slider("1. Lead Time (Days before arrival)",
+                          min_value=FEATURE_RANGES['lead_time']['min'],
+                          max_value=FEATURE_RANGES['lead_time']['max'],
+                          value=FEATURE_RANGES['lead_time']['default'])
+    no_of_adults = st.number_input("4. Number of Adults",
+                                   min_value=FEATURE_RANGES['no_of_adults']['min'],
+                                   max_value=FEATURE_RANGES['no_of_adults']['max'],
+                                   value=FEATURE_RANGES['no_of_adults']['default'],
+                                   step=1)
+    no_of_week_nights = st.slider("6. Number of Week Nights",
+                                  min_value=FEATURE_RANGES['no_of_week_nights']['min'],
+                                  max_value=FEATURE_RANGES['no_of_week_nights']['max'],
+                                  value=FEATURE_RANGES['no_of_week_nights']['default'])
+    arrival_month = st.selectbox("8. Arrival Month (1=Jan to 12=Dec)",
+                                 list(range(FEATURE_RANGES['arrival_month']['min'], FEATURE_RANGES['arrival_month']['max'] + 1)),
+                                 index=FEATURE_RANGES['arrival_month']['default'] - 1) # Adjust index for 0-based list
+with col2:
     market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
+    avg_price_per_room = st.number_input("3. Average Price per Room ($)",
+                                         min_value=FEATURE_RANGES['avg_price_per_room']['min'],
+                                         max_value=FEATURE_RANGES['avg_price_per_room']['max'],
+                                         value=FEATURE_RANGES['avg_price_per_room']['default'],
+                                         format="%.2f")
+    no_of_weekend_nights = st.slider("5. Number of Weekend Nights",
+                                     min_value=FEATURE_RANGES['no_of_weekend_nights']['min'],
+                                     max_value=FEATURE_RANGES['no_of_weekend_nights']['max'],
+                                     value=FEATURE_RANGES['no_of_weekend_nights']['default'])
+    no_of_special_requests = st.number_input("7. Number of Special Requests",
+                                             min_value=FEATURE_RANGES['no_of_special_requests']['min'],
+                                             max_value=FEATURE_RANGES['no_of_special_requests']['max'],
+                                             value=FEATURE_RANGES['no_of_special_requests']['default'],
+                                             step=1)
     required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)
 # --- Prediction Button ---
+st.markdown("---") # Add a separator
 if st.button("PREDICT CANCELLATION RISK", type="primary"):
     # Simple progress indicator for UX
     with st.spinner('Analyzing booking data...'):
+        time.sleep(0.5)
         prediction, prob_cancellation, prob_kept = run_prediction(
+            lead_time, market_segment_type, avg_price_per_room, no_of_adults, # Corrected variable name here
+            no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
             arrival_month, required_car_parking_space, cancellation_predictor
         )
     st.markdown("---")
     st.subheader("Prediction Result")
     if prediction == 1:
         st.error(f"**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
     else:

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-streamlit==1.49.1
 pandas==2.2.2
 numpy==1.26.4
 scikit-learn==1.6.1

+streamlit
 pandas==2.2.2
 numpy==1.26.4
 scikit-learn==1.6.1