omm7 committed on
Commit
722f3e2
·
verified ·
1 Parent(s): 1323829

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +28 -0
  2. app.py +133 -0
  3. requirements.txt +5 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use a Python base image with a stable version
FROM python:3.9

# Build-time working directory; only used to install the dependencies
WORKDIR /app

# Copy the dependency list first so the install layer stays cached
# until requirements.txt itself changes
COPY requirements.txt .

# Install Python dependencies (runs as root, before the user switch below)
RUN pip install --no-cache-dir -r requirements.txt

# Run the application as a non-root user (UID 1000)
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Runtime working directory: app.py and the model are resolved relative to it
WORKDIR $HOME/app

# Copy the whole build context, owned by the non-root user. This must include
# app.py and hotel_cancellation_prediction_model_v1_0.joblib (the file name
# has to match MODEL_FILE in app.py). The files are copied once, here, rather
# than also into /app, which nothing reads at runtime.
COPY --chown=user . $HOME/app

# Run the Streamlit app on port 7860, listening on all interfaces.
# XSRF protection is explicitly disabled; the original author notes this is
# needed for file uploads on Hugging Face Spaces.
# NOTE(review): app.py has no file uploader - confirm this flag is still required.
CMD ["streamlit", "run", "app.py", "--server.port=7860", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import os
6
+ import time
7
+
8
+ # --- Constants and Configuration ---
9
+
10
# File name of the serialized model, resolved relative to the working directory
MODEL_FILE = 'hotel_cancellation_prediction_model_v1_0.joblib'

# The exact list of features (columns) the model expects, in this specific order
EXPECTED_FEATURES = [
    'lead_time',
    'no_of_special_requests',
    'avg_price_per_room',
    'no_of_adults',
    'no_of_weekend_nights',
    'no_of_week_nights',
    'arrival_month',
    'market_segment_type_Online',
    'required_car_parking_space',
]
24
+
25
+ # --- Model Loading (Cached) ---
26
+
27
@st.cache_resource
def load_cancellation_model():
    """Load the serialized cancellation model from ``MODEL_FILE``.

    The function is wrapped in ``st.cache_resource`` so the deserialized
    object is reused instead of being read from disk on every script rerun.

    Returns:
        The object produced by ``joblib.load(MODEL_FILE)``, or ``None`` when
        loading raised; in that case the error is shown with ``st.error``.
    """
    try:
        # SECURITY: joblib.load unpickles arbitrary Python objects; MODEL_FILE
        # must only ever point at a trusted artifact shipped with the app.
        return joblib.load(MODEL_FILE)
    except Exception as e:
        # Top-level boundary: surface any load failure in the UI and let the
        # caller decide how to stop, rather than crashing with a traceback.
        st.error(f"Error loading model: {e}")
        return None


# Loaded once at import time; None signals that loading failed.
cancellation_predictor = load_cancellation_model()
37
+
38
+ # --- Prediction Function (Critical Data Preprocessing) ---
39
+
40
def run_prediction(
    lead_time, market_segment_type, avg_price_per_room, no_of_adults,
    no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
    arrival_month, required_car_parking_space, model
):
    """Encode one booking as a single-row DataFrame and score it with ``model``.

    Args:
        lead_time: Days between booking and arrival.
        market_segment_type: ``'Online'`` is encoded as 1.0; any other value
            as 0.0.
        avg_price_per_room: Average price per room.
        no_of_adults: Number of adults on the booking.
        no_of_weekend_nights: Number of weekend nights.
        no_of_week_nights: Number of week nights.
        no_of_special_requests: Number of special requests.
        arrival_month: Arrival month number.
        required_car_parking_space: ``"Yes"`` is encoded as 1.0; any other
            value as 0.0.
        model: Object exposing ``predict`` and ``predict_proba``.

    Returns:
        A tuple ``(prediction, prob_cancellation, prob_kept)`` where
        ``prediction`` is the first element of ``model.predict`` and the two
        probabilities are elements 1 and 0 of the first ``predict_proba`` row.
    """
    # 1. Build the input row, applying the same encodings used for training
    data_row = {
        'lead_time': lead_time,
        'no_of_special_requests': no_of_special_requests,
        'avg_price_per_room': avg_price_per_room,
        'no_of_adults': no_of_adults,
        'no_of_weekend_nights': no_of_weekend_nights,
        'no_of_week_nights': no_of_week_nights,
        'arrival_month': arrival_month,

        # One-hot encoding for 'market_segment_type'
        # (assuming 'Offline' is the base category)
        'market_segment_type_Online': 1.0 if market_segment_type == 'Online' else 0.0,

        # Binary encoding for 'required_car_parking_space'
        'required_car_parking_space': 1.0 if required_car_parking_space == "Yes" else 0.0,
    }

    # 2. Single-row DataFrame with columns in the EXPECTED_FEATURES order
    input_df = pd.DataFrame([data_row], columns=EXPECTED_FEATURES)

    # 3. Predict the label and the class probabilities
    prediction = model.predict(input_df)[0]
    # Assumes the model's classes are ordered [not canceled (0), canceled (1)],
    # so index 1 is the cancellation probability - TODO confirm against
    # model.classes_.
    probabilities = model.predict_proba(input_df)[0]
    prob_cancellation = probabilities[1]
    prob_kept = probabilities[0]

    return prediction, prob_cancellation, prob_kept
75
+
76
+
77
+ # --- Streamlit UI ---
78
+
79
st.set_page_config(
    page_title="Hotel Cancellation Predictor",
    layout="centered",
    initial_sidebar_state="expanded"
)

st.title("🛎️ INN Hotels: Booking Cancellation Predictor")
st.markdown("Use the controls below to input booking details and predict the cancellation risk.")

# Without a model there is nothing to predict with: explain and halt the script.
if cancellation_predictor is None:
    st.warning("Application stopped due to critical error in model loading.")
    st.stop()

# --- Input Fields ---

st.sidebar.header("Booking Parameters")

# All nine booking inputs are rendered in the sidebar, in this order.
with st.sidebar:
    lead_time = st.number_input("1. Lead Time (Days before arrival)", min_value=0, max_value=730, value=50)
    market_segment_type = st.selectbox("2. Market Segment Type", ["Online", "Offline"], index=0)
    avg_price_per_room = st.number_input("3. Average Price per Room ($)", min_value=0.0, value=100.0, format="%.2f")
    no_of_adults = st.number_input("4. Number of Adults", min_value=0, max_value=10, value=2)
    no_of_weekend_nights = st.number_input("5. Number of Weekend Nights", min_value=0, max_value=7, value=1)
    no_of_week_nights = st.number_input("6. Number of Week Nights", min_value=0, max_value=30, value=2)
    no_of_special_requests = st.number_input("7. Number of Special Requests", min_value=0, max_value=5, value=0)
    arrival_month = st.selectbox("8. Arrival Month (1=Jan to 12=Dec)", list(range(1, 13)), index=5)
    required_car_parking_space = st.selectbox("9. Required Car Parking Space", ["Yes", "No"], index=1)

# --- Prediction Button ---

if st.button("PREDICT CANCELLATION RISK", type="primary"):

    # The spinner covers the actual prediction; the short pause keeps the
    # indicator visible long enough to be noticed.
    with st.spinner('Analyzing booking data...'):
        time.sleep(0.5)

        prediction, prob_cancellation, prob_kept = run_prediction(
            lead_time, market_segment_type, avg_price_per_room, no_of_adults,
            no_of_weekend_nights, no_of_week_nights, no_of_special_requests,
            arrival_month, required_car_parking_space, cancellation_predictor
        )

    st.markdown("---")
    st.subheader("Prediction Result")

    # Headline verdict: label 1 means the model predicts a cancellation.
    if prediction == 1:
        st.error("**High Risk of Cancellation:** The model predicts the booking will be **CANCELLED**.")
    else:
        st.success("**Low Risk:** The model predicts the booking will be **KEPT**.")

    st.markdown(f"**Likelihood of Cancellation: {prob_cancellation*100:.2f}%**")
    st.markdown(f"Likelihood of Keeping Booking: {prob_kept*100:.2f}%")

    # Extra guidance only for confident cancellation predictions (> 70%).
    if prediction == 1 and prob_cancellation > 0.70:
        st.info("💡 **Actionable Insight:** Consider proactively contacting this guest or flagging the room for immediate re-marketing.")
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit==1.49.1
2
+ pandas==2.2.2
3
+ numpy==1.26.4
4
+ scikit-learn==1.6.1
5
+ joblib==1.5.2