simnid committed on
Commit
c19138e
·
verified ·
1 Parent(s): becd748

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +20 -12
  2. app.py +221 -0
  3. requirements.txt +8 -3
Dockerfile CHANGED
@@ -1,20 +1,28 @@
1
- FROM python:3.13.5-slim
 
2
 
 
3
  WORKDIR /app
4
 
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
 
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
 
14
- RUN pip3 install -r requirements.txt
 
 
 
 
15
 
16
- EXPOSE 8501
17
 
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
 
19
 
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
1
+ # Use a minimal base image with Python 3.9 installed
2
+ FROM python:3.9-slim
3
 
4
+ # Set the working directory inside the container to /app
5
  WORKDIR /app
6
 
7
+ # Copy all files from the project directory to /app
8
+ COPY . .
 
 
 
9
 
10
+ # Install Python dependencies
11
+ RUN pip3 install -r tourism_project/requirements.txt
12
 
13
+ # Create a non-root user for security
14
+ RUN useradd -m -u 1000 user
15
+ USER user
16
+ ENV HOME=/home/user \
17
+ PATH=/home/user/.local/bin:$PATH
18
 
19
+ WORKDIR $HOME/app
20
 
21
+ # Copy app files with proper ownership
22
+ COPY --chown=user . $HOME/app
23
 
24
+ # Define the command to run the Streamlit app
25
+ CMD ["streamlit", "run", "tourism_project/deployment/app.py", \
26
+ "--server.port=8501", \
27
+ "--server.address=0.0.0.0", \
28
+ "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,221 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from huggingface_hub import hf_hub_download
5
+ import joblib
6
+
7
+ # App title and description
8
+ st.set_page_config(
9
+ page_title="Wellness Tourism Prediction",
10
+ page_icon="🏖️",
11
+ layout="wide"
12
+ )
13
+
14
+ st.title("🏖️ Wellness Tourism Prediction App")
15
+ st.markdown("""
16
+ This application predicts whether a customer is likely to purchase a wellness tourism package
17
+ based on their demographic, behavioral, and engagement data.
18
+ """)
19
+
20
+ # Sidebar for information
21
+ with st.sidebar:
22
+ st.header("About This Model")
23
+ st.markdown("""
24
+ **Model Details:**
25
+ - Algorithm: XGBoost Classifier
26
+ - Trained on: Wellness Tourism Dataset
27
+ - Target: Product Taken (1 = Purchased, 0 = Not Purchased)
28
+
29
+ **Key Features:**
30
+ - Handles class imbalance with scale_pos_weight
31
+ - Uses preprocessing pipeline (scaling + encoding)
32
+ - Optimized for ROC-AUC score
33
+ """)
34
+
35
+ # Display the model metrics recorded during training
36
+ st.subheader("Model Performance")
37
+ st.metric("ROC AUC", "0.9414")
38
+ st.metric("Precision (Class 1)", "0.69")
39
+ st.metric("Recall (Class 1)", "0.79")
40
+
41
+ # Function to download and load model
42
+ @st.cache_resource
43
+ def load_model():
44
+ """Load the trained model from Hugging Face Hub"""
45
+ try:
46
+ model_path = hf_hub_download(
47
+ repo_id="simnid/wellness-tourism-model",
48
+ filename="best_wellness_tourism_model.joblib"
49
+ )
50
+ model = joblib.load(model_path)
51
+ return model
52
+ except Exception as e:
53
+ st.error(f"Error loading model: {e}")
54
+ return None
55
+
56
+ # Load the model
57
+ model = load_model()
58
+
59
+ if model is None:
60
+ st.warning("Model could not be loaded. Please check your connection.")
61
+ st.stop()
62
+
63
+ # User input section
64
+ st.header("📋 Customer Information")
65
+
66
+ # Create columns for better layout
67
+ col1, col2, col3 = st.columns(3)
68
+
69
+ with col1:
70
+ st.subheader("Demographic Information")
71
+ Age = st.number_input("Age", min_value=18, max_value=80, value=35, step=1)
72
+ Gender = st.selectbox("Gender", ["Male", "Female"])
73
+ MaritalStatus = st.selectbox("Marital Status", ["Single", "Married", "Divorced", "Unmarried"])
74
+ NumberOfChildrenVisiting = st.number_input("Number of Children Visiting", min_value=0, max_value=5, value=0, step=1)
75
+ Designation = st.selectbox("Designation", ["Executive", "Manager", "Senior Manager", "AVP", "VP"])
76
+
77
+ with col2:
78
+ st.subheader("Travel Preferences")
79
+ CityTier = st.selectbox("City Tier", [1, 2, 3])
80
+ PreferredPropertyStar = st.selectbox("Preferred Property Star Rating", [3, 4, 5])
81
+ Passport = st.selectbox("Has Passport", [0, 1], format_func=lambda x: "No" if x == 0 else "Yes")
82
+ OwnCar = st.selectbox("Owns Car", [0, 1], format_func=lambda x: "No" if x == 0 else "Yes")
83
+ NumberOfTrips = st.number_input("Number of Previous Trips", min_value=0, max_value=20, value=2, step=1)
84
+
85
+ with col3:
86
+ st.subheader("Engagement Details")
87
+ TypeofContact = st.selectbox("Type of Contact", ["Self Enquiry", "Company Invited"])
88
+ DurationOfPitch = st.number_input("Duration of Pitch (minutes)", min_value=0.0, max_value=60.0, value=15.0, step=0.5)
89
+ NumberOfPersonVisiting = st.number_input("Number of People Visiting", min_value=1, max_value=10, value=2, step=1)
90
+ NumberOfFollowups = st.number_input("Number of Follow-ups", min_value=0, max_value=10, value=3, step=1)
91
+ ProductPitched = st.selectbox("Product Pitched", ["Basic", "Deluxe", "Standard", "Super Deluxe", "King"])
92
+ PitchSatisfactionScore = st.slider("Pitch Satisfaction Score", 1, 5, 3)
93
+
94
+ # Additional inputs
95
+ st.subheader("Financial Information")
96
+ col4, col5 = st.columns(2)
97
+
98
+ with col4:
99
+ Occupation = st.selectbox("Occupation", ["Salaried", "Small Business", "Large Business", "Free Lancer"])
100
+ MonthlyIncome = st.number_input("Monthly Income ($)", min_value=1000, max_value=50000, value=15000, step=500)
101
+
102
+ with col5:
103
+ # Calculate Pitch Efficiency (feature from your preprocessing)
104
+ PitchEfficiency = DurationOfPitch * PitchSatisfactionScore
105
+ st.metric("Calculated Pitch Efficiency", f"{PitchEfficiency:.2f}")
106
+
107
+ # Assemble input into DataFrame
108
+ input_data = pd.DataFrame([{
109
+ 'Age': Age,
110
+ 'TypeofContact': TypeofContact,
111
+ 'CityTier': CityTier,
112
+ 'DurationOfPitch': DurationOfPitch,
113
+ 'Occupation': Occupation,
114
+ 'Gender': Gender,
115
+ 'NumberOfPersonVisiting': NumberOfPersonVisiting,
116
+ 'NumberOfFollowups': NumberOfFollowups,
117
+ 'ProductPitched': ProductPitched,
118
+ 'PreferredPropertyStar': PreferredPropertyStar,
119
+ 'MaritalStatus': MaritalStatus,
120
+ 'NumberOfTrips': NumberOfTrips,
121
+ 'Passport': Passport,
122
+ 'PitchSatisfactionScore': PitchSatisfactionScore,
123
+ 'OwnCar': OwnCar,
124
+ 'NumberOfChildrenVisiting': NumberOfChildrenVisiting,
125
+ 'Designation': Designation,
126
+ 'MonthlyIncome': MonthlyIncome,
127
+ 'PitchEfficiency': PitchEfficiency
128
+ }])
129
+
130
+ # Display the input data
131
+ with st.expander("View Input Data"):
132
+ st.dataframe(input_data)
133
+
134
+ # Prediction section
135
+ st.header("🔮 Prediction")
136
+
137
+ if st.button("Predict Purchase Probability", type="primary", use_container_width=True):
138
+ with st.spinner("Making prediction..."):
139
+ try:
140
+ # Make prediction
141
+ prediction_proba = model.predict_proba(input_data)[0]
142
+ prediction_class = model.predict(input_data)[0]
143
+
144
+ # Display results
145
+ col_result1, col_result2 = st.columns(2)
146
+
147
+ with col_result1:
148
+ st.subheader("Prediction Result")
149
+ if prediction_class == 1:
150
+ st.success("✅ **Customer is LIKELY to purchase**")
151
+ st.balloons()
152
+ else:
153
+ st.info("❌ **Customer is UNLIKELY to purchase**")
154
+
155
+ with col_result2:
156
+ st.subheader("Probability Scores")
157
+ # Convert the class probabilities to percentages for display
158
+ prob_purchase = prediction_proba[1] * 100
159
+ prob_no_purchase = prediction_proba[0] * 100
160
+
161
+ st.metric("Probability of Purchase", f"{prob_purchase:.1f}%")
162
+ st.metric("Probability of No Purchase", f"{prob_no_purchase:.1f}%")
163
+
164
+ # Visual progress bar
165
+ st.progress(int(prob_purchase))
166
+ st.caption(f"Confidence: {prob_purchase:.1f}%")
167
+
168
+ # Business insights
169
+ st.subheader("📊 Business Insights")
170
+
171
+ if prediction_class == 1:
172
+ if prob_purchase > 80:
173
+ st.success("**High Confidence Lead** - Consider offering premium packages")
174
+ elif prob_purchase > 60:
175
+ st.warning("**Medium Confidence Lead** - Standard follow-up recommended")
176
+ else:
177
+ st.info("**Low Confidence Lead** - May require additional engagement")
178
+
179
+ st.markdown("""
180
+ **Recommended Actions:**
181
+ - Schedule follow-up call within 48 hours
182
+ - Offer personalized package options
183
+ - Highlight wellness benefits specific to customer profile
184
+ """)
185
+ else:
186
+ st.markdown("""
187
+ **Recommended Actions:**
188
+ - Consider re-engagement in 3-6 months
189
+ - Collect feedback on pitch satisfaction
190
+ - Update marketing materials for similar profiles
191
+ """)
192
+
193
+ except Exception as e:
194
+ st.error(f"Error making prediction: {e}")
195
+
196
+ # Model information
197
+ with st.expander("ℹ️ Model Information"):
198
+ st.markdown("""
199
+ **Model Architecture:**
200
+ - Preprocessing: StandardScaler for numeric features + OneHotEncoder for categorical features
201
+ - Algorithm: XGBoost Classifier
202
+ - Hyperparameters from grid search:
203
+ - n_estimators: 200
204
+ - max_depth: 7
205
+ - learning_rate: 0.1
206
+ - colsample_bytree: 0.6
207
+ - reg_lambda: 0.5
208
+
209
+ **Training Performance:**
210
+ - ROC AUC: 0.9414
211
+ - PR AUC: 0.8344
212
+ - Test Accuracy: 0.8898
213
+ - Precision (Class 1): 0.69
214
+ - Recall (Class 1): 0.79
215
+
216
+ **Note:** Class 1 represents customers who purchased the wellness tourism package.
217
+ """)
218
+
219
+ # Footer
220
+ st.markdown("---")
221
+ st.caption("Wellness Tourism Prediction Model | Built with XGBoost & Streamlit")
requirements.txt CHANGED
@@ -1,3 +1,8 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
 
1
+ pandas==2.2.2
2
+ huggingface_hub==0.32.6
3
+ streamlit==1.43.2
4
+ joblib==1.5.1
5
+ scikit-learn==1.6.0
6
+ xgboost==2.1.4
7
+ mlflow==3.0.1
8
+ numpy==1.26.0