TheHumanAgent committed on
Commit
564fa86
·
verified ·
1 Parent(s): 19e46f5

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. Dockerfile +23 -0
  2. app.py +609 -0
  3. requirements.txt +22 -0
Dockerfile ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Base image: the official Python 3.9 image (full variant, not a slim/minimal one)
FROM python:3.9

# Install dependencies from requirements.txt alone, before the application
# sources are copied, so this layer is rebuilt only when the pinned
# dependencies change and not on every source edit.
COPY requirements.txt /tmp/requirements.txt
RUN pip3 install --no-cache-dir -r /tmp/requirements.txt

# Run the application as an unprivileged user (UID 1000) instead of root
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# The application lives in the user's home directory; the build context is
# copied exactly once, owned by that user.
WORKDIR $HOME/app
COPY --chown=user . $HOME/app

# Run the Streamlit app on port 8501 and make it accessible externally
CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0", "--server.enableXsrfProtection=false"]
app.py ADDED
@@ -0,0 +1,609 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Wellness Tourism Package Prediction App
3
+ Production-grade Streamlit application for predicting customer purchase likelihood
4
+ """
5
+
6
+ import streamlit as st
7
+ import pandas as pd
8
+ import numpy as np
9
+ import joblib
10
+ from huggingface_hub import hf_hub_download
11
+ import plotly.graph_objects as go
12
+ import plotly.express as px
13
+ from datetime import datetime
14
+ import os
15
+
16
# Page configuration: browser tab title/icon, wide layout, sidebar open on load
st.set_page_config(
    page_title="Wellness Tourism Predictor",
    page_icon="✈️",
    layout="wide",
    initial_sidebar_state="expanded",
)

# Stylesheet injected into the page: header typography, the green/red
# prediction boxes, metric cards and a full-width download button.
_CUSTOM_CSS = """
<style>
.main-header {
font-size: 2.5rem;
color: #1f77b4;
text-align: center;
margin-bottom: 2rem;
}
.sub-header {
font-size: 1.2rem;
color: #555;
text-align: center;
margin-bottom: 2rem;
}
.prediction-box {
padding: 2rem;
border-radius: 10px;
text-align: center;
font-size: 1.5rem;
font-weight: bold;
margin: 2rem 0;
}
.prediction-positive {
background-color: #d4edda;
color: #155724;
border: 2px solid #c3e6cb;
}
.prediction-negative {
background-color: #f8d7da;
color: #721c24;
border: 2px solid #f5c6cb;
}
.metric-card {
background-color: #f0f2f6;
padding: 1rem;
border-radius: 5px;
margin: 0.5rem 0;
}
.stDownloadButton button {
width: 100%;
}
</style>
"""
st.markdown(_CUSTOM_CSS, unsafe_allow_html=True)
68
+
69
@st.cache_resource
def load_model():
    """
    Fetch the trained model file from the Hugging Face Hub and deserialize it.

    The function is wrapped in ``st.cache_resource`` so the download and
    deserialization are not repeated on every interaction.

    Returns:
        The object produced by ``joblib.load`` for the downloaded file.

    On any exception, two error messages are shown in the UI and the script
    run is halted with ``st.stop()``.
    """
    try:
        # NOTE(review): joblib.load unpickles the downloaded file; only point
        # this at a repository you trust.
        return joblib.load(
            hf_hub_download(
                repo_id="TheHumanAgent/tour_pkg_pred_model",
                filename="final_tour_pkg_pred_model_v1.joblib",
                repo_type="model",
            )
        )
    except Exception as exc:
        st.error(f"Error loading model: {str(exc)}")
        st.error("Please ensure the model is uploaded to Hugging Face Hub")
        st.stop()
87
+
88
def create_input_features():
    """
    Render the sidebar input form and collect the customer's feature values.

    Also initializes ``st.session_state.prediction_made`` to ``False`` the
    first time it runs in a session.

    Returns:
        dict: one entry per model input column (18 keys), mapping the
        column name to the value currently selected in the sidebar.
    """
    st.sidebar.header("📋 Customer Information")

    # Initialize session state for form
    if 'prediction_made' not in st.session_state:
        st.session_state.prediction_made = False

    # NOTE: every option list below needs a comma between its entries.
    # Adjacent string literals without commas are concatenated by Python into
    # a single string, which would leave each selectbox with one bogus option.
    with st.sidebar:
        st.subheader("👤 Personal Details")

        # Age: Range from 18-61 based on data
        age = st.slider(
            "Age",
            min_value=18,
            max_value=61,
            value=36,  # median
            help="Customer's age (18-61 years)",
        )

        # Gender: Male, Female, Fe Male (as seen in data)
        gender = st.selectbox(
            "Gender",
            ["Female", "Male", "Fe Male"],
            help="Customer's gender",
        )

        # MaritalStatus: Single, Married, Divorced, Unmarried
        marital_status = st.selectbox(
            "Marital Status",
            ["Single", "Divorced", "Married", "Unmarried"],
            help="Customer's marital status",
        )

        # CityTier: 1, 2, 3
        city_tier = st.selectbox(
            "City Tier",
            [1, 2, 3],
            index=0,  # median is 1
            help="City development level (1=Most developed, 3=Least developed)",
        )

        st.markdown("---")
        st.subheader("💼 Professional Details")

        # Occupation: Salaried, Small Business, Large Business, Free Lancer
        occupation = st.selectbox(
            "Occupation",
            ["Salaried", "Free Lancer", "Small Business", "Large Business"],
            help="Customer's occupation type",
        )

        # Designation: Executive, Manager, Senior Manager, AVP, VP
        designation = st.selectbox(
            "Designation",
            ["Manager", "Executive", "Senior Manager", "AVP", "VP"],
            help="Customer's job designation",
        )

        # MonthlyIncome: Range from 1000 to 98678
        monthly_income = st.number_input(
            "Monthly Income (₹)",
            min_value=1000,
            max_value=100000,
            value=22418,  # median
            step=1000,
            help="Gross monthly income in Rupees (₹1,000 - ₹98,678)",
        )

        st.markdown("---")
        st.subheader("✈️ Travel Preferences")

        # NumberOfTrips: Range from 1-22
        num_trips = st.slider(
            "Number of Trips (Annually)",
            min_value=1,
            max_value=22,
            value=3,  # median
            help="Average annual trips taken (1-22)",
        )

        # Passport: 0 or 1
        passport = st.selectbox(
            "Valid Passport",
            [0, 1],
            format_func=lambda x: "Yes" if x == 1 else "No",
            index=0,  # median is 0
            help="Does customer have a valid passport?",
        )

        # OwnCar: 0 or 1
        own_car = st.selectbox(
            "Own Car",
            [0, 1],
            format_func=lambda x: "Yes" if x == 1 else "No",
            index=1,  # median is 1
            help="Does customer own a car?",
        )

        # PreferredPropertyStar: 3, 4, 5
        preferred_property_star = st.selectbox(
            "Preferred Hotel Rating",
            [3, 4, 5],
            index=0,  # median is 3
            help="Preferred hotel star rating (3-5 stars)",
        )

        st.markdown("---")
        st.subheader("👨‍👩‍👧‍👦 Trip Details")

        # NumberOfPersonVisiting: Range from 1-5
        num_persons = st.slider(
            "Number of Persons Visiting",
            min_value=1,
            max_value=5,
            value=3,  # median
            help="Total people in the group (1-5)",
        )

        # NumberOfChildrenVisiting: Range from 0-3
        num_children = st.slider(
            "Number of Children (<5 years)",
            min_value=0,
            max_value=3,
            value=1,  # median
            help="Number of children under 5 years (0-3)",
        )

        st.markdown("---")
        st.subheader("📞 Interaction Details")

        # TypeofContact: Company Invited, Self Enquiry
        type_of_contact = st.selectbox(
            "Type of Contact",
            ["Self Enquiry", "Company Invited"],
            help="How was the customer contacted?",
        )

        # ProductPitched: Basic, Standard, Deluxe, Super Deluxe, King
        product_pitched = st.selectbox(
            "Product Pitched",
            ["Deluxe", "Basic", "Standard", "Super Deluxe", "King"],
            help="Type of package pitched to the customer",
        )

        # DurationOfPitch: Range from 5-127 minutes
        duration_of_pitch = st.slider(
            "Duration of Pitch (minutes)",
            min_value=5,
            max_value=127,
            value=14,  # median
            help="Sales pitch duration in minutes (5-127)",
        )

        # NumberOfFollowups: Range from 1-6
        num_followups = st.slider(
            "Number of Follow-ups",
            min_value=1,
            max_value=6,
            value=4,  # median
            help="Total follow-ups after initial pitch (1-6)",
        )

        # PitchSatisfactionScore: Range from 1-5
        pitch_satisfaction = st.slider(
            "Pitch Satisfaction Score",
            min_value=1,
            max_value=5,
            value=3,  # median
            help="Customer satisfaction with the pitch (1=Very Low, 5=Very High)",
        )

    # Create feature dictionary matching exact column names from training data
    features = {
        'Age': age,
        'CityTier': city_tier,
        'DurationOfPitch': duration_of_pitch,
        'NumberOfPersonVisiting': num_persons,
        'NumberOfFollowups': num_followups,
        'PreferredPropertyStar': preferred_property_star,
        'NumberOfTrips': num_trips,
        'Passport': passport,
        'PitchSatisfactionScore': pitch_satisfaction,
        'NumberOfChildrenVisiting': num_children,
        'MonthlyIncome': monthly_income,
        'TypeofContact': type_of_contact,
        'Occupation': occupation,
        'Gender': gender,
        'OwnCar': own_car,
        'ProductPitched': product_pitched,
        'MaritalStatus': marital_status,
        'Designation': designation,
    }

    return features
250
+
251
def create_gauge_chart(probability):
    """
    Build a Plotly gauge showing the purchase probability as a percentage.

    Args:
        probability: purchase probability; it is multiplied by 100 for display.

    Returns:
        A ``go.Figure`` holding one indicator with red/yellow/green bands
        (0-30, 30-70, 70-100) and a red marker line and delta reference at 45.
    """
    colour_bands = [
        {'range': [0, 30], 'color': '#ffcccc'},
        {'range': [30, 70], 'color': '#ffffcc'},
        {'range': [70, 100], 'color': '#ccffcc'},
    ]
    marker_line = {
        'line': {'color': "red", 'width': 4},
        'thickness': 0.75,
        'value': 45,
    }
    gauge_settings = {
        'axis': {'range': [None, 100], 'tickwidth': 1, 'tickcolor': "darkblue"},
        'bar': {'color': "darkblue"},
        'bgcolor': "white",
        'borderwidth': 2,
        'bordercolor': "gray",
        'steps': colour_bands,
        'threshold': marker_line,
    }

    indicator = go.Indicator(
        mode="gauge+number+delta",
        value=probability * 100,
        domain={'x': [0, 1], 'y': [0, 1]},
        title={'text': "Purchase Probability (%)", 'font': {'size': 24}},
        delta={'reference': 45, 'increasing': {'color': "green"}},
        gauge=gauge_settings,
    )

    chart = go.Figure(indicator)
    chart.update_layout(height=300, margin=dict(l=20, r=20, t=50, b=20))
    return chart
286
+
287
def create_feature_importance_chart(features_df):
    """
    Build a horizontal bar chart of selected numeric values for one customer.

    Despite the name, the chart shows the customer's own input values (taken
    from the first row of ``features_df``), not model feature importances.
    Monthly income is divided by 1000 before plotting.

    Args:
        features_df: DataFrame containing the columns read below.

    Returns:
        A Plotly Express bar figure.
    """
    def first_value(column):
        # Value of the given column in the first row
        return features_df[column].values[0]

    key_features = {'Monthly Income (₹K)': first_value('MonthlyIncome') / 1000}
    for label, column in (
        ('Age', 'Age'),
        ('Annual Trips', 'NumberOfTrips'),
        ('Pitch Duration (min)', 'DurationOfPitch'),
        ('Follow-ups', 'NumberOfFollowups'),
        ('Satisfaction', 'PitchSatisfactionScore'),
        ('Hotel Rating', 'PreferredPropertyStar'),
        ('Group Size', 'NumberOfPersonVisiting'),
    ):
        key_features[label] = first_value(column)

    labels = list(key_features.keys())
    values = list(key_features.values())

    chart = px.bar(
        x=values,
        y=labels,
        orientation='h',
        title='Key Customer Metrics Overview',
        labels={'x': 'Value', 'y': 'Feature'},
        color=list(values),
        color_continuous_scale='Blues',
    )
    chart.update_layout(
        height=400,
        showlegend=False,
        margin=dict(l=20, r=20, t=50, b=20),
    )
    return chart
320
+
321
def get_recommendation(probability, features):
    """
    Build the list of recommendation strings for a prediction.

    The list starts with three lines for the priority tier (high at
    probability >= 0.7, medium at >= 0.45, low otherwise), followed by an
    empty spacer string, followed by any insights triggered by the
    customer's individual feature values.

    Args:
        probability: predicted purchase probability.
        features: mapping with the keys read below.

    Returns:
        list[str]: markdown-formatted recommendation lines.
    """
    if probability >= 0.7:
        tier_lines = [
            "✅ **HIGH PRIORITY LEAD** - Strong purchase likelihood",
            "🎯 **Action**: Schedule immediate follow-up call within 24 hours",
            "💎 **Strategy**: Offer premium package options and exclusive benefits",
        ]
    elif probability >= 0.45:
        tier_lines = [
            "⚠️ **MEDIUM PRIORITY LEAD** - Moderate purchase likelihood",
            "📧 **Action**: Send personalized email highlighting package benefits",
            "🎁 **Strategy**: Consider offering limited-time discount (5-10%)",
        ]
    else:
        tier_lines = [
            "❌ **LOW PRIORITY LEAD** - Lower purchase likelihood",
            "📬 **Action**: Add to nurture email campaign",
            "🔄 **Strategy**: Re-engage after 2-3 months with seasonal offers",
        ]

    # The empty string is a spacer between the tier lines and the insights
    recommendations = tier_lines + [""]

    # (condition, message) pairs, in display order. The two satisfaction
    # rules are mutually exclusive, so at most one of them fires.
    insight_rules = [
        (features['NumberOfFollowups'] <= 2,
         "📌 **Insight**: Low follow-up count - Increase engagement frequency"),
        (features['PitchSatisfactionScore'] <= 2,
         "⚠️ **Alert**: Low satisfaction score - Review and improve pitch approach"),
        (features['PitchSatisfactionScore'] >= 4,
         "⭐ **Positive**: High satisfaction - Customer is engaged, act quickly!"),
        (features['MonthlyIncome'] >= 30000,
         "💰 **Insight**: High-income customer - Emphasize luxury and premium features"),
        (features['NumberOfTrips'] >= 5,
         "✈️ **Insight**: Frequent traveler - Highlight loyalty benefits and travel perks"),
        (features['Passport'] == 0,
         "🛂 **Note**: No passport - Consider domestic package options"),
        (features['NumberOfChildrenVisiting'] >= 2,
         "👨‍👩‍👧‍👦 **Insight**: Family with children - Emphasize family-friendly amenities"),
        (features['DurationOfPitch'] < 10,
         "⏱️ **Note**: Short pitch duration - May need more detailed product information"),
    ]
    recommendations.extend(message for triggered, message in insight_rules if triggered)

    return recommendations
368
+
369
def display_customer_summary(features):
    """
    Render eight headline metrics for the customer in four columns.

    Args:
        features: mapping with the keys read below (age, city tier, income,
            trips, follow-ups, satisfaction, group size, hotel preference).
    """
    demographics, finances, engagement, trip = st.columns(4)

    demographics.metric("👤 Age", f"{features['Age']} years")
    demographics.metric("🏙️ City Tier", f"Tier {features['CityTier']}")

    finances.metric("💰 Income", f"₹{features['MonthlyIncome']:,}")
    finances.metric("✈️ Annual Trips", features['NumberOfTrips'])

    engagement.metric("📞 Follow-ups", features['NumberOfFollowups'])
    engagement.metric("⭐ Satisfaction", f"{features['PitchSatisfactionScore']}/5")

    trip.metric("👥 Group Size", features['NumberOfPersonVisiting'])
    trip.metric("🏨 Hotel Pref", f"{features['PreferredPropertyStar']} Star")
390
+
391
def main():
    """
    Main application function.

    Renders the page header, loads the model, collects the sidebar inputs,
    shows the customer summary, runs a prediction when the button is pressed
    (results are kept in ``st.session_state`` so they survive reruns), and
    renders the results, charts, recommendations, CSV export, footer and
    sidebar help text.
    """
    # Decision boundary applied to the positive-class probability. Kept in one
    # place so the predicted label and the exported confidence level agree.
    threshold = 0.45

    # Header
    st.markdown('<p class="main-header">✈️ Wellness Tourism Package Predictor</p>',
                unsafe_allow_html=True)
    st.markdown('<p class="sub-header">AI-Powered Customer Purchase Prediction System</p>',
                unsafe_allow_html=True)

    # Load model
    with st.spinner("🔄 Loading ML model..."):
        model = load_model()

    st.success("✅ Model loaded successfully!")

    # Create input form
    features = create_input_features()

    # Main content area
    st.markdown("---")
    st.subheader("📊 Customer Profile Summary")

    display_customer_summary(features)

    # Show detailed information in expandable section
    with st.expander("📋 View Complete Customer Details"):
        df_display = pd.DataFrame([features]).T
        df_display.columns = ['Value']
        st.dataframe(df_display, use_container_width=True, height=600)

    st.markdown("---")

    # Prediction section
    col_left, col_right = st.columns([2, 1])

    with col_right:
        st.subheader("🎯 Make Prediction")
        predict_button = st.button("🔮 Predict Purchase Likelihood",
                                   type="primary",
                                   use_container_width=True)

        if st.button("🔄 Reset", use_container_width=True):
            st.session_state.prediction_made = False
            st.rerun()

    with col_left:
        if predict_button:
            with st.spinner("🤖 Analyzing customer data..."):
                # Single-row DataFrame; columns follow the features dict order
                input_df = pd.DataFrame([features])

                # Make prediction
                try:
                    prediction_proba = model.predict_proba(input_df)[0, 1]
                    prediction = 1 if prediction_proba >= threshold else 0

                    # Store in session state
                    st.session_state.prediction_made = True
                    st.session_state.prediction = prediction
                    st.session_state.probability = prediction_proba
                    st.session_state.features = features
                    st.session_state.timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")

                except Exception as e:
                    st.error(f"❌ Prediction Error: {str(e)}")
                    st.error("Please check that all input values are valid.")
                    st.stop()

    # Display prediction results
    if st.session_state.prediction_made:
        st.markdown("---")
        st.subheader("📈 Prediction Results")

        prediction = st.session_state.prediction
        probability = st.session_state.probability

        # Prediction box with color coding
        if prediction == 1:
            st.markdown(f"""
<div class="prediction-box prediction-positive">
✅ LIKELY TO PURCHASE<br>
<span style="font-size: 2rem;">{probability*100:.1f}%</span><br>
Confidence Level
</div>
""", unsafe_allow_html=True)
        else:
            st.markdown(f"""
<div class="prediction-box prediction-negative">
❌ UNLIKELY TO PURCHASE<br>
<span style="font-size: 2rem;">{(1-probability)*100:.1f}%</span><br>
Confidence Level (Not Buying)
</div>
""", unsafe_allow_html=True)

        # Visualization section
        st.markdown("---")
        st.subheader("📊 Visual Analysis")

        viz_col1, viz_col2 = st.columns([1, 1])

        with viz_col1:
            st.plotly_chart(create_gauge_chart(probability),
                            use_container_width=True)

        with viz_col2:
            input_df = pd.DataFrame([st.session_state.features])
            st.plotly_chart(create_feature_importance_chart(input_df),
                            use_container_width=True)

        # Recommendations section
        st.markdown("---")
        st.subheader("💡 Actionable Recommendations")

        recommendations = get_recommendation(probability, st.session_state.features)

        for rec in recommendations:
            if rec:  # Skip empty strings
                st.markdown(f"{rec}")

        # Model explanation
        with st.expander("🤔 How does the model work?"):
            st.markdown("""
**Model Details:**
- **Algorithm**: XGBoost (Extreme Gradient Boosting)
- **Classification Threshold**: 45%
- **Training Data**: 4,128 customer records
- **Features**: 18 input variables including demographics, travel preferences, and interaction history

**Prediction Logic:**
- Probability ≥ 45% → Customer likely to purchase
- Probability < 45% → Customer unlikely to purchase

**Key Factors Considered:**
- Customer demographics (age, income, occupation)
- Travel behavior (past trips, preferences)
- Sales interaction (pitch satisfaction, follow-ups)
- Family situation (marital status, children)

The model has been trained to identify patterns that indicate purchase likelihood based on historical customer data.
""")

        # Export functionality
        st.markdown("---")
        st.subheader("📥 Export Prediction Report")

        report_col1, report_col2 = st.columns([2, 1])

        with report_col1:
            st.info("💾 Download a detailed report with all customer information and prediction results")

        with report_col2:
            # Create comprehensive report
            report_data = {
                'Timestamp': [st.session_state.timestamp],
                'Prediction': ['Will Purchase' if prediction == 1 else 'Will Not Purchase'],
                'Purchase_Probability': [f"{probability*100:.2f}%"],
                # Confidence is the distance from the decision boundary that
                # produced the label, not from 0.5.
                'Confidence_Level': ['High' if abs(probability - threshold) > 0.2 else 'Medium'],
                **st.session_state.features
            }

            report_df = pd.DataFrame(report_data)
            csv = report_df.to_csv(index=False)

            # Name the file after the stored prediction time so it matches the
            # report's Timestamp column and stays stable across reruns.
            predicted_at = datetime.strptime(st.session_state.timestamp, "%Y-%m-%d %H:%M:%S")

            st.download_button(
                label="📄 Download CSV Report",
                data=csv,
                file_name=f"customer_prediction_{predicted_at.strftime('%Y%m%d_%H%M%S')}.csv",
                mime="text/csv",
                use_container_width=True
            )

    # Footer
    st.markdown("---")
    st.markdown("""
<div style='text-align: center; color: #888; padding: 1rem;'>
<p><b>🏢 Visit with Us</b> - Wellness Tourism Package Prediction System</p>
<p>Powered by XGBoost ML Model | Classification Threshold: 45% | Trained on 4,128 customers</p>
<p style='font-size: 0.85rem;'>Model Version: v1.0 | Last Updated: December 2024</p>
</div>
""", unsafe_allow_html=True)

    # Sidebar footer with statistics
    with st.sidebar:
        st.markdown("---")
        st.info("""
**ℹ️ About This Application**

This ML-powered system predicts whether a customer will purchase
the Wellness Tourism Package based on their profile and interaction history.

**📊 Model Statistics:**
- **Training Data**: 4,128 customers
- **Purchase Rate**: 19.3%
- **Algorithm**: XGBoost Classifier
- **Threshold**: 45%
- **Features**: 18 variables

**🎯 How to Use:**
1. Enter customer details in the form
2. Click 'Predict Purchase Likelihood'
3. Review prediction and recommendations
4. Download detailed report (optional)

**📈 Prediction Accuracy:**
The model considers demographics, travel preferences,
and sales interaction history to make accurate predictions.
""")

        st.warning("""
**⚠️ Important Notes:**
- Ensure all fields are filled accurately
- Income should be in Indian Rupees (₹)
- Follow-ups range from 1-6
- Pitch duration in minutes (5-127)
""")


if __name__ == "__main__":
    main()
requirements.txt ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Core dependencies
2
+ streamlit==1.31.0
3
+ pandas==2.1.4
4
+ numpy==1.26.3
5
+
6
+ # ML dependencies
7
+ scikit-learn==1.4.0
8
+ xgboost==2.0.3
9
+ joblib==1.3.2
10
+
11
+ # Hugging Face integration
12
+ huggingface-hub==0.20.2
13
+
14
+ # Visualization
15
+ plotly==5.18.0
16
+
17
+ # Excel file support (if needed for future enhancements)
18
+ openpyxl==3.1.2
19
+
20
+ # Additional utilities
21
+ python-dateutil==2.8.2
22
+ pytz==2023.3