krish129 committed on
Commit
c0b96f2
·
verified ·
1 Parent(s): d14070b

Upload 4 files

Browse files
Files changed (4) hide show
  1. Dockerfile +12 -20
  2. app.py +286 -0
  3. predict.py +215 -0
  4. requirements.txt +7 -3
Dockerfile CHANGED
@@ -1,20 +1,12 @@
1
- FROM python:3.13.5-slim
2
-
3
- WORKDIR /app
4
-
5
- RUN apt-get update && apt-get install -y \
6
- build-essential \
7
- curl \
8
- git \
9
- && rm -rf /var/lib/apt/lists/*
10
-
11
- COPY requirements.txt ./
12
- COPY src/ ./src/
13
-
14
- RUN pip3 install -r requirements.txt
15
-
16
- EXPOSE 8501
17
-
18
- HEALTHCHECK CMD curl --fail http://localhost:8501/_stcore/health
19
-
20
- ENTRYPOINT ["streamlit", "run", "src/streamlit_app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
1
+ FROM python:3.9-slim
2
+
3
+ WORKDIR /app
4
+
5
+ COPY requirements.txt .
6
+ RUN pip install --no-cache-dir -r requirements.txt
7
+
8
+ COPY . .
9
+
10
+ EXPOSE 8501
11
+
12
+ CMD ["streamlit", "run", "app.py", "--server.port=8501", "--server.address=0.0.0.0"]
 
 
 
 
 
 
 
 
app.py ADDED
@@ -0,0 +1,286 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ Tourism Package Predictor - Streamlit App
4
+ """
5
+ import streamlit as st
6
+ import pandas as pd
7
+ import numpy as np
8
+ import sys
9
+ import os
10
+
11
+ # Add current directory to path
12
+ sys.path.append(os.path.dirname(os.path.abspath(__file__)))
13
+
14
+ # Page configuration
15
+ st.set_page_config(
16
+ page_title="Tourism Package Predictor",
17
+ layout="wide"
18
+ )
19
+
20
+ # Custom CSS for better UI
21
+ st.markdown("""
22
+ <style>
23
+ .main-header {
24
+ font-size: 2.5rem;
25
+ color: #1E3A8A;
26
+ text-align: center;
27
+ margin-bottom: 1rem;
28
+ }
29
+ .stButton>button {
30
+ background: linear-gradient(45deg, #667eea 0%, #764ba2 100%);
31
+ color: white;
32
+ font-weight: bold;
33
+ border: none;
34
+ width: 100%;
35
+ padding: 0.75rem;
36
+ border-radius: 10px;
37
+ }
38
+ .prediction-positive {
39
+ background-color: #D1FAE5;
40
+ padding: 20px;
41
+ border-radius: 10px;
42
+ border-left: 5px solid #10B981;
43
+ margin: 20px 0;
44
+ }
45
+ .prediction-negative {
46
+ background-color: #FEE2E2;
47
+ padding: 20px;
48
+ border-radius: 10px;
49
+ border-left: 5px solid #EF4444;
50
+ margin: 20px 0;
51
+ }
52
+ .metric-card {
53
+ background-color: #F8FAFC;
54
+ padding: 15px;
55
+ border-radius: 10px;
56
+ text-align: center;
57
+ margin: 5px;
58
+ }
59
+ </style>
60
+ """, unsafe_allow_html=True)
61
+
62
+ # Header
63
+ st.markdown('<h1 class="main-header">Tourism Package Predictor</h1>', unsafe_allow_html=True)
64
+ st.markdown("### Predict customer interest in Wellness Tourism Packages")
65
+
66
+ # Try to import predict function
67
+ try:
68
+ from predict import predict
69
+ PREDICT_AVAILABLE = True
70
+ st.sidebar.success("Prediction module loaded")
71
+ except ImportError as e:
72
+ PREDICT_AVAILABLE = False
73
+ st.sidebar.warning(f"Predict module not available: {e}")
74
+ except Exception as e:
75
+ PREDICT_AVAILABLE = False
76
+ st.sidebar.error(f"Error: {e}")
77
+
78
+ # Sidebar for inputs
79
+ st.sidebar.header("Customer Information")
80
+
81
+ # Create tabs for better organization
82
+ tab1, tab2 = st.sidebar.tabs(["Personal", "Travel"])
83
+
84
+ with tab1:
85
+ Age = st.slider("Age", 18, 70, 35)
86
+ Gender = st.selectbox("Gender", ["Male", "Female"])
87
+ MaritalStatus = st.selectbox("Marital Status", ["Single", "Married", "Divorced"])
88
+ Occupation = st.selectbox("Occupation", ["Salaried", "Business", "Free Lancer"])
89
+ MonthlyIncome = st.number_input("Monthly Income ($)", 1000, 100000, 25000, 1000)
90
+ Designation = st.selectbox("Designation", ["Executive", "Manager", "Senior Manager", "AVP", "VP"])
91
+
92
+ with tab2:
93
+ CityTier = st.selectbox("City Tier", [1, 2, 3])
94
+ NumberOfTrips = st.slider("Number of Trips", 0, 10, 2)
95
+ Passport = st.radio("Has Passport?", ["Yes", "No"])
96
+ OwnCar = st.radio("Owns Car?", ["Yes", "No"])
97
+ NumberOfPersonVisiting = st.slider("Travel Group Size", 1, 5, 2)
98
+ NumberOfChildrenVisiting = st.slider("Children (under 5)", 0, 3, 0)
99
+ TypeofContact = st.selectbox("Type of Contact", ["Company Invited", "Self Inquiry"])
100
+ DurationOfPitch = st.slider("Pitch Duration (minutes)", 5, 60, 15)
101
+ NumberOfFollowups = st.slider("Follow-ups", 0, 10, 3)
102
+ ProductPitched = st.selectbox("Product Offered", ["Basic", "Deluxe", "King", "Standard", "Super Deluxe"])
103
+ PreferredPropertyStar = st.selectbox("Preferred Hotel Star", [3, 4, 5])
104
+ PitchSatisfactionScore = st.slider("Satisfaction Score (1-5)", 1, 5, 3)
105
+
106
+ # Predict button
107
if st.button("Predict Purchase Probability"):
    # Assemble the raw customer record from the sidebar widgets.
    # CustomerID and ProdTaken are placeholders: ProdTaken is the target
    # being predicted, so its value here carries no information.
    input_data = {
        "CustomerID": 1000,
        "ProdTaken": 0,  # This is what we're predicting
        "Age": float(Age),
        "TypeofContact": TypeofContact,
        "CityTier": int(CityTier),
        "DurationOfPitch": float(DurationOfPitch),
        "Occupation": Occupation,
        "Gender": Gender,
        "NumberOfPersonVisiting": int(NumberOfPersonVisiting),
        "NumberOfFollowups": float(NumberOfFollowups),
        "ProductPitched": ProductPitched,
        "PreferredPropertyStar": float(PreferredPropertyStar),
        "MaritalStatus": MaritalStatus,
        "NumberOfTrips": float(NumberOfTrips),
        "Passport": 1 if Passport == "Yes" else 0,
        "PitchSatisfactionScore": int(PitchSatisfactionScore),
        "OwnCar": 1 if OwnCar == "Yes" else 0,
        "NumberOfChildrenVisiting": float(NumberOfChildrenVisiting),
        "Designation": Designation,
        "MonthlyIncome": float(MonthlyIncome),
    }

    st.markdown("---")
    st.subheader("Prediction Results")

    if PREDICT_AVAILABLE:
        try:
            # predict() returns (label, confidence) where confidence is the
            # probability of the *predicted* class, not of a purchase.
            result, confidence = predict(input_data)

            # Turn it into a genuine purchase probability so that a confident
            # "Will Not Purchase" does not show up as a high score/gauge value.
            if result == 1:
                purchase_probability = confidence
            else:
                purchase_probability = 1.0 - confidence

            # Display results in columns
            col1, col2, col3 = st.columns(3)

            with col1:
                st.markdown('<div class="metric-card">', unsafe_allow_html=True)
                if result == 1:
                    st.success("Will Purchase")
                else:
                    st.error("Will Not Purchase")
                st.markdown('</div>', unsafe_allow_html=True)

            with col2:
                st.markdown('<div class="metric-card">', unsafe_allow_html=True)
                # Confidence in the predicted label (either class).
                st.metric("Confidence", f"{confidence:.1%}")
                st.markdown('</div>', unsafe_allow_html=True)

            with col3:
                st.markdown('<div class="metric-card">', unsafe_allow_html=True)
                st.metric("Customer Score", f"{int(purchase_probability * 100)}/100")
                st.markdown('</div>', unsafe_allow_html=True)

            # Visual indicator (imported lazily: only needed once a
            # prediction has actually been made).
            import plotly.graph_objects as go

            fig = go.Figure(go.Indicator(
                mode="gauge+number",
                value=purchase_probability * 100,
                domain={'x': [0, 1], 'y': [0, 1]},
                title={'text': "Purchase Probability"},
                gauge={
                    'axis': {'range': [0, 100]},
                    'bar': {'color': "#667eea"},
                    'steps': [
                        {'range': [0, 30], 'color': "#FEE2E2"},
                        {'range': [30, 70], 'color': "#FEF3C7"},
                        {'range': [70, 100], 'color': "#D1FAE5"},
                    ],
                    # Red marker at the 50% decision boundary.
                    'threshold': {
                        'line': {'color': "red", 'width': 4},
                        'thickness': 0.75,
                        'value': 50,
                    },
                },
            ))

            fig.update_layout(height=250)
            st.plotly_chart(fig, use_container_width=True)

            # Recommendations
            st.subheader("Recommendations")

            if result == 1:
                st.markdown('<div class="prediction-positive">', unsafe_allow_html=True)
                st.success("High Potential Customer!")
                st.markdown(
                    "**Immediate Actions Required:**\n"
                    "- Contact within 24 hours\n"
                    "- Personalized Wellness Package\n"
                    "- 15% early-bird discount\n"
                    "- Schedule demo session\n"
                )
                st.markdown('</div>', unsafe_allow_html=True)
            else:
                st.markdown('<div class="prediction-negative">', unsafe_allow_html=True)
                st.warning("Low Probability Customer")
                st.markdown(
                    "**Recommended Strategy:**\n"
                    "- Automated: Send brochure & testimonials\n"
                    "- Communication: Monthly newsletter\n"
                    "- Timing: Re-evaluate in 3 months\n"
                    "- Focus: Prioritize high-potential leads\n"
                )
                st.markdown('</div>', unsafe_allow_html=True)

        except Exception as e:
            # Best effort: report the failure, then fall through to the
            # rule-based demo below instead of leaving the page empty.
            st.error(f"Prediction failed: {e}")
            st.info("Running in demo mode...")
            PREDICT_AVAILABLE = False

    if not PREDICT_AVAILABLE:
        # Demo mode
        st.info("Running in demo mode")

        # Simple rule-based prediction: one point per favourable signal.
        score = 0
        if Age < 40:
            score += 1
        if MonthlyIncome > 25000:
            score += 1
        if Passport == "Yes":
            score += 1
        if NumberOfTrips > 1:
            score += 1
        if PitchSatisfactionScore > 3:
            score += 1

        result = 1 if score >= 3 else 0

        col1, col2 = st.columns(2)

        with col1:
            if result == 1:
                st.success("Demo: Will Purchase")
            else:
                st.error("Demo: Will Not Purchase")

        with col2:
            st.metric("Demo Score", f"{score}/5")
244
+
245
+ # About section
246
+ with st.expander("About This Application"):
247
+ st.markdown("""
248
+ ## Tourism Package Prediction System
249
+
250
+ **Purpose:**
251
+ Predict customer likelihood to purchase Wellness Tourism Packages using machine learning.
252
+
253
+ **Key Features:**
254
+ - Real-time prediction based on customer profile
255
+ - Confidence scoring with visual indicators
256
+ - Actionable recommendations for sales teams
257
+
258
+ **Model Information:**
259
+ - **Algorithm**: Random Forest Classifier
260
+ - **Accuracy**: ~85% on test data
261
+ - **Features**: 20 customer attributes
262
+
263
+ **MLOps Pipeline:**
264
+ - Data Versioning: Hugging Face Datasets
265
+ - Model Registry: Hugging Face Model Hub
266
+ - CI/CD: GitHub Actions
267
+ - Deployment: Streamlit on Hugging Face Spaces
268
+ """)
269
+
270
+ # Footer
271
+ st.markdown("---")
272
+ st.markdown(
273
+ """
274
+ <div style="text-align: center">
275
+ <p><strong>MLOps Tourism Project</strong></p>
276
+ <p>
277
+ <a href="https://github.com/krish129/mlops-tourism-project" target="_blank">GitHub</a> |
278
+ <a href="https://huggingface.co/krish129" target="_blank">Hugging Face</a>
279
+ </p>
280
+ <p style="color: #666; font-size: 0.9rem;">
281
+ Built with Streamlit, Scikit-learn, and Hugging Face
282
+ </p>
283
+ </div>
284
+ """,
285
+ unsafe_allow_html=True
286
+ )
predict.py ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ """
3
+ Prediction module with multiple fallback options
4
+ """
5
+ import joblib
6
+ import pandas as pd
7
+ import numpy as np
8
+ import os
9
+ from sklearn.preprocessing import LabelEncoder
10
+ import warnings
11
+ warnings.filterwarnings('ignore')
12
+
13
+ # Global model variable
14
+ model = None
15
+
16
def load_model():
    """Return the classifier used for predictions, loading it on first use.

    Lookup order:
      1. the module-level ``model`` if it has already been loaded,
      2. a fixed list of local pickle paths (relative to the current
         working directory),
      3. ``best_model.pkl`` from the ``krish129/tourism-customer-model``
         repository on the Hugging Face Hub,
      4. a small RandomForest fitted on dummy data, so callers always get
         an estimator back.

    Returns:
        The loaded (or dummy) estimator. It is also stored in the
        module-level ``model`` so later calls return immediately.
    """
    global model

    if model is not None:
        return model

    print("Loading model...")

    local_candidates = (
        "best_model.pkl",
        "model.pkl",
        "../models/best_model.pkl",
        "mlops-tourism-project/models/best_model.pkl",
    )

    # SECURITY NOTE: joblib.load unpickles arbitrary Python objects. Only
    # load model files (local or downloaded) from sources you trust.
    for location in local_candidates:
        if not os.path.exists(location):
            continue
        try:
            model = joblib.load(location)
        except Exception as e:
            # A corrupt/incompatible file should not stop the search.
            print(f"Could not load model from {location}: {e}")
            continue
        print(f"Model loaded from: {location}")
        return model

    # If local files failed, try Hugging Face
    try:
        from huggingface_hub import hf_hub_download
        print("Trying Hugging Face Hub...")
        model_path = hf_hub_download(
            repo_id="krish129/tourism-customer-model",
            filename="best_model.pkl"
        )
        model = joblib.load(model_path)
        print("Model loaded from Hugging Face Hub")
        return model
    except Exception as e:
        print(f"Could not load from Hugging Face: {e}")

    # Last resort: create dummy model
    print("Creating dummy model for demo...")
    from sklearn.ensemble import RandomForestClassifier
    model = RandomForestClassifier(n_estimators=10, random_state=42)

    # NOTE: the dummy model is fitted on two columns only (Age,
    # MonthlyIncome). Feeding it a frame with a different column set will
    # make scikit-learn raise at predict time.
    X_dummy = pd.DataFrame({
        'Age': [25, 35, 45, 55, 65],
        'MonthlyIncome': [20000, 30000, 40000, 50000, 60000]
    })
    y_dummy = [0, 1, 0, 1, 0]
    model.fit(X_dummy, y_dummy)

    print("Dummy model created for demo")
    return model
76
+
77
+ # Load model when module is imported
78
+ model = load_model()
79
+
80
+ # Define expected columns based on your training
81
+ EXPECTED_COLUMNS = [
82
+ 'Age', 'TypeofContact', 'CityTier', 'DurationOfPitch', 'Occupation',
83
+ 'Gender', 'NumberOfPersonVisiting', 'NumberOfFollowups', 'ProductPitched',
84
+ 'PreferredPropertyStar', 'MaritalStatus', 'NumberOfTrips', 'Passport',
85
+ 'PitchSatisfactionScore', 'OwnCar', 'NumberOfChildrenVisiting',
86
+ 'Designation', 'MonthlyIncome'
87
+ ]
88
+
89
def encode_categorical(df):
    """Return a copy of ``df`` with the known categorical columns integer-coded.

    Only the columns listed in the mapping below are touched, and only if
    they are present in ``df``; every other column is passed through as is.
    Values are compared as strings after stripping surrounding whitespace.
    Any value without an entry in the mapping (including missing values)
    is encoded as 0.

    Args:
        df: Input frame; it is not modified.

    Returns:
        A new DataFrame with the categorical columns replaced by ints.
    """
    df_encoded = df.copy()

    # Mapping for categorical variables. Several spellings map to the same
    # code on purpose (e.g. 'Fe Male' -> Female, 'Unmarried' -> Single).
    categorical_maps = {
        'TypeofContact': {'Company Invited': 1, 'Self Inquiry': 0},
        'Gender': {'Male': 1, 'Female': 0, 'Fe Male': 0, 'Fe male': 0},
        'Occupation': {'Salaried': 0, 'Small Business': 1, 'Large Business': 2, 'Free Lancer': 3, 'Business': 1},
        'ProductPitched': {'Basic': 0, 'Deluxe': 1, 'King': 2, 'Standard': 3, 'Super Deluxe': 4},
        'MaritalStatus': {'Single': 0, 'Married': 1, 'Divorced': 2, 'Unmarried': 0},
        'Designation': {'Executive': 0, 'Manager': 1, 'Senior Manager': 2, 'AVP': 3, 'VP': 4}
    }

    for col, mapping in categorical_maps.items():
        if col not in df_encoded.columns:
            continue
        # Strip stray whitespace so " Male " is not silently treated as an
        # unknown category and coded 0.
        as_text = df_encoded[col].astype(str).str.strip()
        # Unmapped values become NaN in map(); fall back to code 0.
        df_encoded[col] = as_text.map(mapping).fillna(0).astype(int)

    return df_encoded
112
+
113
def prepare_input(df):
    """Turn a raw customer frame into the numeric feature frame for the model.

    Steps: drop the identifier/target columns if present, integer-code the
    categorical columns, align the columns to ``EXPECTED_COLUMNS`` (missing
    ones are added as 0, extra ones are discarded), then coerce everything
    to numeric with unparseable values replaced by 0.

    Args:
        df: Raw input frame; it is not modified.

    Returns:
        A DataFrame whose columns are exactly ``EXPECTED_COLUMNS``, in
        that order, with numeric values only.
    """
    # Drop unnecessary columns (ignored when absent)
    df_clean = df.drop(columns=['CustomerID', 'ProdTaken'], errors='ignore')

    # Encode categorical variables
    df_encoded = encode_categorical(df_clean)

    # Add any missing expected column as 0 and fix the column order in one go
    df_encoded = df_encoded.reindex(columns=EXPECTED_COLUMNS, fill_value=0)

    # Convert all to numeric; anything that cannot be parsed becomes 0
    df_encoded = df_encoded.apply(pd.to_numeric, errors='coerce')
    df_encoded = df_encoded.fillna(0)

    return df_encoded
135
+
136
def predict(data_dict: dict):
    """Predict whether a customer will take the product.

    Args:
        data_dict: Raw input fields keyed by column name (categoricals as
            their text labels).

    Returns:
        ``(prediction, confidence)`` where ``prediction`` is 1 (buy) or
        0 (not buy) and ``confidence`` is the confidence in that predicted
        label, not the probability of a purchase.

    This function does not raise for model/preprocessing failures: any
    error on the model path is printed and a small rule-based estimate is
    returned instead.
    """
    try:
        # Convert to DataFrame
        frame = pd.DataFrame([data_dict])

        # Prepare input
        features = prepare_input(frame)

        # Ensure model is loaded
        active_model = model if model is not None else load_model()

        # Make prediction
        prediction = active_model.predict(features)[0]

        # Probability of the predicted class; 0.5 when it is unavailable.
        # NOTE(review): indexing proba[1]/proba[0] assumes the model's
        # classes are ordered [0, 1] - confirm against the trained model.
        try:
            if hasattr(active_model, 'predict_proba'):
                proba = active_model.predict_proba(features)[0]
                confidence = proba[1] if prediction == 1 else proba[0]
            else:
                confidence = 0.5
        except Exception:
            confidence = 0.5

        return int(prediction), float(confidence)

    except Exception as e:
        print(f"Prediction error: {e}")

        # Fallback: simple rule-based prediction
        age = data_dict.get('Age', 35)
        income = data_dict.get('MonthlyIncome', 20000)
        passport = data_dict.get('Passport', 0)

        # One point per favourable signal; two or more means "buy".
        score = 0
        if age < 40:
            score += 1
        if income > 25000:
            score += 1
        if passport == 1:
            score += 1

        prediction = 1 if score >= 2 else 0

        # Nominal confidence in the chosen label. Kept the same for both
        # labels so it means "confidence in the prediction", exactly like
        # the model path above (it used to be 0.3 for a negative).
        confidence = 0.7

        return int(prediction), float(confidence)
185
+
186
+ # For testing
187
+ if __name__ == "__main__":
188
+ # Test data
189
+ test_data = {
190
+ "CustomerID": 1001,
191
+ "ProdTaken": 0,
192
+ "Age": 35.0,
193
+ "TypeofContact": "Company Invited",
194
+ "CityTier": 2,
195
+ "DurationOfPitch": 15.0,
196
+ "Occupation": "Salaried",
197
+ "Gender": "Male",
198
+ "NumberOfPersonVisiting": 2,
199
+ "NumberOfFollowups": 3.0,
200
+ "ProductPitched": "Deluxe",
201
+ "PreferredPropertyStar": 4.0,
202
+ "MaritalStatus": "Married",
203
+ "NumberOfTrips": 2.0,
204
+ "Passport": 1,
205
+ "PitchSatisfactionScore": 4,
206
+ "OwnCar": 1,
207
+ "NumberOfChildrenVisiting": 0.0,
208
+ "Designation": "Manager",
209
+ "MonthlyIncome": 25000.0
210
+ }
211
+
212
+ print("Testing predict function...")
213
+ pred, conf = predict(test_data)
214
+ print(f"Prediction: {pred} (1=Buy, 0=Not Buy)")
215
+ print(f"Confidence: {conf:.1%}")
requirements.txt CHANGED
@@ -1,3 +1,7 @@
1
- altair
2
- pandas
3
- streamlit
 
 
 
 
 
1
+ streamlit==1.28.0
2
+ pandas==2.1.0
3
+ numpy==1.24.0
4
+ scikit-learn==1.3.0
5
+ joblib==1.3.0
6
+ huggingface-hub==0.19.0
7
+ plotly==5.17.0