rajkhanke commited on
Commit
662c070
·
verified ·
1 Parent(s): 98c9259

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +394 -311
app.py CHANGED
@@ -6,15 +6,18 @@ from sklearn.metrics.pairwise import cosine_similarity
6
  import os
7
  import logging
8
 
 
 
9
  logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]')
10
  logger = logging.getLogger(__name__)
11
 
12
  app = Flask(__name__)
13
 
 
14
  DF = None
15
  ALL_TOPPINGS = []
16
  FEATURE_DF = None
17
- SCALER = None
18
  NUMERICAL_COLS = ['Price', 'Slices', 'Rating', 'Spice_Level', 'Preparation_Time', 'Calories']
19
  CATEGORICAL_FEATURES = [
20
  'Serving_Size', 'Popular_Group', 'Dietary_Category',
@@ -27,58 +30,86 @@ DEFAULT_IMAGE_URL = 'https://images.dominos.co.in/new_margherita_2502.jpg'
27
 
28
  def preprocess_data(df_path='pizza.csv'):
29
  global DF, ALL_TOPPINGS, FEATURE_DF, SCALER, CATEGORICAL_FEATURES, CRUST_TYPE_COL
 
30
 
31
- if not os.path.exists(df_path):
32
- logger.error(f"Dataset file '{df_path}' not found.")
33
- raise FileNotFoundError(f"Dataset file '{df_path}' not found.")
 
 
34
 
35
- DF = pd.read_csv(df_path)
 
 
 
 
 
36
  logger.info(f"Original DataFrame columns: {DF.columns.tolist()}")
37
 
 
38
  potential_crust_cols = ['Crust_Type', 'Cr_Type']
39
  valid_crust_cols = [col for col in potential_crust_cols if col in DF.columns]
40
  if valid_crust_cols:
41
- valid_crust_cols.sort(key=lambda col: DF[col].isnull().sum())
42
  CRUST_TYPE_COL = valid_crust_cols[0]
43
  logger.info(f"Using '{CRUST_TYPE_COL}' for crust type.")
44
  if CRUST_TYPE_COL not in CATEGORICAL_FEATURES:
45
  CATEGORICAL_FEATURES.append(CRUST_TYPE_COL)
 
46
  for col in potential_crust_cols:
47
  if col != CRUST_TYPE_COL and col in CATEGORICAL_FEATURES:
48
  CATEGORICAL_FEATURES.remove(col)
49
  else:
50
- logger.warning("Crust type column not found. Crust type will not be used.")
51
  CRUST_TYPE_COL = None
52
 
53
- text_categorical_cols = list(
54
- set(CATEGORICAL_FEATURES + ['Toppings', 'Description', 'Allergens', 'Image_Url', 'Pizza_Name']))
55
- for col in text_categorical_cols:
56
- if col in DF.columns:
57
  DF[col] = DF[col].fillna('')
 
58
 
59
- numerical_cols_in_df = ['Price_Rs', 'Slices', 'Rating', 'Rating_Count', 'Preparation_Time_min',
60
- 'Calories_per_Slice']
61
  for col in numerical_cols_in_df:
62
  if col in DF.columns:
63
  if pd.api.types.is_numeric_dtype(DF[col]):
64
- DF[col] = DF[col].fillna(DF[col].median())
 
 
65
  else:
66
- DF[col] = pd.to_numeric(DF[col], errors='coerce').fillna(
67
- DF[col].median() if pd.api.types.is_numeric_dtype(DF[col]) else 0)
68
-
69
- if 'Rating_Count' in DF.columns: DF['Rating_Count'] = DF['Rating_Count'].fillna(0).astype(int)
70
-
71
- DF['Toppings_list_internal'] = DF['Toppings'].astype(str).str.split(
72
- ';\\s*')
73
- DF['Toppings_list_internal'] = DF['Toppings_list_internal'].apply(
74
- lambda x: [t.strip() for t in x if isinstance(t, str) and t.strip()])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
75
 
76
- current_all_toppings = set()
77
- for toppings_list in DF['Toppings_list_internal'].dropna():
78
- current_all_toppings.update(t for t in toppings_list if t)
79
- ALL_TOPPINGS = sorted(list(current_all_toppings))
80
- logger.info(f"Found {len(ALL_TOPPINGS)} unique toppings. Example: {ALL_TOPPINGS[:5]}")
81
 
 
82
  feature_data = {}
83
  num_feature_map = {
84
  'Price': 'Price_Rs', 'Slices': 'Slices', 'Rating': 'Rating',
@@ -88,107 +119,235 @@ def preprocess_data(df_path='pizza.csv'):
88
  if df_col in DF.columns:
89
  feature_data[feature_col] = DF[df_col].copy()
90
  else:
91
- feature_data[feature_col] = pd.Series([0.0] * len(DF))
 
92
 
 
93
  if 'Spice_Level' in DF.columns:
94
- DF['Spice_Level'] = DF['Spice_Level'].fillna('Mild')
95
  spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
96
- feature_data['Spice_Level'] = DF['Spice_Level'].map(spice_map).fillna(1.0)
97
  else:
98
- feature_data['Spice_Level'] = pd.Series([1.0] * len(DF))
 
99
 
 
100
  for feature_cat_col in CATEGORICAL_FEATURES:
101
- if feature_cat_col in DF.columns:
 
 
102
  for value in DF[feature_cat_col].unique():
103
- if pd.notnull(value) and value != '':
104
  feature_data[f"{feature_cat_col}_{value}"] = (DF[feature_cat_col] == value).astype(int)
 
 
105
 
 
106
  for topping in ALL_TOPPINGS:
107
- if topping:
108
  feature_data[f"Topping_{topping}"] = DF['Toppings_list_internal'].apply(
109
  lambda x: 1 if topping in x else 0
110
  )
111
 
112
  FEATURE_DF = pd.DataFrame(feature_data)
 
 
 
113
  for col in NUMERICAL_COLS:
114
- if col not in FEATURE_DF.columns: FEATURE_DF[col] = 0.0
 
 
115
  if FEATURE_DF[col].isnull().any():
116
- FEATURE_DF[col] = FEATURE_DF[col].fillna(
117
- FEATURE_DF[col].mean() if pd.notna(FEATURE_DF[col].mean()) else 0.0)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
118
 
119
- SCALER = MinMaxScaler()
120
- FEATURE_DF[NUMERICAL_COLS] = SCALER.fit_transform(FEATURE_DF[NUMERICAL_COLS])
121
- logger.info(f"Preproc done. FEATURE_DF shape: {FEATURE_DF.shape}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
122
 
123
 
124
  def get_recommendations(preferences):
125
  global DF, FEATURE_DF, SCALER, CRUST_TYPE_COL, DEFAULT_IMAGE_URL
126
 
127
- if FEATURE_DF is None or SCALER is None or DF is None:
128
- current_app.logger.error("Data not fully initialized for get_recommendations.")
129
  return []
130
 
131
  current_indices = DF.index.to_list()
132
  current_app.logger.info(f"Starting with {len(current_indices)} pizzas before filtering. Preferences: {preferences}")
133
 
134
- # 1. Toppings (OR logic if multiple selected)
135
- if 'toppings' in preferences and preferences['toppings']:
 
136
  selected_toppings = set(preferences['toppings'])
137
- if selected_toppings: # Ensure it's not an empty list
138
  topping_mask = DF.loc[current_indices, 'Toppings_list_internal'].apply(
139
- lambda x: any(t in selected_toppings for t in x))
 
140
  current_indices = DF.loc[current_indices][topping_mask].index.to_list()
141
  current_app.logger.info(f"After toppings filter: {len(current_indices)} pizzas remaining")
142
  if not current_indices: return []
143
 
144
  # 2. Max Price
145
  if 'price_range' in preferences and preferences['price_range'] and 'Price_Rs' in DF.columns:
146
- min_price = float(preferences['price_range'][0])
147
- max_price = float(preferences['price_range'][1])
148
- price_mask = (DF.loc[current_indices, 'Price_Rs'] >= min_price) & \
149
- (DF.loc[current_indices, 'Price_Rs'] <= max_price)
150
- current_indices = DF.loc[current_indices][price_mask].index.to_list()
151
- current_app.logger.info(
152
- f"After price filter ({min_price}-{max_price}): {len(current_indices)} pizzas remaining")
153
- if not current_indices: return []
154
-
155
- # 3. Number of Slices (>= selected)
 
 
 
156
  if 'slices' in preferences and preferences['slices'] is not None and 'Slices' in DF.columns:
157
  try:
158
  min_slices = int(preferences['slices'])
159
  slices_mask = DF.loc[current_indices, 'Slices'] >= min_slices
160
  current_indices = DF.loc[current_indices][slices_mask].index.to_list()
161
- current_app.logger.info(f"After slices filter (>= {min_slices}): {len(current_indices)} pizzas remaining")
162
  if not current_indices: return []
163
  except ValueError:
164
  current_app.logger.warning(f"Invalid value for slices: {preferences['slices']}")
165
 
166
- # 4. Minimum Rating (>= selected)
167
  if 'rating' in preferences and preferences['rating'] is not None and 'Rating' in DF.columns:
168
  try:
169
  min_rating = float(preferences['rating'])
170
  rating_mask = DF.loc[current_indices, 'Rating'] >= min_rating
171
  current_indices = DF.loc[current_indices][rating_mask].index.to_list()
172
- current_app.logger.info(f"After rating filter (>= {min_rating}): {len(current_indices)} pizzas remaining")
173
  if not current_indices: return []
174
  except ValueError:
175
  current_app.logger.warning(f"Invalid value for rating: {preferences['rating']}")
176
 
177
- # 5. Max Preparation Time (<= selected)
178
- if 'prep_time' in preferences and preferences[
179
- 'prep_time'] is not None and 'Preparation_Time_min' in DF.columns: # Changed 'preptime' to 'prep_time' to match JS
180
  try:
181
- prep_time_str = str(preferences['prep_time']).lower().replace("min", "").strip()
182
- max_prep_time = int(prep_time_str)
183
  prep_mask = DF.loc[current_indices, 'Preparation_Time_min'] <= max_prep_time
184
  current_indices = DF.loc[current_indices][prep_mask].index.to_list()
185
- current_app.logger.info(
186
- f"After prep time filter (<= {max_prep_time}): {len(current_indices)} pizzas remaining")
187
  if not current_indices: return []
188
  except ValueError:
189
- current_app.logger.warning(f"Could not parse preptime value: {preferences['prep_time']}")
190
 
191
- # 6. Categorical Filters (Exact Match or Multi-select with OR logic)
 
192
  categorical_pref_map = {
193
  "servingsize": "Serving_Size", "populargroup": "Popular_Group",
194
  "dietarycategory": "Dietary_Category", "spicelevel": "Spice_Level",
@@ -196,158 +355,154 @@ def get_recommendations(preferences):
196
  "restaurantchain": "Restaurant_Chain", "seasonalavailability": "Seasonal_Availability",
197
  "breadtype": "Bread_Type", "crusttype": CRUST_TYPE_COL
198
  }
199
-
200
  for pref_key, df_col_name in categorical_pref_map.items():
201
- if df_col_name and pref_key in preferences and preferences[pref_key] and df_col_name in DF.columns:
202
- pref_value = preferences[pref_key]
203
-
204
- # If pref_value is a list (from multi-select) and not empty
205
- if isinstance(pref_value, list) and pref_value:
206
- cat_mask = DF.loc[current_indices, df_col_name].isin(pref_value)
207
- filtered_indices_count_before = len(current_indices)
208
- current_indices = DF.loc[current_indices][cat_mask].index.to_list()
209
- current_app.logger.info(
210
- f"After {pref_key} filter (isin {pref_value}): {len(current_indices)} from {filtered_indices_count_before} pizzas remaining")
211
- # Legacy: if it's a single string (though frontend should send list now)
212
- elif isinstance(pref_value, str) and pref_value and pref_value.lower() != "any":
213
- cat_mask = DF.loc[current_indices, df_col_name] == pref_value
214
- filtered_indices_count_before = len(current_indices)
215
- current_indices = DF.loc[current_indices][cat_mask].index.to_list()
216
- current_app.logger.info(
217
- f"After {pref_key} filter ('{pref_value}'): {len(current_indices)} from {filtered_indices_count_before} pizzas remaining")
218
- elif not pref_value: # Empty list or empty string means no filter for this category
219
- current_app.logger.info(
220
- f"Skipping filter for {pref_key} as no specific options were selected (value: {pref_value}).")
221
- continue
222
-
223
- if not current_indices: return []
224
 
225
  if not current_indices:
226
- current_app.logger.warning("No pizzas match all filter criteria after hard filters.")
227
  return []
228
 
229
  # --- Similarity Scoring Part ---
 
230
  valid_indices_for_feature_df = FEATURE_DF.index.intersection(current_indices)
231
  if valid_indices_for_feature_df.empty:
232
- current_app.logger.warning("No valid indices remain for feature DF after hard filters.")
233
  return []
234
 
235
  filtered_feature_df = FEATURE_DF.loc[valid_indices_for_feature_df]
236
- if filtered_feature_df.empty:
237
- current_app.logger.warning("Filtered feature DF is empty after hard filters.")
238
  return []
239
 
240
- user_vector = pd.Series(0.0, index=FEATURE_DF.columns)
 
241
 
242
- # Toppings for similarity
243
  if 'toppings' in preferences and preferences['toppings']:
244
  for topping in preferences['toppings']:
245
  col_name = f"Topping_{topping}"
246
  if col_name in user_vector.index:
247
  user_vector[col_name] = 1.0
248
 
249
- # Categorical for similarity
250
- js_to_df_key_map_for_vector = {
251
- "servingsize": "Serving_Size", "populargroup": "Popular_Group",
252
- "dietarycategory": "Dietary_Category", "saucetype": "Sauce_Type",
253
- "cheeseamount": "Cheese_Amount", "restaurantchain": "Restaurant_Chain",
254
- "seasonalavailability": "Seasonal_Availability", "breadtype": "Bread_Type",
255
- "spicelevel": "Spice_Level" # Add spicelevel here for one-hot encoding
256
- }
257
- if CRUST_TYPE_COL: js_to_df_key_map_for_vector["crusttype"] = CRUST_TYPE_COL
258
-
259
- for pref_key, df_col_name in js_to_df_key_map_for_vector.items():
260
- if pref_key in preferences and preferences[pref_key]:
261
- pref_values_for_vector = preferences[pref_key]
262
- # Ensure it's a list, even if frontend sent a single string (should be list)
263
- if not isinstance(pref_values_for_vector, list):
264
- pref_values_for_vector = [pref_values_for_vector]
265
-
266
- for val_item in pref_values_for_vector:
267
- if isinstance(val_item, str) and val_item.lower() == "any": # Should not happen with new UI
268
- continue
269
- col_name = f"{df_col_name}_{val_item}"
270
- if col_name in user_vector.index:
271
- user_vector[col_name] = 1.0
272
-
273
- # Numerical for similarity
274
  raw_user_num_prefs_dict = {}
275
- spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
276
 
277
  if 'price_range' in preferences and preferences['price_range']:
278
- raw_user_num_prefs_dict['Price'] = (float(preferences['price_range'][0]) + float(
279
- preferences['price_range'][1])) / 2
 
280
  if 'slices' in preferences and preferences['slices'] is not None:
281
- raw_user_num_prefs_dict['Slices'] = float(preferences['slices'])
 
282
  if 'rating' in preferences and preferences['rating'] is not None:
283
- raw_user_num_prefs_dict['Rating'] = float(preferences['rating'])
284
-
285
- # Handle numerical Spice_Level for user_vector
286
- # Only set if exactly one spice level is chosen in the multi-select.
287
- # The one-hot encoded versions are handled above.
288
- if 'spicelevel' in preferences and preferences['spicelevel']:
289
- selected_spice_levels = preferences['spicelevel']
290
- if isinstance(selected_spice_levels, list) and len(selected_spice_levels) == 1:
291
- # If only one specific spice level selected from multi-select
292
- spice_val_str = selected_spice_levels[0]
293
- if spice_val_str and spice_val_str.lower() != "any":
294
- raw_user_num_prefs_dict['Spice_Level'] = float(spice_map.get(spice_val_str, 1))
295
- # If multiple spice levels or "Any" (empty list), don't set numerical Spice_Level for user_vector.
296
- # The one-hot encoded versions will cover the preference.
297
-
298
- if 'prep_time' in preferences and preferences['prep_time'] is not None: # Changed 'preptime'
299
- try:
300
- prep_time_str = str(preferences['prep_time']).lower().replace("min", "").strip()
301
- raw_user_num_prefs_dict['Preparation_Time'] = float(prep_time_str)
302
- except ValueError:
303
- pass
304
-
305
- # Scaling numerical preferences for user_vector
306
  temp_scaling_df = pd.DataFrame(columns=NUMERICAL_COLS, index=[0])
307
  for col in NUMERICAL_COLS:
308
- temp_scaling_df.loc[0, col] = raw_user_num_prefs_dict.get(col, 0.0) # Use default if not in dict
309
-
310
- # Ensure all NUMERICAL_COLS exist in temp_scaling_df before transform
311
- for col in NUMERICAL_COLS:
312
- if col not in temp_scaling_df.columns:
313
- temp_scaling_df[col] = 0.0 # Default to 0 or mean if appropriate
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
314
 
315
- scaled_user_num_values = SCALER.transform(temp_scaling_df[NUMERICAL_COLS])[0]
316
- for i, col_name in enumerate(NUMERICAL_COLS):
317
- if col_name in raw_user_num_prefs_dict: # Only set if user specified this numerical pref
318
- user_vector[col_name] = scaled_user_num_values[i]
319
 
320
- # Similarity calculation
321
  feature_matrix_filtered = filtered_feature_df.values
322
  user_array = user_vector.values.reshape(1, -1)
323
 
 
324
  if user_array.shape[1] != feature_matrix_filtered.shape[1]:
325
  current_app.logger.error(
326
- f"Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}")
327
- # This can happen if new columns were added to FEATURE_DF after user_vector was initialized
328
- # Re-align user_vector to FEATURE_DF.columns
329
- aligned_user_vector = pd.Series(0.0, index=FEATURE_DF.columns)
330
- for col in user_vector.index:
331
- if col in aligned_user_vector.index:
332
- aligned_user_vector[col] = user_vector[col]
333
  user_array = aligned_user_vector.values.reshape(1, -1)
 
334
  if user_array.shape[1] != feature_matrix_filtered.shape[1]:
335
- current_app.logger.error(
336
- f"Persistent Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}")
337
  return []
338
 
 
339
  similarities = cosine_similarity(user_array, feature_matrix_filtered)[0]
 
340
  sorted_indices_in_filtered_df = similarities.argsort()[::-1]
 
341
  final_recommendation_indices = valid_indices_for_feature_df[sorted_indices_in_filtered_df]
342
 
 
343
  recommendations_list = []
344
- frontend_keys = [
 
 
345
  'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
346
  'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
347
  'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
348
  'bread_type', 'image_url', 'crust_type'
349
  ]
350
- df_to_frontend_map = {
351
  'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
352
  'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
353
  'description': 'Description', 'popular_group': 'Popular_Group',
@@ -362,184 +517,112 @@ def get_recommendations(preferences):
362
  for original_idx in final_recommendation_indices:
363
  pizza_series = DF.iloc[original_idx]
364
  rec_item = {}
365
- for key in frontend_keys:
366
- df_col = df_to_frontend_map.get(key)
367
  if key == 'id':
368
  rec_item[key] = int(original_idx)
369
  elif df_col and df_col in pizza_series:
370
  value = pizza_series[df_col]
371
- if isinstance(value, np.integer):
372
- value = int(value)
373
- elif isinstance(value, np.floating):
374
- value = float(value)
375
- elif isinstance(value, np.ndarray):
376
- value = value.tolist()
377
  rec_item[key] = "" if pd.isna(value) else value
378
- elif key == 'crust_type' and not CRUST_TYPE_COL:
379
- rec_item[key] = "N/A"
380
  else:
381
  rec_item[key] = ""
382
 
383
  rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
384
  rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
385
-
386
- for k_final, v_final in rec_item.items():
387
  if isinstance(v_final, np.generic): rec_item[k_final] = v_final.item()
388
-
389
  recommendations_list.append(rec_item)
390
 
391
- current_app.logger.info(f"Final recommendations: {len(recommendations_list)} pizzas")
392
  return recommendations_list
393
 
394
 
395
- @app.route('/')
396
- def index_route():
397
- global DF, ALL_TOPPINGS, CATEGORICAL_FEATURES, CRUST_TYPE_COL
398
- if DF is None:
399
- current_app.logger.error("Data not loaded attempting to serve / route.")
400
- return "Error: Pizza data not loaded. Please check server logs.", 500
401
-
402
- filter_options = {}
403
- cols_for_filters = list(
404
- set(CATEGORICAL_FEATURES + ['Spice_Level'])) # Spice_Level might be in CATEGORICAL_FEATURES or separate
405
-
406
- if CRUST_TYPE_COL and CRUST_TYPE_COL not in cols_for_filters: # Ensure crust type is included if available
407
- cols_for_filters.append(CRUST_TYPE_COL)
408
-
409
- for col_name in cols_for_filters:
410
- if col_name in DF.columns:
411
- # Use a consistent key naming convention for JS
412
- key_name = col_name.lower().replace('_', '')
413
- # Special cases for consistency if needed, e.g. "spicelevel"
414
- if col_name == "Spice_Level": key_name = "spicelevel"
415
- if col_name == CRUST_TYPE_COL: key_name = "crusttype"
416
- # if col_name == "Serving_Size": key_name = "servingsize" # Example
417
-
418
- unique_values = sorted([v for v in DF[col_name].dropna().unique() if v != ''])
419
- filter_options[key_name] = unique_values # e.g. filter_options['spicelevel'] = ['Mild', 'Medium', 'Hot']
420
-
421
- default_recommendations_df = DF.sort_values('Rating', ascending=False).copy()
422
- default_recs_list = []
423
-
424
- frontend_keys = [
425
- 'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
426
- 'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
427
- 'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
428
- 'bread_type', 'image_url', 'crust_type'
429
- ]
430
- df_to_frontend_map = {
431
- 'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
432
- 'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
433
- 'description': 'Description', 'popular_group': 'Popular_Group',
434
- 'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
435
- 'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
436
- 'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
437
- 'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
438
- 'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
439
- 'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL
440
- }
441
-
442
- for original_idx, pizza_row in default_recommendations_df.iterrows():
443
- rec_item = {}
444
- for key in frontend_keys:
445
- df_col = df_to_frontend_map.get(key)
446
- if key == 'id':
447
- rec_item[key] = int(original_idx)
448
- elif df_col and df_col in pizza_row:
449
- value = pizza_row[df_col]
450
- if isinstance(value, np.integer):
451
- value = int(value)
452
- elif isinstance(value, np.floating):
453
- value = float(value)
454
- elif isinstance(value, np.ndarray):
455
- value = value.tolist()
456
- rec_item[key] = "" if pd.isna(value) else value
457
- elif key == 'crust_type' and not CRUST_TYPE_COL:
458
- rec_item[key] = "N/A"
459
- else:
460
- rec_item[key] = ""
461
-
462
- rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
463
- rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
464
-
465
- for k, v in rec_item.items():
466
- if isinstance(v, np.generic):
467
- rec_item[k] = v.item()
468
-
469
- default_recs_list.append(rec_item)
470
-
471
- current_app.logger.info(f"Serving {len(default_recs_list)} pizzas for initial display.")
472
- current_app.logger.info(f"Filter options for template: {filter_options}")
473
-
474
- return render_template('index.html',
475
- toppings=ALL_TOPPINGS,
476
- # Pass filter_options directly, JS will use these
477
- filter_options=filter_options,
478
- default_recommendations=default_recs_list,
479
- default_image_url=DEFAULT_IMAGE_URL)
480
-
481
-
482
  @app.route('/recommend', methods=['POST'])
483
  def recommend():
484
  try:
485
  data = request.json
486
- preferences = {}
487
  current_app.logger.info(f"Received recommendation request with data: {data}")
488
 
489
- # Process all possible preferences
490
- # Keys should match what JS sends (e.g., 'servingsize', 'spicelevel')
491
- # Numerical/range preferences
492
- simple_numerical_prefs = ['slices', 'rating', 'prep_time'] # 'prep_time' not 'preptime'
493
- for key in simple_numerical_prefs:
494
- if key in data and data[key] is not None: # Allow 0 for rating
495
- # For range sliders, value might be a string that needs parsing, ensure it's correct type
496
  try:
497
- if key == 'rating':
498
- preferences[key] = float(data[key])
499
- else:
500
- preferences[key] = int(data[key]) # slices, prep_time
501
  except ValueError:
502
- current_app.logger.warning(f"Could not parse numerical preference {key}: {data[key]}")
503
-
504
  if 'price_range' in data and data['price_range']:
505
  try:
506
  preferences['price_range'] = [float(p) for p in data['price_range']]
507
  except (ValueError, TypeError):
508
- current_app.logger.warning(f"Could not parse price_range: {data['price_range']}")
509
 
510
- # Multi-select categorical preferences (including toppings)
511
- # Keys like 'toppings', 'servingsize', 'dietarycategory', 'spicelevel', etc.
512
- multi_select_prefs = [
513
  'toppings', 'servingsize', 'populargroup', 'dietarycategory',
514
  'spicelevel', 'saucetype', 'cheeseamount', 'restaurantchain',
515
  'seasonalavailability', 'breadtype', 'crusttype'
516
  ]
517
- for key in multi_select_prefs:
518
- if key in data and isinstance(data[key], list): # Expecting a list
519
- preferences[key] = data[key] # Store the list (can be empty)
520
- elif key in data: # If not a list, log warning but try to process if it's a single string
521
- current_app.logger.warning(
522
- f"Preference for {key} was not a list: {data[key]}. Processing as single if string.")
523
- if isinstance(data[key], str) and data[key]:
524
- preferences[key] = [data[key]] # Wrap single string in a list for consistency
525
- else: # If not string or empty, treat as no preference for this key
526
- preferences[key] = []
527
 
528
  current_app.logger.info(f"Processed preferences for filtering: {preferences}")
529
  recommendations = get_recommendations(preferences)
530
  current_app.logger.info(f"Returning {len(recommendations)} recommendations after filtering and scoring.")
531
  return jsonify(recommendations)
 
532
  except Exception as e:
533
- current_app.logger.error(f"Error in /recommend: {e}", exc_info=True)
534
  return jsonify({"error": "Failed to get recommendations due to a server issue.", "details": str(e)}), 500
535
 
536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
537
  if __name__ == '__main__':
538
- try:
539
- preprocess_data()
540
- app.run(debug=True, use_reloader=False) # use_reloader=False is good for dev with global vars
541
- except FileNotFoundError as e:
542
- logger.critical(f"CRITICAL ERROR: {e}. Ensure 'pizza.csv' is present.")
543
- except Exception as e:
544
- logger.critical(f"Unexpected critical startup error: {e}", exc_info=True)
545
- # ... rest of the function
 
 
 
 
 
 
6
  import os
7
  import logging
8
 
9
+ # --- Logging Configuration ---
10
+ # Ensure logging is configured before any loggers are potentially used by imported modules
11
  logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s [in %(pathname)s:%(lineno)d]')
12
  logger = logging.getLogger(__name__)
13
 
14
  app = Flask(__name__)
15
 
16
+ # --- Global Variables ---
17
  DF = None
18
  ALL_TOPPINGS = []
19
  FEATURE_DF = None
20
+ SCALER = None # Will be initialized in preprocess_data
21
  NUMERICAL_COLS = ['Price', 'Slices', 'Rating', 'Spice_Level', 'Preparation_Time', 'Calories']
22
  CATEGORICAL_FEATURES = [
23
  'Serving_Size', 'Popular_Group', 'Dietary_Category',
 
30
 
31
  def preprocess_data(df_path='pizza.csv'):
32
  global DF, ALL_TOPPINGS, FEATURE_DF, SCALER, CATEGORICAL_FEATURES, CRUST_TYPE_COL
33
+ logger.info(f"Attempting to preprocess data from relative path: {df_path}")
34
 
35
+ # Construct absolute path for the CSV file
36
+ # This is crucial for environments like Docker where working directory might differ
37
+ base_dir = os.path.dirname(os.path.abspath(__file__)) # Directory of the current script (app.py)
38
+ absolute_df_path = os.path.join(base_dir, df_path)
39
+ logger.info(f"Absolute path for CSV: {absolute_df_path}")
40
 
41
+ if not os.path.exists(absolute_df_path):
42
+ logger.error(f"Dataset file '{absolute_df_path}' not found.")
43
+ raise FileNotFoundError(f"Dataset file '{absolute_df_path}' not found. Ensure it's in the same directory as app.py.")
44
+
45
+ DF = pd.read_csv(absolute_df_path)
46
+ logger.info(f"Successfully loaded '{absolute_df_path}'. Original DataFrame shape: {DF.shape}")
47
  logger.info(f"Original DataFrame columns: {DF.columns.tolist()}")
48
 
49
+ # Determine Crust Type Column
50
  potential_crust_cols = ['Crust_Type', 'Cr_Type']
51
  valid_crust_cols = [col for col in potential_crust_cols if col in DF.columns]
52
  if valid_crust_cols:
53
+ valid_crust_cols.sort(key=lambda col: DF[col].isnull().sum()) # Prefer column with fewer NaNs
54
  CRUST_TYPE_COL = valid_crust_cols[0]
55
  logger.info(f"Using '{CRUST_TYPE_COL}' for crust type.")
56
  if CRUST_TYPE_COL not in CATEGORICAL_FEATURES:
57
  CATEGORICAL_FEATURES.append(CRUST_TYPE_COL)
58
+ # Remove other potential crust columns if they were in CATEGORICAL_FEATURES
59
  for col in potential_crust_cols:
60
  if col != CRUST_TYPE_COL and col in CATEGORICAL_FEATURES:
61
  CATEGORICAL_FEATURES.remove(col)
62
  else:
63
+ logger.warning("Crust type column (Crust_Type or Cr_Type) not found. Crust type will not be used.")
64
  CRUST_TYPE_COL = None
65
 
66
+ # Fill NaN for text-based categorical columns and other text fields
67
+ text_cols_to_fill = list(set(CATEGORICAL_FEATURES + ['Toppings', 'Description', 'Allergens', 'Image_Url', 'Pizza_Name']))
68
+ for col in text_cols_to_fill:
69
+ if col and col in DF.columns: # Ensure col is not None (e.g. if CRUST_TYPE_COL is None)
70
  DF[col] = DF[col].fillna('')
71
+ logger.info("Filled NaNs in text-based categorical columns with empty strings.")
72
 
73
+ # Fill NaN for numerical columns from the CSV
74
+ numerical_cols_in_df = ['Price_Rs', 'Slices', 'Rating', 'Rating_Count', 'Preparation_Time_min', 'Calories_per_Slice']
75
  for col in numerical_cols_in_df:
76
  if col in DF.columns:
77
  if pd.api.types.is_numeric_dtype(DF[col]):
78
+ median_val = DF[col].median()
79
+ DF[col] = DF[col].fillna(median_val)
80
+ logger.info(f"Filled NaNs in numerical column '{col}' with its median ({median_val}).")
81
  else:
82
+ # Attempt to convert to numeric, then fill with median or 0
83
+ numeric_series = pd.to_numeric(DF[col], errors='coerce')
84
+ median_val = 0
85
+ if not numeric_series.isnull().all():
86
+ median_val = numeric_series.median()
87
+ DF[col] = numeric_series.fillna(median_val)
88
+ logger.warning(f"Column '{col}' was not purely numeric. Converted to numeric, filled NaNs with median/0 ({median_val}).")
89
+ else:
90
+ logger.warning(f"Expected numerical column '{col}' not found in DataFrame. It will be missing from features if not handled.")
91
+
92
+
93
+ if 'Rating_Count' in DF.columns:
94
+ DF['Rating_Count'] = DF['Rating_Count'].fillna(0).astype(int)
95
+
96
+ # Process Toppings
97
+ if 'Toppings' in DF.columns:
98
+ DF['Toppings_list_internal'] = DF['Toppings'].astype(str).str.split(r';\s*') # Use raw string for regex
99
+ DF['Toppings_list_internal'] = DF['Toppings_list_internal'].apply(
100
+ lambda x: [t.strip() for t in x if isinstance(t, str) and t.strip()]) # Filter out empty strings after split
101
+ current_all_toppings = set()
102
+ for toppings_list in DF['Toppings_list_internal'].dropna():
103
+ current_all_toppings.update(t for t in toppings_list if t) # Ensure t is not empty
104
+ ALL_TOPPINGS = sorted(list(current_all_toppings))
105
+ logger.info(f"Found {len(ALL_TOPPINGS)} unique toppings. Example: {ALL_TOPPINGS[:5] if ALL_TOPPINGS else 'None'}")
106
+ else:
107
+ logger.warning("'Toppings' column not found. Topping features will be empty.")
108
+ DF['Toppings_list_internal'] = pd.Series([[] for _ in range(len(DF))]) # Empty list for all rows
109
+ ALL_TOPPINGS = []
110
 
 
 
 
 
 
111
 
112
+ # --- Feature Engineering ---
113
  feature_data = {}
114
  num_feature_map = {
115
  'Price': 'Price_Rs', 'Slices': 'Slices', 'Rating': 'Rating',
 
119
  if df_col in DF.columns:
120
  feature_data[feature_col] = DF[df_col].copy()
121
  else:
122
+ logger.warning(f"Numerical source column '{df_col}' for feature '{feature_col}' not found. Filling with zeros.")
123
+ feature_data[feature_col] = pd.Series([0.0] * len(DF)) # Ensure float for consistency
124
 
125
+ # Spice Level Feature (Numerical)
126
  if 'Spice_Level' in DF.columns:
127
+ DF['Spice_Level'] = DF['Spice_Level'].fillna('Mild') # Default for NaNs
128
  spice_map = {'Mild': 1, 'Medium': 2, 'Hot': 3}
129
+ feature_data['Spice_Level'] = DF['Spice_Level'].map(spice_map).fillna(1.0) # Ensure float
130
  else:
131
+ logger.warning("'Spice_Level' column not found. Filling 'Spice_Level' feature with default (1.0).")
132
+ feature_data['Spice_Level'] = pd.Series([1.0] * len(DF)) # Default if column is missing
133
 
134
+ # One-Hot Encode Categorical Features
135
  for feature_cat_col in CATEGORICAL_FEATURES:
136
+ if feature_cat_col and feature_cat_col in DF.columns: # Check if col_name is not None and exists
137
+ # Ensure the column is treated as string to avoid issues with mixed types in unique()
138
+ DF[feature_cat_col] = DF[feature_cat_col].astype(str)
139
  for value in DF[feature_cat_col].unique():
140
+ if pd.notnull(value) and value.strip() != '': # Check for non-null and non-empty string values
141
  feature_data[f"{feature_cat_col}_{value}"] = (DF[feature_cat_col] == value).astype(int)
142
+ elif feature_cat_col: # Log warning only if feature_cat_col was defined
143
+ logger.warning(f"Categorical source column '{feature_cat_col}' for one-hot encoding not found in DataFrame.")
144
 
145
+ # Topping Features (One-Hot Encoded)
146
  for topping in ALL_TOPPINGS:
147
+ if topping: # Ensure topping string is not empty
148
  feature_data[f"Topping_{topping}"] = DF['Toppings_list_internal'].apply(
149
  lambda x: 1 if topping in x else 0
150
  )
151
 
152
  FEATURE_DF = pd.DataFrame(feature_data)
153
+ logger.info(f"FEATURE_DF created. Shape: {FEATURE_DF.shape}. Columns: {FEATURE_DF.columns.tolist()[:10]}...") # Log first 10 cols
154
+
155
+ # Ensure all NUMERICAL_COLS exist in FEATURE_DF and fill NaNs
156
  for col in NUMERICAL_COLS:
157
+ if col not in FEATURE_DF.columns:
158
+ logger.warning(f"Numerical column '{col}' is missing from FEATURE_DF after construction. Adding as zeros.")
159
+ FEATURE_DF[col] = 0.0 # Ensure float
160
  if FEATURE_DF[col].isnull().any():
161
+ mean_val = FEATURE_DF[col].mean()
162
+ fill_val = mean_val if pd.notna(mean_val) else 0.0
163
+ logger.info(f"Filling NaNs in numerical feature column '{col}' with {fill_val}.")
164
+ FEATURE_DF[col] = FEATURE_DF[col].fillna(fill_val)
165
+
166
+ # Scale Numerical Features
167
+ SCALER = MinMaxScaler() # Initialize scaler
168
+ if not FEATURE_DF.empty and all(col in FEATURE_DF.columns for col in NUMERICAL_COLS):
169
+ try:
170
+ FEATURE_DF[NUMERICAL_COLS] = SCALER.fit_transform(FEATURE_DF[NUMERICAL_COLS])
171
+ logger.info(f"Numerical columns ({NUMERICAL_COLS}) scaled. FEATURE_DF shape: {FEATURE_DF.shape}")
172
+ except Exception as e:
173
+ logger.error(f"Error during scaling of numerical columns: {e}. FEATURE_DF might be problematic.")
174
+ # Fallback: Keep numerical columns unscaled if scaling fails, or handle as needed
175
+ elif FEATURE_DF.empty:
176
+ logger.error("FEATURE_DF is empty before scaling. Scaling skipped. This will likely cause issues.")
177
+ else:
178
+ missing_cols = [col for col in NUMERICAL_COLS if col not in FEATURE_DF.columns]
179
+ logger.error(f"Not all numerical columns ({NUMERICAL_COLS}) found in FEATURE_DF for scaling. Missing: {missing_cols}. Scaling skipped.")
180
+
181
+ logger.info(f"Preprocessing done. DF is None: {DF is None}, FEATURE_DF is None: {FEATURE_DF is None}, SCALER is None: {SCALER is None}")
182
+ if FEATURE_DF is not None:
183
+ logger.info(f"Final FEATURE_DF shape: {FEATURE_DF.shape}")
184
+ if DF is not None:
185
+ logger.info(f"Final DF shape: {DF.shape}")
186
 
187
+
188
@app.route('/')
def index_route():
    """Render the landing page: filter options, the topping list and a
    default (top-rated) set of pizza recommendations.

    Returns a rendered ``index.html`` on success, or a plain-text error
    with HTTP 500 when the preprocessed globals are unavailable.
    """
    global DF, ALL_TOPPINGS, CATEGORICAL_FEATURES, CRUST_TYPE_COL, FEATURE_DF, DEFAULT_IMAGE_URL
    # Critical check at the beginning of the route: module-level preprocessing
    # may have failed or not run at all.
    if DF is None:
        current_app.logger.error("DF is None when trying to serve '/'. Data preprocessing might have failed or not run.")
        return "Error: Pizza data (DF) not loaded. Please check server logs.", 500
    if FEATURE_DF is None:  # Also check FEATURE_DF as it's derived from DF
        current_app.logger.error("FEATURE_DF is None when trying to serve '/'. Data preprocessing might have failed.")
        return "Error: Pizza feature data (FEATURE_DF) not loaded. Please check server logs.", 500

    filter_options = {}
    # Columns that drive the filter UI: the known categorical features plus
    # 'Spice_Level' (stored as text in DF even though it is also numeric-mapped).
    cols_for_filters_set = set(cat_col for cat_col in CATEGORICAL_FEATURES if cat_col and cat_col in DF.columns)
    if 'Spice_Level' in DF.columns:
        cols_for_filters_set.add('Spice_Level')
    # CRUST_TYPE_COL is already in CATEGORICAL_FEATURES if it was found.

    for col_name in list(cols_for_filters_set):
        # key_name for JS should be consistent (lowercase, no underscores).
        key_name = col_name.lower().replace('_', '')
        # BUGFIX: drop NaNs *before* casting to str. astype(str) converts NaN
        # into the literal string 'nan', which a subsequent dropna() can never
        # remove, so 'nan' used to leak into the filter dropdowns.
        unique_values = sorted([v for v in DF[col_name].dropna().astype(str).unique() if v.strip() != ''])
        if unique_values:  # Only add if there are actual values
            filter_options[key_name] = unique_values

    # Prepare default recommendations (top-rated first when possible).
    if 'Rating' in DF.columns:
        default_recommendations_df = DF.sort_values('Rating', ascending=False).copy()
    else:
        logger.warning("'Rating' column not found in DF. Cannot sort for default recommendations. Using unsorted DF.")
        default_recommendations_df = DF.copy()  # Fallback to unsorted

    default_recs_list = []
    # Keys the frontend template/JS expects on every pizza object.
    frontend_keys = [
        'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
        'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
        'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
        'bread_type', 'image_url', 'crust_type'
    ]
    # Maps frontend keys to DF column names ('id' is derived from the index).
    df_to_frontend_map = {
        'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
        'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
        'description': 'Description', 'popular_group': 'Popular_Group',
        'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
        'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
        'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
        'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
        'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
        'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL  # Uses the determined CRUST_TYPE_COL
    }

    for original_idx, pizza_row in default_recommendations_df.iterrows():
        rec_item = {}
        for key in frontend_keys:
            df_col = df_to_frontend_map.get(key)
            if key == 'id':
                rec_item[key] = int(original_idx)  # Pizza ID is its original index in DF
            elif df_col and df_col in pizza_row:  # df_col can be None ('id', or CRUST_TYPE_COL is None)
                value = pizza_row[df_col]
                # Type conversions for JSON serializability.
                if isinstance(value, np.integer):
                    value = int(value)
                elif isinstance(value, np.floating):
                    value = float(value)
                elif isinstance(value, np.ndarray):
                    value = value.tolist()
                rec_item[key] = "" if pd.isna(value) else value
            elif key == 'crust_type' and not CRUST_TYPE_COL:  # If CRUST_TYPE_COL was not found
                rec_item[key] = "N/A"
            else:
                rec_item[key] = ""  # Default for missing fields

        rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)  # Ensure int
        rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL

        # Final pass to convert any remaining numpy generic types.
        for k_final, v_final in rec_item.items():
            if isinstance(v_final, np.generic):
                rec_item[k_final] = v_final.item()
        default_recs_list.append(rec_item)

    current_app.logger.info(f"Serving {len(default_recs_list)} pizzas for initial display.")
    current_app.logger.info(f"Filter options for template: {filter_options}")
    current_app.logger.info(f"ALL_TOPPINGS for template: {ALL_TOPPINGS[:5] if ALL_TOPPINGS else 'None'}")

    return render_template('index.html',
                           toppings=ALL_TOPPINGS,
                           filter_options=filter_options,
                           default_recommendations=default_recs_list,
                           default_image_url=DEFAULT_IMAGE_URL)
278
 
279
 
280
def get_recommendations(preferences):
    """Return ranked pizza recommendations for the given user preferences.

    Applies hard filters (toppings, price range, slices, rating, prep time,
    multi-select categorical values) to DF, then ranks the survivors by
    cosine similarity between a user-preference vector and each pizza's
    row in FEATURE_DF.

    Args:
        preferences: dict produced by the /recommend route. May contain
            'toppings', 'price_range' ([min, max]), 'slices', 'rating',
            'prep_time', and lowercase multi-select keys such as
            'spicelevel' or 'crusttype' (lists; empty list means "Any").

    Returns:
        List of frontend-shaped dicts ordered by similarity (possibly empty).
    """
    global DF, FEATURE_DF, SCALER, CRUST_TYPE_COL, DEFAULT_IMAGE_URL

    if DF is None or FEATURE_DF is None or SCALER is None:
        current_app.logger.error("Data not fully initialized (DF, FEATURE_DF, or SCALER is None) for get_recommendations.")
        return []

    current_indices = DF.index.to_list()
    current_app.logger.info(f"Starting with {len(current_indices)} pizzas before filtering. Preferences: {preferences}")

    # --- Hard Filters ---
    # 1. Toppings: keep pizzas containing at least one selected topping.
    if 'toppings' in preferences and preferences['toppings'] and 'Toppings_list_internal' in DF.columns:
        selected_toppings = set(preferences['toppings'])
        if selected_toppings:  # Ensure not an empty list that would select nothing
            topping_mask = DF.loc[current_indices, 'Toppings_list_internal'].apply(
                lambda x_toppings: isinstance(x_toppings, list) and any(t in selected_toppings for t in x_toppings)
            )
            current_indices = DF.loc[current_indices][topping_mask].index.to_list()
            current_app.logger.info(f"After toppings filter: {len(current_indices)} pizzas remaining")
            if not current_indices: return []

    # 2. Price range (inclusive on both ends).
    if 'price_range' in preferences and preferences['price_range'] and 'Price_Rs' in DF.columns:
        try:
            min_price = float(preferences['price_range'][0])
            max_price = float(preferences['price_range'][1])
            price_mask = (DF.loc[current_indices, 'Price_Rs'] >= min_price) & \
                         (DF.loc[current_indices, 'Price_Rs'] <= max_price)
            current_indices = DF.loc[current_indices][price_mask].index.to_list()
            current_app.logger.info(f"After price filter ({min_price}-{max_price}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except (TypeError, ValueError, IndexError) as e:
            current_app.logger.warning(f"Invalid price_range preference: {preferences['price_range']}. Error: {e}")

    # 3. Number of Slices (minimum).
    if 'slices' in preferences and preferences['slices'] is not None and 'Slices' in DF.columns:
        try:
            min_slices = int(preferences['slices'])
            slices_mask = DF.loc[current_indices, 'Slices'] >= min_slices
            current_indices = DF.loc[current_indices][slices_mask].index.to_list()
            current_app.logger.info(f"After slices filter (>= {min_slices}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except ValueError:
            current_app.logger.warning(f"Invalid value for slices: {preferences['slices']}")

    # 4. Minimum Rating.
    if 'rating' in preferences and preferences['rating'] is not None and 'Rating' in DF.columns:
        try:
            min_rating = float(preferences['rating'])
            rating_mask = DF.loc[current_indices, 'Rating'] >= min_rating
            current_indices = DF.loc[current_indices][rating_mask].index.to_list()
            current_app.logger.info(f"After rating filter (>= {min_rating}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except ValueError:
            current_app.logger.warning(f"Invalid value for rating: {preferences['rating']}")

    # 5. Maximum Preparation Time (accepts "30", "30 min", etc.).
    if 'prep_time' in preferences and preferences['prep_time'] is not None and 'Preparation_Time_min' in DF.columns:
        try:
            max_prep_time = int(str(preferences['prep_time']).lower().replace("min", "").strip())
            prep_mask = DF.loc[current_indices, 'Preparation_Time_min'] <= max_prep_time
            current_indices = DF.loc[current_indices][prep_mask].index.to_list()
            current_app.logger.info(f"After prep time filter (<= {max_prep_time}): {len(current_indices)} pizzas")
            if not current_indices: return []
        except ValueError:
            current_app.logger.warning(f"Could not parse prep_time value: {preferences['prep_time']}")

    # 6. Categorical Filters (multi-select, OR logic within a category).
    # Keys match the JS payload; values are DF column names.
    categorical_pref_map = {
        "servingsize": "Serving_Size", "populargroup": "Popular_Group",
        "dietarycategory": "Dietary_Category", "spicelevel": "Spice_Level",
        "saucetype": "Sauce_Type", "cheeseamount": "Cheese_Amount",
        "restaurantchain": "Restaurant_Chain", "seasonalavailability": "Seasonal_Availability",
        "breadtype": "Bread_Type", "crusttype": CRUST_TYPE_COL
    }
    for pref_key, df_col_name in categorical_pref_map.items():
        if df_col_name and pref_key in preferences and preferences[pref_key]:  # df_col_name can be None (no crust col)
            pref_value_list = preferences[pref_key]  # Expected to be a list from JS
            if isinstance(pref_value_list, list) and pref_value_list:  # Empty list means "Any": no filtering
                if df_col_name in DF.columns:
                    cat_mask = DF.loc[current_indices, df_col_name].isin(pref_value_list)
                    current_indices = DF.loc[current_indices][cat_mask].index.to_list()
                    current_app.logger.info(f"After {pref_key} filter (isin {pref_value_list}): {len(current_indices)} pizzas")
                    if not current_indices: return []
                else:
                    current_app.logger.warning(f"Column '{df_col_name}' for preference '{pref_key}' not found in DF. Filter skipped.")

    if not current_indices:
        current_app.logger.info("No pizzas match all hard filter criteria.")
        return []

    # --- Similarity Scoring Part ---
    # Restrict FEATURE_DF to the pizzas that survived the hard filters.
    valid_indices_for_feature_df = FEATURE_DF.index.intersection(current_indices)
    if valid_indices_for_feature_df.empty:
        current_app.logger.info("No valid indices remain for FEATURE_DF after hard filters.")
        return []

    filtered_feature_df = FEATURE_DF.loc[valid_indices_for_feature_df]
    if filtered_feature_df.empty:  # Should not happen if valid_indices_for_feature_df is not empty
        current_app.logger.warning("Filtered FEATURE_DF is empty. This is unexpected.")
        return []

    # Build the user preference vector, aligned with FEATURE_DF columns.
    user_vector = pd.Series(0.0, index=FEATURE_DF.columns)  # 0.0 for float consistency

    # 1. Toppings in user vector.
    if 'toppings' in preferences and preferences['toppings']:
        for topping in preferences['toppings']:
            col_name = f"Topping_{topping}"
            if col_name in user_vector.index:
                user_vector[col_name] = 1.0

    # 2. Categorical preferences (one-hot columns, e.g. "Spice_Level_Mild").
    for pref_key, df_col_prefix in categorical_pref_map.items():
        if df_col_prefix and pref_key in preferences and preferences[pref_key]:
            selected_values = preferences[pref_key]  # This is a list
            for val_item in selected_values:
                one_hot_col_name = f"{df_col_prefix}_{val_item}"
                if one_hot_col_name in user_vector.index:
                    user_vector[one_hot_col_name] = 1.0

    # 3. Numerical preferences in user vector (scaled with the fitted SCALER).
    raw_user_num_prefs_dict = {}
    spice_map_for_num_pref = {'Mild': 1.0, 'Medium': 2.0, 'Hot': 3.0}  # Use floats

    if 'price_range' in preferences and preferences['price_range']:
        # Average of min/max price as the point preference.
        # BUGFIX: narrowed the former bare `except:` to the parse errors that
        # can actually occur here (bare except also swallows KeyboardInterrupt).
        try:
            raw_user_num_prefs_dict['Price'] = (float(preferences['price_range'][0]) + float(preferences['price_range'][1])) / 2
        except (TypeError, ValueError, IndexError):
            pass  # Ignore if parsing fails
    if 'slices' in preferences and preferences['slices'] is not None:
        try:
            raw_user_num_prefs_dict['Slices'] = float(preferences['slices'])
        except (TypeError, ValueError):
            pass
    if 'rating' in preferences and preferences['rating'] is not None:
        try:
            raw_user_num_prefs_dict['Rating'] = float(preferences['rating'])
        except (TypeError, ValueError):
            pass
    if 'prep_time' in preferences and preferences['prep_time'] is not None:
        try:
            raw_user_num_prefs_dict['Preparation_Time'] = float(str(preferences['prep_time']).lower().replace("min", "").strip())
        except (TypeError, ValueError):
            pass
    # Numerical Spice_Level: only when exactly *one* spice level is selected;
    # otherwise rely solely on the one-hot spice features.
    if 'spicelevel' in preferences and isinstance(preferences['spicelevel'], list) and len(preferences['spicelevel']) == 1:
        selected_spice = preferences['spicelevel'][0]
        if selected_spice in spice_map_for_num_pref:
            raw_user_num_prefs_dict['Spice_Level'] = spice_map_for_num_pref[selected_spice]

    # Scale the raw numerical preferences with the SCALER fitted during
    # preprocessing. Unspecified columns default to the scaler's original
    # minimum (which scales to 0, i.e. neutral).
    temp_scaling_df = pd.DataFrame(columns=NUMERICAL_COLS, index=[0])
    for col in NUMERICAL_COLS:
        default_val = 0.0
        if hasattr(SCALER, 'data_min_') and col in FEATURE_DF.columns:  # Scaler is fit and column exists
            col_idx_in_scaler = -1
            try:
                col_idx_in_scaler = NUMERICAL_COLS.index(col)
            except ValueError:
                pass
            if col_idx_in_scaler != -1 and col_idx_in_scaler < len(SCALER.data_min_):
                default_val = SCALER.data_min_[col_idx_in_scaler]  # Original (unscaled) minimum
            else:  # Fallback if col not among the SCALER's fitted columns
                logger.warning(f"Column {col} not found in SCALER's fitted columns during user vector creation. Defaulting to 0.")
        temp_scaling_df.loc[0, col] = raw_user_num_prefs_dict.get(col, default_val)

    if hasattr(SCALER, 'n_features_in_'):  # Scaler has been fit
        scaled_user_num_values = SCALER.transform(temp_scaling_df[NUMERICAL_COLS])[0]
        for i, col_name in enumerate(NUMERICAL_COLS):
            if col_name in raw_user_num_prefs_dict:  # Only set columns the user actually specified
                user_vector[col_name] = scaled_user_num_values[i]
    else:
        logger.warning("SCALER is not fit. Cannot scale user's numerical preferences. Using raw values (0-1 range assumed).")
        for col_name in NUMERICAL_COLS:
            if col_name in raw_user_num_prefs_dict:
                # Rough fallback normalization; not accurate, only keeps magnitudes sane.
                user_vector[col_name] = raw_user_num_prefs_dict[col_name] / 100.0

    # Cosine similarity between the user vector and each candidate pizza.
    feature_matrix_filtered = filtered_feature_df.values
    user_array = user_vector.values.reshape(1, -1)

    # Defensive: shapes should always match since user_vector is built on
    # FEATURE_DF.columns, but align on common columns if they ever diverge.
    if user_array.shape[1] != feature_matrix_filtered.shape[1]:
        current_app.logger.error(
            f"Shape mismatch! User vector: {user_array.shape}, Feature matrix: {feature_matrix_filtered.shape}. "
            f"User cols: {user_vector.index.tolist()[:5]}, Feature cols: {filtered_feature_df.columns.tolist()[:5]}"
        )
        common_cols = filtered_feature_df.columns.intersection(user_vector.index)
        aligned_user_vector = pd.Series(0.0, index=filtered_feature_df.columns)
        aligned_user_vector[common_cols] = user_vector[common_cols]
        user_array = aligned_user_vector.values.reshape(1, -1)

        if user_array.shape[1] != feature_matrix_filtered.shape[1]:
            current_app.logger.critical(f"Persistent shape mismatch even after alignment. Cannot compute similarity.")
            return []

    similarities = cosine_similarity(user_array, feature_matrix_filtered)[0]
    # Positions sorted by similarity (descending) within filtered_feature_df...
    sorted_indices_in_filtered_df = similarities.argsort()[::-1]
    # ...mapped back to original DF index labels.
    final_recommendation_indices = valid_indices_for_feature_df[sorted_indices_in_filtered_df]

    # Shape the results for the frontend (mirrors index_route's mapping).
    recommendations_list = []
    frontend_keys_rec = [
        'id', 'name', 'toppings', 'price', 'slices', 'serving_size', 'rating', 'rating_count',
        'description', 'popular_group', 'dietary_category', 'spice_level', 'sauce_type',
        'cheese_amount', 'calories', 'allergens', 'prep_time', 'restaurant', 'seasonal',
        'bread_type', 'image_url', 'crust_type'
    ]
    df_to_frontend_map_rec = {
        'id': None, 'name': 'Pizza_Name', 'toppings': 'Toppings', 'price': 'Price_Rs', 'slices': 'Slices',
        'serving_size': 'Serving_Size', 'rating': 'Rating', 'rating_count': 'Rating_Count',
        'description': 'Description', 'popular_group': 'Popular_Group',
        'dietary_category': 'Dietary_Category', 'spice_level': 'Spice_Level',
        'sauce_type': 'Sauce_Type', 'cheese_amount': 'Cheese_Amount',
        'calories': 'Calories_per_Slice', 'allergens': 'Allergens',
        'prep_time': 'Preparation_Time_min', 'restaurant': 'Restaurant_Chain',
        'seasonal': 'Seasonal_Availability', 'bread_type': 'Bread_Type',
        'image_url': 'Image_Url', 'crust_type': CRUST_TYPE_COL
    }

    for original_idx in final_recommendation_indices:
        # BUGFIX: original_idx is an index *label* (from FEATURE_DF.index
        # intersected with DF.index), so label-based .loc is required; the
        # previous positional .iloc silently returned the wrong row whenever
        # DF's index is not a contiguous 0..n-1 range.
        pizza_series = DF.loc[original_idx]
        rec_item = {}
        for key in frontend_keys_rec:
            df_col = df_to_frontend_map_rec.get(key)
            if key == 'id':
                rec_item[key] = int(original_idx)
            elif df_col and df_col in pizza_series:
                value = pizza_series[df_col]
                # Type conversions for JSON serializability.
                if isinstance(value, np.integer):
                    value = int(value)
                elif isinstance(value, np.floating):
                    value = float(value)
                elif isinstance(value, np.ndarray):
                    value = value.tolist()
                rec_item[key] = "" if pd.isna(value) else value
            elif key == 'crust_type' and not CRUST_TYPE_COL:
                rec_item[key] = "N/A"
            else:
                rec_item[key] = ""

        rec_item['rating_count'] = int(rec_item.get('rating_count', 0) or 0)
        rec_item['image_url'] = rec_item.get('image_url') if rec_item.get('image_url') else DEFAULT_IMAGE_URL
        for k_final, v_final in rec_item.items():  # Final numpy type check
            if isinstance(v_final, np.generic):
                rec_item[k_final] = v_final.item()
        recommendations_list.append(rec_item)

    current_app.logger.info(f"Final recommendations count: {len(recommendations_list)}")
    return recommendations_list
543
 
544
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
545
@app.route('/recommend', methods=['POST'])
def recommend():
    """POST endpoint: parse the JSON preference payload sent by the frontend,
    normalize it, and return ranked pizza recommendations as JSON.

    Returns a JSON list on success, or a JSON error object with HTTP 500.
    """
    try:
        # BUGFIX: request.json returns None (or raises 415 in newer Flask) for
        # missing/non-JSON bodies, which made `key_js in data` below blow up
        # into a 500. Fall back to an empty dict so an empty/invalid body
        # simply yields "no preferences".
        data = request.get_json(silent=True) or {}
        preferences = {}  # Store processed preferences
        current_app.logger.info(f"Received recommendation request with data: {data}")

        # Numerical/range preferences; keys match the JS payload:
        # 'slices', 'rating', 'prep_time', 'price_range'.
        simple_numerical_prefs_js = ['slices', 'rating', 'prep_time']
        for key_js in simple_numerical_prefs_js:
            if key_js in data and data[key_js] is not None:
                try:
                    if key_js == 'rating':
                        preferences[key_js] = float(data[key_js])
                    elif key_js == 'prep_time':
                        # BUGFIX: tolerate values like "30 min" — the same
                        # format get_recommendations itself parses — instead
                        # of failing int() and dropping the preference.
                        preferences[key_js] = int(str(data[key_js]).lower().replace("min", "").strip())
                    else:  # slices
                        preferences[key_js] = int(data[key_js])
                except (TypeError, ValueError):
                    current_app.logger.warning(f"Could not parse numerical preference '{key_js}': {data[key_js]}")

        if 'price_range' in data and data['price_range']:
            try:
                preferences['price_range'] = [float(p) for p in data['price_range']]
            except (ValueError, TypeError):
                current_app.logger.warning(f"Could not parse price_range: {data['price_range']}")

        # Multi-select categorical preferences; an empty list means "Any".
        multi_select_prefs_js = [
            'toppings', 'servingsize', 'populargroup', 'dietarycategory',
            'spicelevel', 'saucetype', 'cheeseamount', 'restaurantchain',
            'seasonalavailability', 'breadtype', 'crusttype'
        ]
        for key_js in multi_select_prefs_js:
            if key_js in data and isinstance(data[key_js], list):
                preferences[key_js] = data[key_js]  # Expecting a list (can be empty for "Any")
            elif key_js in data:  # Present but not a list: treat as "Any"
                current_app.logger.warning(f"Preference for '{key_js}' was not a list: {data[key_js]}. Treating as empty (Any).")
                preferences[key_js] = []

        current_app.logger.info(f"Processed preferences for filtering: {preferences}")
        recommendations = get_recommendations(preferences)
        current_app.logger.info(f"Returning {len(recommendations)} recommendations after filtering and scoring.")
        return jsonify(recommendations)

    except Exception as e:
        current_app.logger.error(f"Error in /recommend endpoint: {e}", exc_info=True)
        return jsonify({"error": "Failed to get recommendations due to a server issue.", "details": str(e)}), 500
591
 
592
 
593
# --- Main Application Execution ---
# preprocess_data() is called at module level so it runs exactly once when the
# application (or each Gunicorn worker) imports this module; the routes above
# depend on the globals (DF, FEATURE_DF, SCALER) it populates.
try:
    logger.info("----- Starting data preprocessing at module load... -----")
    preprocess_data()  # Uses the default 'pizza.csv' path
    logger.info("----- Data preprocessing completed successfully at module load. -----")
    # Sanity-check the globals the routes rely on; log (don't raise) so the
    # failure is visible in the server logs rather than aborting the worker.
    if DF is None:
        logger.critical("CRITICAL AT STARTUP: Global DF is None after preprocess_data(). App will likely fail.")
    if FEATURE_DF is None:
        logger.critical("CRITICAL AT STARTUP: Global FEATURE_DF is None after preprocess_data(). App will likely fail.")
    if SCALER is None:  # SCALER should be initialized even if fitting fails
        logger.critical("CRITICAL AT STARTUP: Global SCALER is None after preprocess_data(). App will likely fail.")

except FileNotFoundError as e:
    logger.critical(f"CRITICAL ERROR AT MODULE LOAD (FileNotFoundError): {e}. Ensure 'pizza.csv' is in the /app directory (or same dir as app.py).")
    # In a production Gunicorn setup, the app might still try to start, leading to errors in routes.
    # For Hugging Face, it's better to log and let it attempt to run, as exiting might obscure logs.
except Exception as e:
    logger.critical(f"Unexpected critical startup error during preprocessing at module load: {e}", exc_info=True)
613
+
614
+
615
if __name__ == '__main__':
    # This block is only for local development (`python app.py`).
    # preprocess_data() already ran above when the module was imported.
    logger.info("----- Running Flask app directly (e.g., python app.py) -----")
    # Sanity check for local runs; the globals should already be set by the
    # module-level preprocessing call.
    if DF is None or FEATURE_DF is None or SCALER is None:
        logger.warning("One or more global data variables (DF, FEATURE_DF, SCALER) are None before local app.run(). This is unexpected if module-level preprocessing ran.")
        # Optionally, re-run preprocessing if critical for local dev and something went wrong with module-level load
        # logger.info("Attempting to re-run preprocess_data() for local development.")
        # preprocess_data()

    # use_reloader=False: the reloader re-imports the module on every code
    # change, which would re-run the (slow) global preprocessing each time.
    app.run(debug=True, host='0.0.0.0', port=7860, use_reloader=False)